Merge pull request #6 from washingtonpost/release/0.3.1

Release/0.3.1
washingtonpost · Mar 18, 2019 · 8fc7002 · 8fc7002
2 parents e7085be + 09fc55f
commit 8fc7002
Show file tree

Hide file tree

Showing 20 changed files with 133 additions and 153 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -7,13 +7,18 @@ workflows:
       - test
   deploy:
     jobs:
-      - update_docs
+      - update_docs:
+          filters:
+            tags:
+              only: /[0-9]+(\.[0-9]+)*/
+            branches:
+              ignore: /.*/
       - build_and_deploy:
           filters:
             tags:
               only: /[0-9]+(\.[0-9]+)*/
             branches:
-              only: master
+              ignore: /.*/
 jobs: # A basic unit of work in a run
   build_and_deploy:
     working_directory: ~/html2ans
@@ -22,17 +27,12 @@ jobs: # A basic unit of work in a run
       - image: circleci/python:3.6
     steps:
       - checkout
-      - run:
-          name: init .pypirc
-          command: |
-            echo -e "[pypi]" >> ~/.pypirc
-            echo -e "username = arcpsteam" >> ~/.pypirc
-            echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
       - run:
           name: Build and deploy
           command: |
             python3 -m virtualenv html2ans
             source html2ans/bin/activate
+            pip install twine
             python setup.py install test verify sdist bdist_wheel
             twine upload dist/*
   update_docs:
@@ -56,7 +56,8 @@ jobs: # A basic unit of work in a run
           command: |
             python3 -m virtualenv html2ans
             source html2ans/bin/activate
-            python setup.py install build_sphinx
+            pip install -e .
+            python setup.py build_sphinx
       - run:
           name: Upload docs
           command: |

diff --git a/MANIFEST.in b/MANIFEST.in
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -1,6 +1,20 @@
 Changelog
 =========
 
+v3.0.1
+------
+
+* Adds ``DailyMotionEmbedParser``, ``FlickrEmbedParser``, ``PollDaddyEmbedParser``, and ``RedditEmbedParser`` to ``DEFAULT_PARSERS`` in ``Html2Ans``/``DefaultHtmlAnsParser``
+
+  - These were accidentally left out of ``DEFAULT_PARSERS`` in v3.0.0
+
+* Updates the ``InstagramEmbedParser`` to accept hyphens in embed IDs
+* Internal improvements:
+
+  - ``setup.py`` is now the source of truth for requirements
+  - Stopped writing a ``.pypirc`` file for PyPI deployment; the CircleCI build config now uses Twine environment variables instead
+
+
 v3.0.0
 ------
 

diff --git a/docs/dev_guide.rst b/docs/dev_guide.rst
@@ -5,7 +5,6 @@ Getting set up
 --------------
 
 * Clone this repo
-* Make sure you have Python 3.6+ installed
 * Ideally, set up a virtualenv and activate it (http://virtualenvwrapper.readthedocs.io/en/latest/)
 * From your local copy of this project, run ``pip install -r requirements-dev.txt``
 * Run tox with the command ``tox``
@@ -27,3 +26,17 @@ Documentation
   - Run ``python setup.py build_sphinx``
   - ``cd ../html2ans-docs/html``
   - Commit and push the newly generated documentation
+
+
+Release Process
+---------------
+
+* New releases are published to PyPI when an appropriate tag (i.e. a version number in the form X.X.X) is pushed
+* In preparation for a new release:
+
+  - Decide what the next version will be per `semantic versioning <https://semver.org/>`_
+  - Make a new branch called ``release/<version number>``
+  - Update the version in ``setup.py``
+  - Update the changelog for all changes that will be included in the release
+  - Commit your changes and make a PR against master
+  - Once the changes are merged, tag the branch with the release's version number and push that tag
diff --git a/reqs/default.in b/reqs/default.in
diff --git a/reqs/default.txt b/reqs/default.txt
diff --git a/reqs/doc.in b/reqs/doc.in
diff --git a/reqs/doc.txt b/reqs/doc.txt
diff --git a/reqs/test.in b/reqs/test.in
diff --git a/reqs/test.txt b/reqs/test.txt
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,11 +1,9 @@
--r reqs/default.txt
--r reqs/test.txt
--r reqs/doc.txt
--e .
+-e .[dev]
 
+autopep8
 ipython<6 ; python_version<"3"
 ipython ; python_version>="3"
-pip-tools
 pycodestyle
 pytest-cov
 tox
+twine
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.py b/setup.py
@@ -1,58 +1,35 @@
-import sys
-import os
 from codecs import open
-from setuptools import setup, find_packages
+import os
+from setuptools import find_packages, setup
 from setuptools.command.install import install
-
-
-THIS_FILE_DIR = os.path.dirname(__file__)
-
-try:
-    # pip 9
-    from pip.req import parse_requirements
-    from pip.download import PipSession
-except ImportError:
-    # pip 10
-    from pip._internal.req import parse_requirements
-    from pip._internal.download import PipSession
-
-
-def load_reqs(fn):
-    reqs = []
-    reqs_extras = {}
-    parsed_reqs = parse_requirements(fn, session=PipSession())
-
-    for req in parsed_reqs:
-        markers = req.markers
-        if markers:
-            reqs_extras[":" + str(markers)] = str(req.req)
-        else:
-            reqs.append(str(req.req))
-
-    return reqs, reqs_extras
+import sys
 
 
 NEEDS_DOCS = 'build_sphinx' in sys.argv
 NEEDS_PYTEST = {'pytest', 'test', 'ptr'}.intersection(sys.argv)
-
-DOCS_REQUIRE, DOCS_EXTRAS = load_reqs('reqs/doc.txt') if NEEDS_DOCS else ([], {})
-INSTALL_REQUIRES, EXTRAS_REQUIRE = load_reqs('reqs/default.in')
-TESTS_REQUIRE, TESTS_EXTRAS = load_reqs('reqs/test.txt')
-SETUP_REQUIRES = []
-
-if NEEDS_DOCS:
-    EXTRAS_REQUIRE.update(DOCS_EXTRAS)
-    SETUP_REQUIRES.extend(DOCS_REQUIRE)
-if NEEDS_PYTEST:
-    EXTRAS_REQUIRE.update(TESTS_EXTRAS)
-    SETUP_REQUIRES.append('pytest-runner')
+DOCS_REQUIRE = ('sphinx',)
+INSTALL_REQUIRES = (
+    'BeautifulSoup4<5',
+    'ftfy<5;python_version<"3"',
+    'ftfy<6;python_version>="3"',
+    'html5lib<2',
+    'lxml<5',
+    'six<2',
+)
+TESTS_REQUIRE = ('pytest<5',)
+SETUP_REQUIRES = (('pytest-runner',) if NEEDS_PYTEST else ()) + (DOCS_REQUIRE if NEEDS_DOCS else ())
+EXTRAS_REQUIRE = {
+    'dev': DOCS_REQUIRE + TESTS_REQUIRE,
+    'tests': TESTS_REQUIRE
+}
+THIS_FILE_DIR = os.path.dirname(__file__)
 
 # Get the long description from the README file
 with open(os.path.join(THIS_FILE_DIR, 'README.rst'), encoding='utf-8') as f:
     LONG_DESCRIPTION = f.read()
 
 # The full version, including alpha/beta/rc tags
-RELEASE = '3.0.0'
+RELEASE = '3.0.1'
 # The short X.Y version
 VERSION = '.'.join(RELEASE.split('.')[:2])
 

diff --git a/src/html2ans/default.py b/src/html2ans/default.py
@@ -14,19 +14,23 @@
     ImageParser,
 )
 from html2ans.parsers.embeds import (
+    ArcPlayerEmbedParser,
+    DailyMotionEmbedParser,
+    FacebookPostEmbedParser,
+    FacebookVideoEmbedParser,
+    FlickrEmbedParser,
+    IFrameParser,
+    ImgurEmbedParser,
+    InstagramEmbedParser,
+    PollDaddyEmbedParser,
+    RedditEmbedParser,
     SpotifyEmbedParser,
     TumblrEmbedParser,
     TwitterTweetEmbedParser,
     TwitterVideoEmbedParser,
     VimeoEmbedParser,
     VineEmbedParser,
     YoutubeEmbedParser,
-    FacebookPostEmbedParser,
-    FacebookVideoEmbedParser,
-    IFrameParser,
-    ImgurEmbedParser,
-    InstagramEmbedParser,
-    ArcPlayerEmbedParser,
 )
 from html2ans.parsers.audio import AudioParser
 from html2ans.parsers.raw_html import RawHtmlParser
@@ -39,41 +43,57 @@ class DefaultHtmlAnsParser(BaseHtmlAnsParser):
     """
 
     DEFAULT_PARSERS = [
-        HeaderParser(),  # h1-h6
-        ListParser(),  # ul/ol
-        FormattedTextParser(),  # strong, em, etc.
-        LinkedImageParser(),  # a
-        LinkParser(),  # a
-        ImageParser(),  # img
+        # embed parsers
         ArcPlayerEmbedParser(),  # div
+        DailyMotionEmbedParser(),  # iframe
+        FacebookPostEmbedParser(),  # iframe (with embed inside)
+        FacebookVideoEmbedParser(),  # iframe (with embed inside)
+        FlickrEmbedParser(),  # a (with image inside)
+        ImgurEmbedParser(),  # blockquote (with embed inside)
+        InstagramEmbedParser(),  # blockquote (with embed inside)
+        PollDaddyEmbedParser(),  # noscript
+        RedditEmbedParser(),  # blockquote (with embed inside)
+        SpotifyEmbedParser(),  # iframe (with embed inside)
         TumblrEmbedParser(),  # div
-        FigureParser(),  # figure
-        AudioParser(),  # audio
         TwitterTweetEmbedParser(),  # blockquote (with embed inside)
         TwitterVideoEmbedParser(),  # blockquote (with embed inside)
-        InstagramEmbedParser(),  # blockquote (with embed inside)
-        ImgurEmbedParser(),  # blockquote (with embed inside)
         YoutubeEmbedParser(),  # iframe (with embed inside)
-        FacebookPostEmbedParser(),  # iframe (with embed inside)
-        FacebookVideoEmbedParser(),  # iframe (with embed inside)
-        SpotifyEmbedParser(),  # iframe (with embed inside)
         VimeoEmbedParser(),  # iframe (with embed inside)
         VineEmbedParser(),  # iframe (with embed inside)
+
+        # text parsers
+        HeaderParser(),  # h1-h6
+        ListParser(),  # ul/ol
+        FormattedTextParser(),  # strong, em, etc.
         BlockquoteParser(),  # blockquote
         ParagraphParser(),  # NavigableString, p
-        IFrameParser(),  # iframe
+
+        # image/figure parsers
+        LinkedImageParser(),  # a
+        ImageParser(),  # img
+        FigureParser(),  # figure
+
+        LinkParser(),  # a
+
+        AudioParser(),  # audio
+
+        IFrameParser(),  # generic iframe
+
         NullParser(),  # comments
     ]
     """
-    Default parsers for the default implementation.
+    Default parsers for the default implementation. These will be added to
+    the ``BaseHtmlAnsParser`` ``parsers`` attribute in the order listed, so
+    order matters!
     """
 
     BACKUP_PARSERS = [
         IFrameParser(),
         RawHtmlParser()
     ]
     """
-    Backup parsers for the default implementation.
+    Backup parsers for the default implementation. These will be tried in
+    the order listed, so order matters!
     """
 
     def __init__(self, *args, **kwargs):

diff --git a/src/html2ans/parsers/base.py b/src/html2ans/parsers/base.py
@@ -13,7 +13,7 @@ class ParseResult(namedtuple('ParseResult', ['output', 'match'])):
     ``output`` is the ANS JSON parsed by the parser.
 
     ``match`` indicates whether or not other parse attempts should be made.
-    
+
     The idea of the parsing "match" is necessary so that we can try
     multiple parsers per tag (and not try multiple parsers when we don't have to).
     For example, when parsing ``<p></p>``, if we only returned an empty dictionary

diff --git a/src/html2ans/parsers/embeds.py b/src/html2ans/parsers/embeds.py
@@ -378,7 +378,7 @@ class InstagramEmbedParser(AbstractEmbedParser):
 
     """
     applicable_classes = ['instagram-media']
-    regex = r'(https?://www.instagram.com/\w+/\w+/)'
+    regex = r'(https?://www.instagram.com/[\w-]+/[\w-]+/)'
     tag = 'a'
     attr = 'href'
     embed_type = 'instagram'

diff --git a/src/html2ans/parsers/utils.py b/src/html2ans/parsers/utils.py
@@ -39,7 +39,7 @@ class AbstractParserUtilities(object):
 
     EMPTY_STRINGS = [None, '', ' ', '\n', '<br>', '<br/>']
     """
-    List of strings considered empty (if a ``NavigableString`` is passed to 
+    List of strings considered empty (if a ``NavigableString`` is passed to
     ``is_empty`` and the string is in this list, ``is_empty`` will return True).
     """