diff --git a/.circleci/config.yml b/.circleci/config.yml
index a47d8ec..0bec0b3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -7,13 +7,18 @@ workflows:
- test
deploy:
jobs:
- - update_docs
+ - update_docs:
+ filters:
+ tags:
+ only: /[0-9]+(\.[0-9]+)*/
+ branches:
+ ignore: /.*/
- build_and_deploy:
filters:
tags:
only: /[0-9]+(\.[0-9]+)*/
branches:
- only: master
+ ignore: /.*/
jobs: # A basic unit of work in a run
build_and_deploy:
working_directory: ~/html2ans
@@ -22,17 +27,12 @@ jobs: # A basic unit of work in a run
- image: circleci/python:3.6
steps:
- checkout
- - run:
- name: init .pypirc
- command: |
- echo -e "[pypi]" >> ~/.pypirc
- echo -e "username = arcpsteam" >> ~/.pypirc
- echo -e "password = $PYPI_PASSWORD" >> ~/.pypirc
- run:
name: Build and deploy
command: |
python3 -m virtualenv html2ans
source html2ans/bin/activate
+ pip install twine
python setup.py install test verify sdist bdist_wheel
twine upload dist/*
update_docs:
@@ -56,7 +56,8 @@ jobs: # A basic unit of work in a run
command: |
python3 -m virtualenv html2ans
source html2ans/bin/activate
- python setup.py install build_sphinx
+ pip install -e .
+ python setup.py build_sphinx
- run:
name: Upload docs
command: |
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index edf51c7..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-include reqs/default.in
-prune tests
diff --git a/docs/changelog.rst b/docs/changelog.rst
index d70f0ea..9bfd32f 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,20 @@
Changelog
=========
+v3.0.1
+------
+
+* Adds ``DailyMotionEmbedParser``, ``FlickrEmbedParser``, ``PollDaddyEmbedParser``, and ``RedditEmbedParser`` to ``DEFAULT_PARSERS`` in ``Html2Ans``/``DefaultHtmlAnsParser``
+
+ - These were accidentally left out of ``DEFAULT_PARSERS`` in v3.0.0
+
+* Updates the ``InstagramEmbedParser`` to accept hyphens in embed IDs
+* Internal improvements:
+
+ - ``setup.py`` is now the source of truth for requirements
+ - Stopped writing a ``.pypirc`` file for PyPI deployment; the CircleCI build config now relies on Twine environment variables instead
+
+
v3.0.0
------
diff --git a/docs/dev_guide.rst b/docs/dev_guide.rst
index ae846c2..9b8b388 100644
--- a/docs/dev_guide.rst
+++ b/docs/dev_guide.rst
@@ -5,7 +5,6 @@ Getting set up
--------------
* Clone this repo
-* Make sure you have Python 3.6+ installed
* Ideally, set up a virtualenv and activate it (http://virtualenvwrapper.readthedocs.io/en/latest/)
* From your local copy of this project, run ``pip install -r requirements-dev.txt``
* Run tox with the command ``tox``
@@ -27,3 +26,17 @@ Documentation
- Run ``python setup.py build_sphinx``
- ``cd ../html2ans-docs/html``
- Commit and push the newly generated documentation
+
+
+Release Process
+---------------
+
+* New releases will be pushed to pypi when an appropriate tag (i.e. a version number in the form X.X.X) is pushed
+* In preparation for a new release:
+
+ - Decide what the next version will be per `semantic versioning <https://semver.org/>`_
+ - Make a new branch called ``release/<version>``
+ - Update the version in ``setup.py``
+ - Update the changelog for all changes that will be included in the release
+ - Commit your changes and make a PR against master
+ - Once the changes are merged, tag the branch with the version's release number and push that tag
diff --git a/reqs/default.in b/reqs/default.in
deleted file mode 100644
index 3348662..0000000
--- a/reqs/default.in
+++ /dev/null
@@ -1,9 +0,0 @@
-BeautifulSoup4<5
-# pip-compile will only get the constraint for the running Python interpreter
-ftfy<5;python_version<"3"
-ftfy<6;python_version>="3"
-# html5lib has hardcoded version because they are not following semantic versioning
-html5lib
-lxml<5
-six<2
-twine
diff --git a/reqs/default.txt b/reqs/default.txt
deleted file mode 100644
index 5c41908..0000000
--- a/reqs/default.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-#
-# This file is autogenerated by pip-compile
-# To update, run:
-#
-# pip-compile --output-file reqs/default.txt reqs/default.in
-#
-beautifulsoup4==4.6.3
-ftfy==5.5.0 ; python_version >= "3"
-ftfy==4.4.1 ; python_version < "3"
-html5lib==1.0.1
-lxml==4.2.5
-six==1.11.0
-twine==1.13.0
-wcwidth==0.1.7 # via ftfy
-webencodings==0.5.1 # via html5lib
diff --git a/reqs/doc.in b/reqs/doc.in
deleted file mode 100644
index 6966869..0000000
--- a/reqs/doc.in
+++ /dev/null
@@ -1 +0,0 @@
-sphinx
diff --git a/reqs/doc.txt b/reqs/doc.txt
deleted file mode 100644
index 27a1bd4..0000000
--- a/reqs/doc.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# This file is autogenerated by pip-compile
-# To update, run:
-#
-# pip-compile --output-file reqs/doc.txt reqs/doc.in
-#
-alabaster==0.7.12 # via sphinx
-babel==2.6.0 # via sphinx
-certifi==2018.10.15 # via requests
-chardet==3.0.4 # via requests
-docutils==0.14 # via sphinx
-idna==2.7 # via requests
-imagesize==1.1.0 # via sphinx
-jinja2==2.10 # via sphinx
-markupsafe==1.1.0 # via jinja2
-packaging==18.0 # via sphinx
-pygments==2.3.0 # via sphinx
-pyparsing==2.3.0 # via packaging
-pytz==2018.7 # via babel
-requests==2.20.1 # via sphinx
-six==1.11.0 # via packaging, sphinx
-snowballstemmer==1.2.1 # via sphinx
-sphinx==1.8.2
-sphinxcontrib-websupport==1.1.0 # via sphinx
-urllib3==1.24.1 # via requests
diff --git a/reqs/test.in b/reqs/test.in
deleted file mode 100644
index e079f8a..0000000
--- a/reqs/test.in
+++ /dev/null
@@ -1 +0,0 @@
-pytest
diff --git a/reqs/test.txt b/reqs/test.txt
deleted file mode 100644
index d42fa09..0000000
--- a/reqs/test.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-#
-# This file is autogenerated by pip-compile
-# To update, run:
-#
-# pip-compile --output-file test.txt test.in
-#
-py==1.7.0 # via pytest
-pytest==4.1.0
diff --git a/requirements-dev.txt b/requirements-dev.txt
index cd4aa02..97edc59 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,11 +1,9 @@
--r reqs/default.txt
--r reqs/test.txt
--r reqs/doc.txt
--e .
+-e .[dev]
+autopep8
ipython<6 ; python_version<"3"
ipython ; python_version>="3"
-pip-tools
pycodestyle
pytest-cov
tox
+twine
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index db40090..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
--r reqs/default.txt
diff --git a/setup.py b/setup.py
index 8e28cfc..d6ae02c 100644
--- a/setup.py
+++ b/setup.py
@@ -1,58 +1,35 @@
-import sys
-import os
from codecs import open
-from setuptools import setup, find_packages
+import os
+from setuptools import find_packages, setup
from setuptools.command.install import install
-
-
-THIS_FILE_DIR = os.path.dirname(__file__)
-
-try:
- # pip 9
- from pip.req import parse_requirements
- from pip.download import PipSession
-except ImportError:
- # pip 10
- from pip._internal.req import parse_requirements
- from pip._internal.download import PipSession
-
-
-def load_reqs(fn):
- reqs = []
- reqs_extras = {}
- parsed_reqs = parse_requirements(fn, session=PipSession())
-
- for req in parsed_reqs:
- markers = req.markers
- if markers:
- reqs_extras[":" + str(markers)] = str(req.req)
- else:
- reqs.append(str(req.req))
-
- return reqs, reqs_extras
+import sys
NEEDS_DOCS = 'build_sphinx' in sys.argv
NEEDS_PYTEST = {'pytest', 'test', 'ptr'}.intersection(sys.argv)
-
-DOCS_REQUIRE, DOCS_EXTRAS = load_reqs('reqs/doc.txt') if NEEDS_DOCS else ([], {})
-INSTALL_REQUIRES, EXTRAS_REQUIRE = load_reqs('reqs/default.in')
-TESTS_REQUIRE, TESTS_EXTRAS = load_reqs('reqs/test.txt')
-SETUP_REQUIRES = []
-
-if NEEDS_DOCS:
- EXTRAS_REQUIRE.update(DOCS_EXTRAS)
- SETUP_REQUIRES.extend(DOCS_REQUIRE)
-if NEEDS_PYTEST:
- EXTRAS_REQUIRE.update(TESTS_EXTRAS)
- SETUP_REQUIRES.append('pytest-runner')
+DOCS_REQUIRE = ('sphinx',)
+INSTALL_REQUIRES = (
+ 'BeautifulSoup4<5',
+ 'ftfy<5;python_version<"3"',
+ 'ftfy<6;python_version>="3"',
+ 'html5lib<2',
+ 'lxml<5',
+ 'six<2',
+)
+TESTS_REQUIRE = ('pytest<5',)
+SETUP_REQUIRES = (('pytest-runner',) if NEEDS_PYTEST else ()) + (DOCS_REQUIRE if NEEDS_DOCS else ())
+EXTRAS_REQUIRE = {
+ 'dev': DOCS_REQUIRE + TESTS_REQUIRE,
+ 'tests': TESTS_REQUIRE
+}
+THIS_FILE_DIR = os.path.dirname(__file__)
# Get the long description from the README file
with open(os.path.join(THIS_FILE_DIR, 'README.rst'), encoding='utf-8') as f:
LONG_DESCRIPTION = f.read()
# The full version, including alpha/beta/rc tags
-RELEASE = '3.0.0'
+RELEASE = '3.0.1'
# The short X.Y version
VERSION = '.'.join(RELEASE.split('.')[:2])
diff --git a/src/html2ans/default.py b/src/html2ans/default.py
index 5fd6694..57d65e9 100644
--- a/src/html2ans/default.py
+++ b/src/html2ans/default.py
@@ -14,6 +14,16 @@
ImageParser,
)
from html2ans.parsers.embeds import (
+ ArcPlayerEmbedParser,
+ DailyMotionEmbedParser,
+ FacebookPostEmbedParser,
+ FacebookVideoEmbedParser,
+ FlickrEmbedParser,
+ IFrameParser,
+ ImgurEmbedParser,
+ InstagramEmbedParser,
+ PollDaddyEmbedParser,
+ RedditEmbedParser,
SpotifyEmbedParser,
TumblrEmbedParser,
TwitterTweetEmbedParser,
@@ -21,12 +31,6 @@
VimeoEmbedParser,
VineEmbedParser,
YoutubeEmbedParser,
- FacebookPostEmbedParser,
- FacebookVideoEmbedParser,
- IFrameParser,
- ImgurEmbedParser,
- InstagramEmbedParser,
- ArcPlayerEmbedParser,
)
from html2ans.parsers.audio import AudioParser
from html2ans.parsers.raw_html import RawHtmlParser
@@ -39,33 +43,48 @@ class DefaultHtmlAnsParser(BaseHtmlAnsParser):
"""
DEFAULT_PARSERS = [
- HeaderParser(), # h1-h6
- ListParser(), # ul/ol
- FormattedTextParser(), # strong, em, etc.
- LinkedImageParser(), # a
- LinkParser(), # a
- ImageParser(), # img
+ # embed parsers
ArcPlayerEmbedParser(), # div
+ DailyMotionEmbedParser(), # iframe
+ FacebookPostEmbedParser(), # iframe (with embed inside)
+ FacebookVideoEmbedParser(), # iframe (with embed inside)
+ FlickrEmbedParser(), # a (with image inside)
+ ImgurEmbedParser(), # blockquote (with embed inside)
+ InstagramEmbedParser(), # blockquote (with embed inside)
+ PollDaddyEmbedParser(), # noscript
+ RedditEmbedParser(), # blockquote (with embed inside)
+ SpotifyEmbedParser(), # iframe (with embed inside)
TumblrEmbedParser(), # div
- FigureParser(), # figure
- AudioParser(), # audio
TwitterTweetEmbedParser(), # blockquote (with embed inside)
TwitterVideoEmbedParser(), # blockquote (with embed inside)
- InstagramEmbedParser(), # blockquote (with embed inside)
- ImgurEmbedParser(), # blockquote (with embed inside)
YoutubeEmbedParser(), # iframe (with embed inside)
- FacebookPostEmbedParser(), # iframe (with embed inside)
- FacebookVideoEmbedParser(), # iframe (with embed inside)
- SpotifyEmbedParser(), # iframe (with embed inside)
VimeoEmbedParser(), # iframe (with embed inside)
VineEmbedParser(), # iframe (with embed inside)
+
+ # text parsers
+ HeaderParser(), # h1-h6
+ ListParser(), # ul/ol
+ FormattedTextParser(), # strong, em, etc.
BlockquoteParser(), # blockquote
ParagraphParser(), # NavigableString, p
- IFrameParser(), # iframe
+
+ # image/figure parsers
+ LinkedImageParser(), # a
+ ImageParser(), # img
+ FigureParser(), # figure
+
+ LinkParser(), # a
+
+ AudioParser(), # audio
+
+ IFrameParser(), # generic iframe
+
NullParser(), # comments
]
"""
- Default parsers for the default implementation.
+ Default parsers for the default implementation. These will be added to
+ the `BaseHtmlAnsParser` ``parsers`` attribute in the order listed, so
+ order matters!
"""
BACKUP_PARSERS = [
@@ -73,7 +92,8 @@ class DefaultHtmlAnsParser(BaseHtmlAnsParser):
RawHtmlParser()
]
"""
- Backup parsers for the default implementation.
+ Backup parsers for the default implementation. These will be tried in
+ the order listed, so order matters!
"""
def __init__(self, *args, **kwargs):
diff --git a/src/html2ans/parsers/base.py b/src/html2ans/parsers/base.py
index 29df4de..0a75f51 100644
--- a/src/html2ans/parsers/base.py
+++ b/src/html2ans/parsers/base.py
@@ -13,7 +13,7 @@ class ParseResult(namedtuple('ParseResult', ['output', 'match'])):
``output`` is the ANS JSON parsed by the parser.
``match`` indicates whether or not other parse attempts should be made.
-
+
The idea of the parsing "match" is necessary so that we can try
multiple parsers per tag (and not try multiple parsers when we don't have to).
For example, when parsing ````, if we only returned an empty dictionary
diff --git a/src/html2ans/parsers/embeds.py b/src/html2ans/parsers/embeds.py
index 7090c05..852476e 100644
--- a/src/html2ans/parsers/embeds.py
+++ b/src/html2ans/parsers/embeds.py
@@ -378,7 +378,7 @@ class InstagramEmbedParser(AbstractEmbedParser):
"""
applicable_classes = ['instagram-media']
- regex = r'(https?://www.instagram.com/\w+/\w+/)'
+ regex = r'(https?://www.instagram.com/[\w-]+/[\w-]+/)'
tag = 'a'
attr = 'href'
embed_type = 'instagram'
diff --git a/src/html2ans/parsers/utils.py b/src/html2ans/parsers/utils.py
index 6f67540..5e58f8a 100644
--- a/src/html2ans/parsers/utils.py
+++ b/src/html2ans/parsers/utils.py
@@ -39,7 +39,7 @@ class AbstractParserUtilities(object):
EMPTY_STRINGS = [None, '', ' ', '\n', '
', '
']
"""
- List of strings considered empty (if a ``NavigableString`` is passed to
+ List of strings considered empty (if a ``NavigableString`` is passed to
``is_empty`` and the string is in this list, ``is_empty`` will return True).
"""
diff --git a/tests/parsers/embeds/test_instagram.py b/tests/parsers/embeds/test_instagram.py
index 4d15722..7adf1a6 100644
--- a/tests/parsers/embeds/test_instagram.py
+++ b/tests/parsers/embeds/test_instagram.py
@@ -27,7 +27,13 @@
' target="_blank">A post shared by CNN (@cnn) on '
''
'
',
- "https://www.instagram.com/p/BqTW3VBDl2c/")
+ "https://www.instagram.com/p/BqTW3VBDl2c/"),
+ ('',
+ "https://www.instagram.com/p/Bsd-bD-Hdim/")
])
def test_embed_parser(tag_string, expected_id, make_http_tag, make_https_tag):
for test_function in [make_http_tag, make_https_tag]:
diff --git a/tests/test_html2ans.py b/tests/test_html2ans.py
index 193a8a4..7b4a750 100644
--- a/tests/test_html2ans.py
+++ b/tests/test_html2ans.py
@@ -21,14 +21,24 @@ def test_empty(body_html, test_html2ans):
"",
''
'',
- ""
- ])
+ "",
+ "",
+ "",
+ "",
+ ''
+ '',
+ ""
+])
def test_comments(test_html, test_html2ans):
assert test_html2ans.generate_ans(test_html) == []
-def test_empty_except_navigable_string(test_html2ans):
- parsed = test_html2ans.generate_ans('This is a navigable string')
+@pytest.mark.parametrize('test_html', [
+ 'This is a navigable string',
+ 'This is a navigable string',
+])
+def test_empty_except_navigable_string(test_html, test_html2ans):
+ parsed = test_html2ans.generate_ans(test_html)
assert len(parsed) == 1
assert parsed[0].get('content') == 'This is a navigable string'
diff --git a/tox.ini b/tox.ini
index 2af5a94..6adcdbc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,13 +4,12 @@ skipsdist=True
[base]
deps=
- -U
- -rreqs/default.txt
- -rreqs/test.txt
pytest-cov
pycodestyle
commands=
-pycodestyle src tests
+extras=
+ tests
[testenv:circleci]
commands=
@@ -18,6 +17,8 @@ commands=
pytest --junit-xml=test-reports/junit.xml --cov-report=html:test-reports/coverage.html --cov=src
deps=
{[base]deps}
+extras=
+ {[base]extras}
usedevelop=True
[testenv]
@@ -26,4 +27,6 @@ commands=
pytest --cov-report term-missing --cov=src
deps=
{[base]deps}
+extras=
+ {[base]extras}
usedevelop=True