From 74161ea277c75a29ba6970d12d15b73e6d0f6050 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 16:03:20 +0100
Subject: [PATCH 01/29] chg: add python 3.12 support.

---
 .github/workflows/python-package.yml | 4 ++--
 setup.py                             | 5 ++---
 src/inscriptis/metadata.py           | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 7d2aff2..afd8ae4 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -9,11 +9,11 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       fail-fast: false
       matrix:
-        python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11']
+        python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12']
 
     steps:
     - uses: actions/checkout@v3
diff --git a/setup.py b/setup.py
index 9ef7d76..d3ec856 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
     long_description=long_description,
     author=__author__,
     author_email=__author_email__,
-    python_requires='>=3.6',
+    python_requires='>=3.8',
     classifiers=[
         'Development Status :: 5 - Production/Stable',
         'Intended Audience :: Developers',
@@ -33,12 +33,11 @@
         'Topic :: Text Processing :: Markup :: HTML',
         'Topic :: Utilities',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: 3.10',
         'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
     ],
     keywords='HTML,converter,text',
     url='https://github.com/weblyzard/inscriptis',
diff --git a/src/inscriptis/metadata.py b/src/inscriptis/metadata.py
index ff06c8d..2b2a771 100644
--- a/src/inscriptis/metadata.py
+++ b/src/inscriptis/metadata.py
@@ -2,6 +2,6 @@
 
 __author__ = 'Albert Weichselbraun, Fabian Odoni'
 __author_email__ = 'albert.weichselbraun@fhgr.ch, fabian.odoni@fhgr.ch'
-__copyright__ = '2016-2023 Albert Weichselbraun, Fabian Odoni'
+__copyright__ = '2016-2024 Albert Weichselbraun, Fabian Odoni'
 __license__ = 'Apache 2.0'
-__version__ = '2.3.2'
+__version__ = '2.3.3'

From 176040374eedf4786f4c0508ff70a36090935566 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 16:08:37 +0100
Subject: [PATCH 02/29] chg: run codeql and build on all branches.

---
 .github/workflows/codeql-analysis.yml | 3 ---
 .github/workflows/python-package.yml  | 2 --
 2 files changed, 5 deletions(-)

diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index bb76eb2..0e79ee4 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -13,10 +13,7 @@ name: "CodeQL"
 
 on:
   push:
-    branches: [ master ]
   pull_request:
-    # The branches below must be a subset of the branches above
-    branches: [ master ]
   schedule:
     - cron: '26 5 * * 2'
 
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index afd8ae4..c313db5 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -2,9 +2,7 @@ name: build
 
 on:
   push:
-    branches: [ master ]
   pull_request:
-    branches: [ master ]
 
 jobs:
   build:

From 3014cdae9684c407c144493504f802a1af4f4dff Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 17:24:12 +0100
Subject: [PATCH 03/29] add: read timeout.

---
 scripts/inscript.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/inscript.py b/scripts/inscript.py
index 6821445..0697e44 100755
--- a/scripts/inscript.py
+++ b/scripts/inscript.py
@@ -15,6 +15,7 @@
 from inscriptis.model.config import ParserConfig
 
 DEFAULT_ENCODING = 'utf8'
+DEFAULT_TIMEOUT = 5  # default timeout in seconds
 
 
 def get_postprocessor(name):
@@ -69,6 +70,9 @@ def get_parser():
     parser.add_argument('--table-cell-separator', default='  ',
                         help='Separator to use between table cells (default: '
                              'three spaces).')
+    parser.add_argument('--timeout', default=DEFAULT_TIMEOUT,
+                        help='Request timeout in seconds (default: '
+                             f'{DEFAULT_TIMEOUT}).')
     parser.add_argument('-v', '--version',
                         action='store_true', default=False,
                         help='display version information')
@@ -95,7 +99,7 @@ def get_parser():
                                    errors='ignore') as f:
             html_content = f.read()
     elif args.input.startswith('http://') or args.input.startswith('https://'):
-        req = requests.get(args.input)
+        req = requests.get(args.input, timeout=args.timeout)
         html_content = req.content.decode(args.encoding or req.encoding)
     else:
         print("ERROR: Cannot open input file '{0}'.\n".format(args.input))

From 263bf997075a763be61a699c56356050aaa1754a Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 17:24:41 +0100
Subject: [PATCH 04/29] add: improved tox.ini

---
 tox.ini | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/tox.ini b/tox.ini
index 63c1093..aba12a6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,8 +1,11 @@
+[tox]
+envlist = pytest, pyroma, flake8-4
+
 # standard unit tests
 [testenv:pytest]
 deps = pytest ~= 7.1.2
        pytest-cov ~= 3.0.0
-commands = py.test --cov-config=.coveragerc --cov=inscriptis ./tests
+commands = pytest --cov-config=.coveragerc --cov=inscriptis ./tests
 
 # python packaging best practices
 [testenv:pyroma]
@@ -33,15 +36,6 @@ deps = flake8 ~= 4.0.1
        pep8-naming ~= 0.13.1
        flake8-mutable ~= 1.2.0
        flake8-use-pathlib ~= 0.2.1
-commands = flake8
-
-[flake8]
-exclude = .tox 
-          docs
-          benchmarking
-          setup.py
-          tests
-          venv
 
 # S104 - do not cleanup XML data prior to processing
 # S410 - bind to all IPs is okay in the case of the Web service, since it is
@@ -50,12 +44,9 @@ exclude = .tox
 # D102 - missing docstring in public method
 # D105 - missing docstring in magic method (e.g., __str__)
 # D107 - missing docstring in __init__
-ignore = S104, S410, W503, D107, D105, D102
-show-source = true
-enable-extensions=G
-application-import-names = inscriptis
-
 # flake8 cognitive complexity
-max-cognitive-complexity=13
-
-# 
+commands = flake8 --exclude=".tox, setup.py, tests, venv, docs, benchmarking, build" \
+              --ignore="S104, S410, W503, D107, D105, D102" \
+              --show-source \
+	      --enable-extensions=G \
+	      --max-cognitive-complexity=13

From e74c24ffd4089ab1ea54123ac42004e5fb1a41ce Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 17:24:50 +0100
Subject: [PATCH 05/29] chg: use f-strings.

---
 src/inscriptis/html_engine.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/inscriptis/html_engine.py b/src/inscriptis/html_engine.py
index 85664b7..a39ea3f 100644
--- a/src/inscriptis/html_engine.py
+++ b/src/inscriptis/html_engine.py
@@ -164,7 +164,7 @@ def _start_img(self, attrs):
         image_text = attrs.get('alt', '') or attrs.get('title', '')
         if image_text and not (self.config.deduplicate_captions
                                and image_text == self.last_caption):
-            self.tags[-1].write('[{0}]'.format(image_text))
+            self.tags[-1].write(f'[{image_text}]')
             self.last_caption = image_text
 
     def _start_a(self, attrs):
@@ -179,7 +179,7 @@ def _start_a(self, attrs):
 
     def _end_a(self):
         if self.link_target:
-            self.tags[-1].write(']({0})'.format(self.link_target))
+            self.tags[-1].write(f']({self.link_target})')
 
     def _start_ol(self, _):
         self.li_counter.append(1)
@@ -191,7 +191,7 @@ def _start_li(self, _):
         bullet = self.li_counter[-1] if self.li_counter else '* '
         if isinstance(bullet, int):
             self.li_counter[-1] += 1
-            self.tags[-1].list_bullet = '{0}. '.format(bullet)
+            self.tags[-1].list_bullet = f'{bullet}. '
         else:
             self.tags[-1].list_bullet = bullet
 

From ffef3f91549fe321f1e28b41376f17c1397d7ef1 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 18:32:15 +0100
Subject: [PATCH 06/29] chg: optimized tox config.

---
 tox.ini | 52 ++++++++++++++++++++++++++++------------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/tox.ini b/tox.ini
index aba12a6..22b7f68 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,10 +1,10 @@
 [tox]
-envlist = pytest, pyroma, flake8-4
+envlist = pytest, pyroma, flake8
 
 # standard unit tests
 [testenv:pytest]
-deps = pytest ~= 7.1.2
-       pytest-cov ~= 3.0.0
+deps = pytest ~= 7.4.4
+       pytest-cov ~= 4.1.0
 commands = pytest --cov-config=.coveragerc --cov=inscriptis ./tests
 
 # python packaging best practices
@@ -12,30 +12,32 @@ commands = pytest --cov-config=.coveragerc --cov=inscriptis ./tests
 deps = pyroma
 commands = pyroma .
 
-# checks compatible with flake 4
-[testenv:flake8-4]
-deps = flake8 ~= 4.0.1
+[testenv:flake8]
+deps = flake8 ~= 7.0.0
+       dlint ~= 0.14.1
+       flake8-bandit ~= 4.1.1
        flake8-blind-except ~= 0.2.1
-       flake8-bandit ~= 3.0.0
-       flake8-bugbear ~= 22.7.1
-       flake8-builtins ~= 1.5.3
+       flake8-bugbear ~= 23.12.2
+       flake8-builtins ~= 2.2.0
        flake8-cognitive-complexity ~= 0.1.0
        flake8-colors ~= 0.1.9
-       flake8-comprehensions ~= 3.10.0
-       flake8-docstrings ~= 1.6.0
-       flake8-encodings ~= 0.5.0.post1
-       flake8-eradicate ~= 1.2.1
+       flake8-comprehensions ~= 3.14.0
+       flake8-docstrings ~= 1.7.0
+       flake8-eradicate ~= 1.5.0
+       flake8-encodings ~= 0.5.1
        flake8-expression-complexity ~= 0.0.11
+       flake8-logging-format ~= 0.9.0
+       flake8-mutable ~= 1.2.0
+       flake8-pie ~= 0.16.0
+       flake8-pytest ~= 1.4
+       flake8-quotes ~= 3.3.2
+       flake8-raise ~= 0.0.5
+       flake8-simplify ~= 0.21.0
        flake8-string-format ~= 0.3.0
        flake8-tuple ~= 0.4.1
-       flake8-logging-format ~= 0.6.0
-       flake8-pytest ~= 1.3
-       flake8-quotes ~= 3.3.1
-       flake8-raise ~= 0.0.5
-       flake8-simplify ~= 0.19.2
-       pep8-naming ~= 0.13.1
-       flake8-mutable ~= 1.2.0
-       flake8-use-pathlib ~= 0.2.1
+       flake8-use-pathlib ~= 0.3.0
+       flake8-warnings ~= 0.4.1
+       pep8-naming ~= 0.13.3
 
 # S104 - do not cleanup XML data prior to processing
 # S410 - bind to all IPs is okay in the case of the Web service, since it is
@@ -44,9 +46,11 @@ deps = flake8 ~= 4.0.1
 # D102 - missing docstring in public method
 # D105 - missing docstring in magic method (e.g., __str__)
 # D107 - missing docstring in __init__
-# flake8 cognitive complexity
 commands = flake8 --exclude=".tox, setup.py, tests, venv, docs, benchmarking, build" \
-              --ignore="S104, S410, W503, D107, D105, D102" \
               --show-source \
-	      --enable-extensions=G \
+	      --ignore="DUO107, W503, D107, D105, D102, S104, S410" \
 	      --max-cognitive-complexity=13
+
+#              --ignore="S104, S410, W503, D107, D105, D102" \
+#	      --enable-extensions=G \
+#	      --max-cognitive-complexity=13

From 57120cea447938a6e7ed855c78b13233c610af6f Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 18:32:48 +0100
Subject: [PATCH 07/29] chg: code cleanup.

---
 src/inscriptis/annotation/output/html.py | 4 +---
 src/inscriptis/html_engine.py            | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/inscriptis/annotation/output/html.py b/src/inscriptis/annotation/output/html.py
index 310f935..2ea498b 100644
--- a/src/inscriptis/annotation/output/html.py
+++ b/src/inscriptis/annotation/output/html.py
@@ -68,9 +68,7 @@ def _get_label_colors(labels: List[str]) -> Dict[str, str]:
             A mapping between the available labels and the corresponding color
             from the COLOR_SCHEMA.
         """
-        return {label: color
-                for label, color in zip({a[2] for a in sorted(labels)},
-                                        cycle(COLOR_SCHEMA))}
+        return dict(zip({a[2] for a in sorted(labels)}, cycle(COLOR_SCHEMA)))
 
     def _get_css(self, labels: List[str]) -> str:
         """Compute the CSS to be included into the HTML output.
diff --git a/src/inscriptis/html_engine.py b/src/inscriptis/html_engine.py
index a39ea3f..e2c7f3e 100644
--- a/src/inscriptis/html_engine.py
+++ b/src/inscriptis/html_engine.py
@@ -137,7 +137,7 @@ def handle_starttag(self, tag, attrs):
                 tag, DEFAULT_HTML_ELEMENT).__copy__().set_tag(tag)))
         self.tags.append(cur)
 
-        handler = self.start_tag_handler_dict.get(tag, None)
+        handler = self.start_tag_handler_dict.get(tag)
         if handler:
             handler(attrs)
 
@@ -150,7 +150,7 @@ def handle_endtag(self, tag):
         Args:
           tag: the HTML end tag to process.
         """
-        handler = self.end_tag_handler_dict.get(tag, None)
+        handler = self.end_tag_handler_dict.get(tag)
         if handler:
             handler()
 

From 5cb6c3c8794083dc36563acd805cdd71c7aa4226 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 18:34:36 +0100
Subject: [PATCH 08/29] chg: improved build.

---
 .github/workflows/python-package.yml | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index c313db5..08ee6b2 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,15 +22,12 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest pytest-cov codecov
+        python -m pip install tox
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         python setup.py install
-    - name: Lint with flake8
+    - name: Lint with tox
       run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics
+        tox
     - name: Test with pytest
       run: |
         py.test --cov=inscripits ./tests && codecov

From d346a111cbe29515bdc197708fb0c607709fa30d Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 18:34:46 +0100
Subject: [PATCH 09/29] chg: improved build.

---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 08ee6b2..f7229d4 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,7 +22,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install tox
+        python -m pip install tox setuptools
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         python setup.py install
     - name: Lint with tox

From a9d1a2602eee477a72e741b663f816cff75b6a71 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 8 Jan 2024 18:39:28 +0100
Subject: [PATCH 10/29] chg: improve build process.

---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index f7229d4..1498480 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,7 +22,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install tox setuptools
+        python -m pip install tox setuptools pytest pytest-cov codecov
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         python setup.py install
     - name: Lint with tox

From 74c6347df76c242c45a0306bddbdb3576eccc304 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Wed, 10 Jan 2024 19:54:25 +0100
Subject: [PATCH 11/29] add: black configuration.

---
 pyproject.toml | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 pyproject.toml

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8992400
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,31 @@
+[tool.poetry]
+name = "inscriptis"
+version = "2.4.0"
+description = "inscriptis - HTML to text converter."
+authors = ["Albert Weichselbraun <albert.weichselbraun@fhgr.ch>", "Fabian Odoni <fabian.odoni@fhgr.ch>"]
+license = "Apache 2.0"
+readme = "README.rst"
+
+[tool.poetry.dependencies]
+python = "^3.8, ^3.9, ^3.10, ^3.11, ^3.12"
+requests = "^2.31.0"
+lxml = "^5.1.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+# code formatting with black
+[tool.black]
+line-length = 88
+target-version = ["py38", "py39", "py310", "py311", "py312"]
+include = "\.pyi?$"
+# 'extend-exclude' excludes files or directories in addition to the defaults
+extend-exclude = """
+# A regex preceded with ^/ will apply only to files and directories
+# in the root of the project.
+(
+  ^/foo.py    # exclude a file named foo.py in the root of the project
+  | .*_pb2.py  # exclude autogenerated Protocol Buffer files anywhere in the project
+)
+"""

From 9f995fe6f31c07a65a58e27bdf733e94ce2bf606 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Thu, 11 Jan 2024 14:16:21 +0100
Subject: [PATCH 12/29] fix: specification of the supported python versions.

---
 pyproject.toml | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8992400..2ed5029 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ license = "Apache 2.0"
 readme = "README.rst"
 
 [tool.poetry.dependencies]
-python = "^3.8, ^3.9, ^3.10, ^3.11, ^3.12"
+python = "^3.8 || ^3.9 || ^3.10 || ^3.11 || ^3.12"
 requests = "^2.31.0"
 lxml = "^5.1.0"
 
@@ -19,13 +19,8 @@ build-backend = "poetry.core.masonry.api"
 [tool.black]
 line-length = 88
 target-version = ["py38", "py39", "py310", "py311", "py312"]
-include = "\.pyi?$"
-# 'extend-exclude' excludes files or directories in addition to the defaults
-extend-exclude = """
-# A regex preceded with ^/ will apply only to files and directories
-# in the root of the project.
-(
-  ^/foo.py    # exclude a file named foo.py in the root of the project
-  | .*_pb2.py  # exclude autogenerated Protocol Buffer files anywhere in the project
-)
-"""
+include = '''
+  ^scripts/
+  ^src/
+  ^tests/
+'''

From 8ea3468ac537c2c8e55d82be7015899f8dab6c27 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Thu, 11 Jan 2024 17:28:03 +0100
Subject: [PATCH 13/29] chg: improved tox and pyproject configs.

---
 pyproject.toml | 5 ++---
 tox.ini        | 5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2ed5029..53f64cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,8 +19,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.black]
 line-length = 88
 target-version = ["py38", "py39", "py310", "py311", "py312"]
+extend-exclude = '\.html$|\.json$|\.txt$'
 include = '''
-  ^scripts/
-  ^src/
-  ^tests/
+  ^/scripts/|^/src/|^/tests/
 '''
diff --git a/tox.ini b/tox.ini
index 22b7f68..8dc0683 100644
--- a/tox.ini
+++ b/tox.ini
@@ -30,7 +30,6 @@ deps = flake8 ~= 7.0.0
        flake8-mutable ~= 1.2.0
        flake8-pie ~= 0.16.0
        flake8-pytest ~= 1.4
-       flake8-quotes ~= 3.3.2
        flake8-raise ~= 0.0.5
        flake8-simplify ~= 0.21.0
        flake8-string-format ~= 0.3.0
@@ -46,9 +45,11 @@ deps = flake8 ~= 7.0.0
 # D102 - missing docstring in public method
 # D105 - missing docstring in magic method (e.g., __str__)
 # D107 - missing docstring in __init__
+# E203, E704 black
 commands = flake8 --exclude=".tox, setup.py, tests, venv, docs, benchmarking, build" \
               --show-source \
-	      --ignore="DUO107, W503, D107, D105, D102, S104, S410" \
+          --max-line-length=88 \  
+	      --ignore="DUO107, W503, D107, D105, D102, S104, S410, E203, E708" \
 	      --max-cognitive-complexity=13
 
 #              --ignore="S104, S410, W503, D107, D105, D102" \

From 55fa29ca39f9ed5895f9e88b2eb0f17e4d84245f Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Thu, 11 Jan 2024 17:29:46 +0100
Subject: [PATCH 14/29] chg: apply black formatting.

---
 scripts/inscript.py                         | 194 +++++++++++++-------
 src/inscriptis/__init__.py                  |  22 +--
 src/inscriptis/annotation/__init__.py       |  15 +-
 src/inscriptis/annotation/output/html.py    |  73 ++++----
 src/inscriptis/annotation/output/surface.py |   8 +-
 src/inscriptis/annotation/output/xml.py     |  23 ++-
 src/inscriptis/annotation/parser.py         |  39 ++--
 src/inscriptis/css_profiles.py              | 108 +++++------
 src/inscriptis/html_engine.py               | 100 +++++-----
 src/inscriptis/html_properties.py           |   6 +-
 src/inscriptis/metadata.py                  |  10 +-
 src/inscriptis/model/attribute.py           |  25 ++-
 src/inscriptis/model/canvas/__init__.py     |  26 +--
 src/inscriptis/model/canvas/block.py        |  20 +-
 src/inscriptis/model/canvas/prefix.py       |  28 ++-
 src/inscriptis/model/config.py              |  25 +--
 src/inscriptis/model/css.py                 |  32 ++--
 src/inscriptis/model/html_element.py        |  90 +++++----
 src/inscriptis/model/table.py               | 104 ++++++-----
 src/inscriptis/service/web.py               |  36 ++--
 tests/test_annotation.py                    |  86 ++++-----
 tests/test_annotation_output_processor.py   |  71 +++----
 tests/test_annotation_rule_parsing.py       |  59 +++---
 tests/test_block.py                         |  32 ++--
 tests/test_broken_table_handling.py         |  13 +-
 tests/test_double_a.py                      |  16 +-
 tests/test_empty_string.py                  |   9 +-
 tests/test_engine.py                        |   4 +-
 tests/test_html_conversion_options.py       |  41 +++--
 tests/test_html_snippets.py                 |  30 +--
 tests/test_html_snippets_annotations.py     |  52 +++---
 tests/test_limit_whitespace_affixes.py      |  44 +++--
 tests/test_list_div.py                      |  22 +--
 tests/test_margin_before_at_start.py        |  18 +-
 tests/test_margin_handling.py               |  20 +-
 tests/test_metadata.py                      |  25 ++-
 tests/test_model_html_element_canvas.py     |  16 +-
 tests/test_model_prefix.py                  |  35 ++--
 tests/test_parse_css.py                     |  49 ++---
 tests/test_strip_xml_header.py              |   6 +-
 tests/test_style_parsing.py                 |   5 +-
 tests/test_table_cell.py                    |  21 ++-
 tests/test_table_cell_formatting.py         |  36 ++--
 tests/test_table_row.py                     |  12 +-
 tests/test_white_space_handling.py          |  56 +++---
 45 files changed, 952 insertions(+), 810 deletions(-)

diff --git a/scripts/inscript.py b/scripts/inscript.py
index 0697e44..2e694a5 100755
--- a/scripts/inscript.py
+++ b/scripts/inscript.py
@@ -14,7 +14,7 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-DEFAULT_ENCODING = 'utf8'
+DEFAULT_ENCODING = "utf8"
 DEFAULT_TIMEOUT = 5  # default timeout in seconds
 
 
@@ -27,78 +27,125 @@ def get_postprocessor(name):
     Returns:
         The matching postprocessing function
     """
-    pp_class = name.capitalize() + 'Extractor'
-    mod = __import__('inscriptis.annotation.output.' + name,
-                     fromlist=[pp_class])
+    pp_class = name.capitalize() + "Extractor"
+    mod = __import__("inscriptis.annotation.output." + name, fromlist=[pp_class])
     return getattr(mod, pp_class)()
 
 
 def get_parser():
     """Parse the arguments if script is run via console."""
     parser = argparse.ArgumentParser(
-        description='Convert the given HTML document to text.')
-    parser.add_argument('input', nargs='?', default=None,
-                        help='Html input either from a file or a URL '
-                             '(default:stdin).')
-    parser.add_argument('-o', '--output', type=str,
-                        help='Output file (default:stdout).')
-    parser.add_argument('-e', '--encoding', type=str,
-                        help='Input encoding to use (default:utf-8 for '
-                             'files; detected server encoding for Web URLs).')
-    parser.add_argument('-i', '--display-image-captions',
-                        action='store_true', default=False,
-                        help='Display image captions (default:false).')
-    parser.add_argument('-d', '--deduplicate-image-captions',
-                        action='store_true', default=False,
-                        help='Deduplicate image captions (default:false).')
-    parser.add_argument('-l', '--display-link-targets',
-                        action='store_true', default=False,
-                        help='Display link targets (default:false).')
-    parser.add_argument('-a', '--display-anchor-urls',
-                        action='store_true', default=False,
-                        help='Display anchor URLs (default:false).')
-    parser.add_argument('-r', '--annotation-rules', default=None,
-                        help='Path to an optional JSON file containing rules '
-                             'for annotating the retrieved text.')
-    parser.add_argument('-p', '--postprocessor', type=get_postprocessor,
-                        default=lambda x: x,
-                        help='Optional component for postprocessing the '
-                             'result (html, surface, xml). ')
-    parser.add_argument('--indentation', default='extended',
-                        help='How to handle indentation (extended or strict;'
-                             ' default: extended).')
-    parser.add_argument('--table-cell-separator', default='  ',
-                        help='Separator to use between table cells (default: '
-                             'three spaces).')
-    parser.add_argument('--timeout', default=DEFAULT_TIMEOUT,
-                        help='Request timeout in seconds (default: '
-                             f'{DEFAULT_TIMEOUT}).')
-    parser.add_argument('-v', '--version',
-                        action='store_true', default=False,
-                        help='display version information')
+        description="Convert the given HTML document to text."
+    )
+    parser.add_argument(
+        "input",
+        nargs="?",
+        default=None,
+        help="Html input either from a file or a URL " "(default:stdin).",
+    )
+    parser.add_argument(
+        "-o", "--output", type=str, help="Output file (default:stdout)."
+    )
+    parser.add_argument(
+        "-e",
+        "--encoding",
+        type=str,
+        help="Input encoding to use (default:utf-8 for "
+        "files; detected server encoding for Web URLs).",
+    )
+    parser.add_argument(
+        "-i",
+        "--display-image-captions",
+        action="store_true",
+        default=False,
+        help="Display image captions (default:false).",
+    )
+    parser.add_argument(
+        "-d",
+        "--deduplicate-image-captions",
+        action="store_true",
+        default=False,
+        help="Deduplicate image captions (default:false).",
+    )
+    parser.add_argument(
+        "-l",
+        "--display-link-targets",
+        action="store_true",
+        default=False,
+        help="Display link targets (default:false).",
+    )
+    parser.add_argument(
+        "-a",
+        "--display-anchor-urls",
+        action="store_true",
+        default=False,
+        help="Display anchor URLs (default:false).",
+    )
+    parser.add_argument(
+        "-r",
+        "--annotation-rules",
+        default=None,
+        help="Path to an optional JSON file containing rules "
+        "for annotating the retrieved text.",
+    )
+    parser.add_argument(
+        "-p",
+        "--postprocessor",
+        type=get_postprocessor,
+        default=lambda x: x,
+        help="Optional component for postprocessing the "
+        "result (html, surface, xml). ",
+    )
+    parser.add_argument(
+        "--indentation",
+        default="extended",
+        help="How to handle indentation (extended or strict;" " default: extended).",
+    )
+    parser.add_argument(
+        "--table-cell-separator",
+        default="  ",
+        help="Separator to use between table cells (default: " "three spaces).",
+    )
+    parser.add_argument(
+        "--timeout",
+        default=DEFAULT_TIMEOUT,
+        help="Request timeout in seconds (default: " f"{DEFAULT_TIMEOUT}).",
+    )
+    parser.add_argument(
+        "-v",
+        "--version",
+        action="store_true",
+        default=False,
+        help="display version information",
+    )
     return parser
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = get_parser()
     args = parser.parse_args()
 
     if args.version:
-        print('Inscript HTML to text conversion (based on the inscriptis '
-              'library version {0})'.format(__version__))
-        print('Copyright (C)', __copyright__)
-        print('\nInscript comes with ABSOLUTELY NO WARRANTY.')
-        print('This is free software and you are welcome to redistribute it '
-              'under the terms of the {0}.'.format(__license__))
+        print(
+            "Inscript HTML to text conversion (based on the inscriptis "
+            "library version {0})".format(__version__)
+        )
+        print("Copyright (C)", __copyright__)
+        print("\nInscript comes with ABSOLUTELY NO WARRANTY.")
+        print(
+            "This is free software and you are welcome to redistribute it "
+            "under the terms of the {0}.".format(__license__)
+        )
         sys.exit(0)
 
     if not args.input:
         html_content = sys.stdin.read()
     elif Path(args.input).is_file():
-        with Path(args.input).open(encoding=args.encoding or DEFAULT_ENCODING,
-                                   errors='ignore') as f:
+        with Path(args.input).open(
+            encoding=args.encoding or DEFAULT_ENCODING, errors="ignore"
+        ) as f:
             html_content = f.read()
-    elif args.input.startswith('http://') or args.input.startswith('https://'):
+    elif args.input.startswith("http://") or args.input.startswith("https://"):
         req = requests.get(args.input, timeout=args.timeout)
         html_content = req.content.decode(args.encoding or req.encoding)
     else:
@@ -111,33 +158,38 @@ def get_parser():
             with Path(args.annotation_rules).open() as f:
                 annotation_rules = load(f)
         except IOError:
-            print("ERROR: Cannot open annotation rule file '{0}'.".format(
-                args.annotation_rules
-            ))
+            print(
+                "ERROR: Cannot open annotation rule file '{0}'.".format(
+                    args.annotation_rules
+                )
+            )
             sys.exit(-1)
     else:
         annotation_rules = None
 
-    css_profile = CSS_PROFILES['relaxed'] if args.indentation == 'extended' \
-        else CSS_PROFILES['strict']
-    config = ParserConfig(css=css_profile,
-                          display_images=args.display_image_captions,
-                          deduplicate_captions=args.deduplicate_image_captions,
-                          display_links=args.display_link_targets,
-                          display_anchors=args.display_anchor_urls,
-                          annotation_rules=annotation_rules,
-                          table_cell_separator=args.table_cell_separator)
+    css_profile = (
+        CSS_PROFILES["relaxed"]
+        if args.indentation == "extended"
+        else CSS_PROFILES["strict"]
+    )
+    config = ParserConfig(
+        css=css_profile,
+        display_images=args.display_image_captions,
+        deduplicate_captions=args.deduplicate_image_captions,
+        display_links=args.display_link_targets,
+        display_anchors=args.display_anchor_urls,
+        annotation_rules=annotation_rules,
+        table_cell_separator=args.table_cell_separator,
+    )
     if not annotation_rules:
         output = get_text(html_content, config)
     else:
-        output = args.postprocessor(
-            get_annotated_text(html_content, config))
-        if hasattr(args.postprocessor, 'verbatim') \
-           and not args.postprocessor.verbatim:
+        output = args.postprocessor(get_annotated_text(html_content, config))
+        if hasattr(args.postprocessor, "verbatim") and not args.postprocessor.verbatim:
             output = dumps(output)
 
     if args.output:
-        with Path(args.output).open('w', encoding=DEFAULT_ENCODING) as f:
+        with Path(args.output).open("w", encoding=DEFAULT_ENCODING) as f:
             f.write(output)
     else:
         print(output)
diff --git a/src/inscriptis/__init__.py b/src/inscriptis/__init__.py
index 2eb150e..2ca6414 100644
--- a/src/inscriptis/__init__.py
+++ b/src/inscriptis/__init__.py
@@ -68,7 +68,7 @@
 from inscriptis.model.config import ParserConfig
 from inscriptis.html_engine import Inscriptis
 
-RE_STRIP_XML_DECLARATION = re.compile(r'^<\?xml [^>]+?\?>')
+RE_STRIP_XML_DECLARATION = re.compile(r"^<\?xml [^>]+?\?>")
 
 
 def _get_html_tree(html_content: str) -> Optional[HtmlElement]:
@@ -85,13 +85,13 @@ def _get_html_tree(html_content: str) -> Optional[HtmlElement]:
         return None
 
     # strip XML declaration, if necessary
-    if html_content.startswith('<?xml '):
-        html_content = RE_STRIP_XML_DECLARATION.sub('', html_content, count=1)
+    if html_content.startswith("<?xml "):
+        html_content = RE_STRIP_XML_DECLARATION.sub("", html_content, count=1)
 
     try:
         return fromstring(html_content)
     except ParserError:
-        return fromstring('<pre>' + html_content + '</pre>')
+        return fromstring("<pre>" + html_content + "</pre>")
 
 
 def get_text(html_content: str, config: ParserConfig = None) -> str:
@@ -105,12 +105,12 @@ def get_text(html_content: str, config: ParserConfig = None) -> str:
       The text representation of the HTML content.
     """
     html_tree = _get_html_tree(html_content)
-    return Inscriptis(html_tree, config).get_text() if html_tree is not None \
-        else ''
+    return Inscriptis(html_tree, config).get_text() if html_tree is not None else ""
 
 
-def get_annotated_text(html_content: str,
-                       config: ParserConfig = None) -> Dict[str, Any]:
+def get_annotated_text(
+    html_content: str, config: ParserConfig = None
+) -> Dict[str, Any]:
     """Return a dictionary of the extracted text and annotations.
 
     Notes:
@@ -132,7 +132,5 @@ def get_annotated_text(html_content: str,
         return {}
 
     inscriptis = Inscriptis(html_tree, config)
-    labels = [(a.start, a.end, a.metadata)
-              for a in inscriptis.get_annotations()]
-    return {'text': inscriptis.get_text(),
-            'label': labels}
+    labels = [(a.start, a.end, a.metadata) for a in inscriptis.get_annotations()]
+    return {"text": inscriptis.get_text(), "label": labels}
diff --git a/src/inscriptis/annotation/__init__.py b/src/inscriptis/annotation/__init__.py
index 3d2b626..acf3d09 100644
--- a/src/inscriptis/annotation/__init__.py
+++ b/src/inscriptis/annotation/__init__.py
@@ -29,9 +29,13 @@ class Annotation(NamedTuple):
     """a tuple of tags to be attached to the annotation."""
 
 
-def horizontal_shift(annotations: List[Annotation], content_width: int,
-                     line_width: int, align: HorizontalAlignment,
-                     shift: int = 0) -> List[Annotation]:
+def horizontal_shift(
+    annotations: List[Annotation],
+    content_width: int,
+    line_width: int,
+    align: HorizontalAlignment,
+    shift: int = 0,
+) -> List[Annotation]:
     r"""Shift annotations based on the given line's formatting.
 
     Adjusts the start and end indices of annotations based on the line's
@@ -56,5 +60,6 @@ def horizontal_shift(annotations: List[Annotation], content_width: int,
     else:
         h_align = shift + (line_width - content_width) // 2
 
-    return [Annotation(a.start + h_align, a.end + h_align, a.metadata)
-            for a in annotations]
+    return [
+        Annotation(a.start + h_align, a.end + h_align, a.metadata) for a in annotations
+    ]
diff --git a/src/inscriptis/annotation/output/html.py b/src/inscriptis/annotation/output/html.py
index 2ea498b..f7da4a8 100644
--- a/src/inscriptis/annotation/output/html.py
+++ b/src/inscriptis/annotation/output/html.py
@@ -5,8 +5,7 @@
 
 from inscriptis.annotation.output import AnnotationProcessor
 
-COLOR_SCHEMA = ('#D8115980', '#8F2D5680', '#21838080',
-                '#FBB13C80', '#73D2DE80')
+COLOR_SCHEMA = ("#D8115980", "#8F2D5680", "#21838080", "#FBB13C80", "#73D2DE80")
 
 
 class HtmlExtractor(AnnotationProcessor):
@@ -21,39 +20,43 @@ class HtmlExtractor(AnnotationProcessor):
     def __call__(self, annotated_text: Dict[str, Any]) -> str:
         tag_indices = defaultdict(list)
 
-        for start, end, label in sorted(annotated_text['label']):
+        for start, end, label in sorted(annotated_text["label"]):
             tag_indices[start].append(label)
-            tag_indices[end].append('/' + label)
+            tag_indices[end].append("/" + label)
 
         open_tags = []
-        tagged_content = ['<html><head><style>',
-                          self._get_css(annotated_text['label']),
-                          '</style></head><body><pre>']
-        for idx, ch in enumerate(annotated_text['text']):
+        tagged_content = [
+            "<html><head><style>",
+            self._get_css(annotated_text["label"]),
+            "</style></head><body><pre>",
+        ]
+        for idx, ch in enumerate(annotated_text["text"]):
             if idx in tag_indices:
                 tags = tag_indices[idx]
                 # close tags:
-                for _ in (t for t in sorted(tags, reverse=True)
-                          if t.startswith('/')):
+                for _ in (t for t in sorted(tags, reverse=True) if t.startswith("/")):
                     open_tags.pop()
-                    tagged_content.append('</span>')
+                    tagged_content.append("</span>")
                 # open tags
-                for tag in (t for t in sorted(tags, reverse=True)
-                            if not t.startswith('/')):
+                for tag in (
+                    t for t in sorted(tags, reverse=True) if not t.startswith("/")
+                ):
                     open_tags.append(tag)
                     tagged_content.append(
                         '<span class="{tag}-label">{tag}</span>'
-                        '<span class="{tag}">'.format(tag=tag))
-
-            if ch == '\n':
-                tagged_content.extend(['</span>' for _ in open_tags])
-                tagged_content.append('</pre>\n<pre>')
-                tagged_content.extend(['<span class="{tag}">'.format(tag=tag)
-                                       for tag in open_tags])
+                        '<span class="{tag}">'.format(tag=tag)
+                    )
+
+            if ch == "\n":
+                tagged_content.extend(["</span>" for _ in open_tags])
+                tagged_content.append("</pre>\n<pre>")
+                tagged_content.extend(
+                    ['<span class="{tag}">'.format(tag=tag) for tag in open_tags]
+                )
             else:
                 tagged_content.append(ch)
 
-        return ''.join(tagged_content) + '</pre></body></html>'
+        return "".join(tagged_content) + "</pre></body></html>"
 
     @staticmethod
     def _get_label_colors(labels: List[str]) -> Dict[str, str]:
@@ -84,18 +87,18 @@ def _get_css(self, labels: List[str]) -> str:
         css = []
         for label, color in sorted(self._get_label_colors(labels).items()):
             css.append(
-                'pre{{'
-                '  position: relative;\n'
-                '}}\n'
-                '.{label} {{\n'
-                '  background-color: {color};\n'
-                '  border-radius: 0.4em;\n'
-                '}}\n'
-                '.{label}-label {{\n'
-                '  top: -1.0em;\n'
+                "pre{{"
+                "  position: relative;\n"
+                "}}\n"
+                ".{label} {{\n"
+                "  background-color: {color};\n"
+                "  border-radius: 0.4em;\n"
+                "}}\n"
+                ".{label}-label {{\n"
+                "  top: -1.0em;\n"
                 '  content: "{label}";\n'
-                '  position: absolute;\n'
-                '  background-color: {color};\n'
-                '  font-size: 75%; }}\n'.format(label=label,
-                                                color=color))
-        return '\n'.join(css)
+                "  position: absolute;\n"
+                "  background-color: {color};\n"
+                "  font-size: 75%; }}\n".format(label=label, color=color)
+            )
+        return "\n".join(css)
diff --git a/src/inscriptis/annotation/output/surface.py b/src/inscriptis/annotation/output/surface.py
index 52472d4..e4e5252 100644
--- a/src/inscriptis/annotation/output/surface.py
+++ b/src/inscriptis/annotation/output/surface.py
@@ -21,7 +21,9 @@ def __call__(self, annotated_text: Dict[str, Any]) -> Dict[str, Any]:
             An extended dictionary which contains the extracted surface-forms
             of the annotations under the key 'surface'.
         """
-        surface_forms = [(label, annotated_text['text'][start:end])
-                         for start, end, label in annotated_text['label']]
-        annotated_text['surface'] = surface_forms
+        surface_forms = [
+            (label, annotated_text["text"][start:end])
+            for start, end, label in annotated_text["label"]
+        ]
+        annotated_text["surface"] = surface_forms
         return annotated_text
diff --git a/src/inscriptis/annotation/output/xml.py b/src/inscriptis/annotation/output/xml.py
index 9c983f7..c31aa06 100644
--- a/src/inscriptis/annotation/output/xml.py
+++ b/src/inscriptis/annotation/output/xml.py
@@ -22,23 +22,28 @@ def __call__(self, annotated_text: Dict[str, Any]) -> str:
         """
         tag_indices = defaultdict(list)
 
-        for start, end, label in sorted(annotated_text['label']):
+        for start, end, label in sorted(annotated_text["label"]):
             tag_indices[start].append(label)
-            tag_indices[end].append('/' + label)
+            tag_indices[end].append("/" + label)
 
         current_idx = 0
         tagged_content = ['<?xml version="1.0" encoding="UTF-8" ?>\n']
-        text = annotated_text['text']
+        text = annotated_text["text"]
         for index, tags in sorted(tag_indices.items()):
             tagged_content.append(text[current_idx:index])
             # close tags
-            tagged_content.extend(['<' + tag + '>'
-                                   for tag in sorted(tags, reverse=True)
-                                   if tag.startswith('/')])
+            tagged_content.extend(
+                [
+                    "<" + tag + ">"
+                    for tag in sorted(tags, reverse=True)
+                    if tag.startswith("/")
+                ]
+            )
             # open tags
-            tagged_content.extend(['<' + tag + '>' for tag in sorted(tags)
-                                   if not tag.startswith('/')])
+            tagged_content.extend(
+                ["<" + tag + ">" for tag in sorted(tags) if not tag.startswith("/")]
+            )
             current_idx = index
         tagged_content.append(text[current_idx:])
 
-        return ''.join(tagged_content)
+        return "".join(tagged_content)
diff --git a/src/inscriptis/annotation/parser.py b/src/inscriptis/annotation/parser.py
index 500df4f..56bdf61 100644
--- a/src/inscriptis/annotation/parser.py
+++ b/src/inscriptis/annotation/parser.py
@@ -34,10 +34,15 @@ class ApplyAnnotation:
                      match_value.
     """
 
-    __slots__ = ('annotations', 'match_tag', 'match_value', 'attr', 'matcher')
-
-    def __init__(self, annotations: tuple, attr: str, match_tag: str = None,
-                 match_value: str = None):
+    __slots__ = ("annotations", "match_tag", "match_value", "attr", "matcher")
+
+    def __init__(
+        self,
+        annotations: tuple,
+        attr: str,
+        match_tag: str = None,
+        match_value: str = None,
+    ):
         self.annotations = tuple(annotations)
         self.attr = attr
         self.match_tag = match_tag
@@ -46,17 +51,18 @@ def __init__(self, annotations: tuple, attr: str, match_tag: str = None,
     def apply(self, attr_value: str, html_element: HtmlElement):
         """Apply the annotation to HtmlElements with matching tags."""
         if (self.match_tag and self.match_tag != html_element.tag) or (
-                self.match_value and self.match_value
-                not in attr_value.split()):
+            self.match_value and self.match_value not in attr_value.split()
+        ):
             return
 
         html_element.annotation += self.annotations
 
     def __str__(self):
-        return '<ApplyAnnotation: {tag}#{attr}={value}'.format(
-            tag=self.match_tag or '{any}',
-            attr=self.attr or '{any}',
-            value=self.match_value or '{any}')
+        return "<ApplyAnnotation: {tag}#{attr}={value}".format(
+            tag=self.match_tag or "{any}",
+            attr=self.attr or "{any}",
+            value=self.match_value or "{any}",
+        )
 
     __repr__ = __str__
 
@@ -79,7 +85,7 @@ def __init__(self, css_profile, model: dict):
         self.css = css_profile
 
     @staticmethod
-    def _parse(model: dict) -> 'AnnotationModel':
+    def _parse(model: dict) -> "AnnotationModel":
         """Compute the AnnotationModel from a model dictionary.
 
         Returns:
@@ -88,14 +94,13 @@ def _parse(model: dict) -> 'AnnotationModel':
         tags = defaultdict(list)
         attrs = []
         for key, annotations in model.items():
-            if '#' in key:
-                tag, attr = key.split('#')
-                if '=' in attr:
-                    attr, value = attr.split('=')
+            if "#" in key:
+                tag, attr = key.split("#")
+                if "=" in attr:
+                    attr, value = attr.split("=")
                 else:
                     value = None
-                attrs.append(ApplyAnnotation(annotations, attr,
-                                             tag, value))
+                attrs.append(ApplyAnnotation(annotations, attr, tag, value))
             else:
                 tags[key].extend(annotations)
         return tags, attrs
diff --git a/src/inscriptis/css_profiles.py b/src/inscriptis/css_profiles.py
index 3d08c45..51889b3 100644
--- a/src/inscriptis/css_profiles.py
+++ b/src/inscriptis/css_profiles.py
@@ -12,73 +12,53 @@
 from inscriptis.html_properties import Display, WhiteSpace
 
 STRICT_CSS_PROFILE = {
-    'body': HtmlElement(display=Display.inline,
-                        whitespace=WhiteSpace.normal),
-    'head': HtmlElement(display=Display.none),
-    'link': HtmlElement(display=Display.none),
-    'meta': HtmlElement(display=Display.none),
-    'script': HtmlElement(display=Display.none),
-    'title': HtmlElement(display=Display.none),
-    'style': HtmlElement(display=Display.none),
-
-    'p': HtmlElement(display=Display.block, margin_before=1,
-                     margin_after=1),
-    'figure': HtmlElement(display=Display.block, margin_before=1,
-                          margin_after=1),
-
-    'h1': HtmlElement(display=Display.block, margin_before=1,
-                      margin_after=1),
-    'h2': HtmlElement(display=Display.block, margin_before=1,
-                      margin_after=1),
-    'h3': HtmlElement(display=Display.block, margin_before=1,
-                      margin_after=1),
-    'h4': HtmlElement(display=Display.block, margin_before=1,
-                      margin_after=1),
-    'h5': HtmlElement(display=Display.block, margin_before=1,
-                      margin_after=1),
-    'h6': HtmlElement(display=Display.block, margin_before=1,
-                      margin_after=1),
-
-    'ul': HtmlElement(display=Display.block, margin_before=0,
-                      margin_after=0, padding_inline=4),
-    'ol': HtmlElement(display=Display.block, margin_before=0,
-                      margin_after=0, padding_inline=4),
-    'li': HtmlElement(display=Display.block),
-
-    'address': HtmlElement(display=Display.block),
-    'article': HtmlElement(display=Display.block),
-    'aside': HtmlElement(display=Display.block),
-    'div': HtmlElement(display=Display.block),
-    'footer': HtmlElement(display=Display.block),
-    'header': HtmlElement(display=Display.block),
-    'hgroup': HtmlElement(display=Display.block),
-    'layer': HtmlElement(display=Display.block),
-    'main': HtmlElement(display=Display.block),
-    'nav': HtmlElement(display=Display.block),
-    'figcaption': HtmlElement(display=Display.block),
-
-    'blockquote': HtmlElement(display=Display.block),
-
-    'q': HtmlElement(prefix='"', suffix='"'),
-
+    "body": HtmlElement(display=Display.inline, whitespace=WhiteSpace.normal),
+    "head": HtmlElement(display=Display.none),
+    "link": HtmlElement(display=Display.none),
+    "meta": HtmlElement(display=Display.none),
+    "script": HtmlElement(display=Display.none),
+    "title": HtmlElement(display=Display.none),
+    "style": HtmlElement(display=Display.none),
+    "p": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "figure": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "h1": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "h2": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "h3": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "h4": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "h5": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "h6": HtmlElement(display=Display.block, margin_before=1, margin_after=1),
+    "ul": HtmlElement(
+        display=Display.block, margin_before=0, margin_after=0, padding_inline=4
+    ),
+    "ol": HtmlElement(
+        display=Display.block, margin_before=0, margin_after=0, padding_inline=4
+    ),
+    "li": HtmlElement(display=Display.block),
+    "address": HtmlElement(display=Display.block),
+    "article": HtmlElement(display=Display.block),
+    "aside": HtmlElement(display=Display.block),
+    "div": HtmlElement(display=Display.block),
+    "footer": HtmlElement(display=Display.block),
+    "header": HtmlElement(display=Display.block),
+    "hgroup": HtmlElement(display=Display.block),
+    "layer": HtmlElement(display=Display.block),
+    "main": HtmlElement(display=Display.block),
+    "nav": HtmlElement(display=Display.block),
+    "figcaption": HtmlElement(display=Display.block),
+    "blockquote": HtmlElement(display=Display.block),
+    "q": HtmlElement(prefix='"', suffix='"'),
     # Handling of <pre>
-    'pre': HtmlElement(display=Display.block,
-                       whitespace=WhiteSpace.pre),
-    'xmp': HtmlElement(display=Display.block,
-                       whitespace=WhiteSpace.pre),
-    'listing': HtmlElement(display=Display.block,
-                           whitespace=WhiteSpace.pre),
-    'plaintext': HtmlElement(display=Display.block,
-                             whitespace=WhiteSpace.pre),
+    "pre": HtmlElement(display=Display.block, whitespace=WhiteSpace.pre),
+    "xmp": HtmlElement(display=Display.block, whitespace=WhiteSpace.pre),
+    "listing": HtmlElement(display=Display.block, whitespace=WhiteSpace.pre),
+    "plaintext": HtmlElement(display=Display.block, whitespace=WhiteSpace.pre),
 }
 
 RELAXED_CSS_PROFILE = STRICT_CSS_PROFILE.copy()
-RELAXED_CSS_PROFILE['div'] = HtmlElement(display=Display.block,
-                                         padding_inline=2)
-RELAXED_CSS_PROFILE['span'] = HtmlElement(display=Display.inline,
-                                          prefix=' ', suffix=' ',
-                                          limit_whitespace_affixes=True)
+RELAXED_CSS_PROFILE["div"] = HtmlElement(display=Display.block, padding_inline=2)
+RELAXED_CSS_PROFILE["span"] = HtmlElement(
+    display=Display.inline, prefix=" ", suffix=" ", limit_whitespace_affixes=True
+)
 
 
-CSS_PROFILES = {'strict': STRICT_CSS_PROFILE,
-                'relaxed': RELAXED_CSS_PROFILE}
+CSS_PROFILES = {"strict": STRICT_CSS_PROFILE, "relaxed": RELAXED_CSS_PROFILE}
diff --git a/src/inscriptis/html_engine.py b/src/inscriptis/html_engine.py
index e2c7f3e..35496fb 100644
--- a/src/inscriptis/html_engine.py
+++ b/src/inscriptis/html_engine.py
@@ -35,34 +35,33 @@ class Inscriptis:
       text = parser.get_text()
     """
 
-    UL_COUNTER = ('* ', '+ ', 'o ', '- ')
+    UL_COUNTER = ("* ", "+ ", "o ", "- ")
     UL_COUNTER_LEN = len(UL_COUNTER)
 
-    def __init__(self, html_tree: lxml.html.HtmlElement,
-                 config: ParserConfig = None):
+    def __init__(self, html_tree: lxml.html.HtmlElement, config: ParserConfig = None):
         # use the default configuration, if no config object is provided
         self.config = config or ParserConfig()
 
         # setup start and end tag call tables
         self.start_tag_handler_dict = {
-            'table': self._start_table,
-            'tr': self._start_tr,
-            'td': self._start_td,
-            'th': self._start_td,
-            'ul': self._start_ul,
-            'ol': self._start_ol,
-            'li': self._start_li,
-            'br': self._newline,
-            'a': self._start_a if self.config.parse_a() else None,
-            'img': self._start_img if self.config.display_images else None,
+            "table": self._start_table,
+            "tr": self._start_tr,
+            "td": self._start_td,
+            "th": self._start_td,
+            "ul": self._start_ul,
+            "ol": self._start_ol,
+            "li": self._start_li,
+            "br": self._newline,
+            "a": self._start_a if self.config.parse_a() else None,
+            "img": self._start_img if self.config.display_images else None,
         }
         self.end_tag_handler_dict = {
-            'table': self._end_table,
-            'ul': self._end_ul,
-            'ol': self._end_ol,
-            'td': self._end_td,
-            'th': self._end_td,
-            'a': self._end_a if self.config.parse_a() else None,
+            "table": self._end_table,
+            "ul": self._end_ul,
+            "ol": self._end_ol,
+            "td": self._end_td,
+            "th": self._end_td,
+            "a": self._end_a if self.config.parse_a() else None,
         }
 
         # instance variables
@@ -70,13 +69,13 @@ def __init__(self, html_tree: lxml.html.HtmlElement,
         self.css = self.config.css
         self.apply_attributes = self.config.attribute_handler.apply_attributes
 
-        self.tags = [self.css['body'].set_canvas(self.canvas)]
+        self.tags = [self.css["body"].set_canvas(self.canvas)]
         self.current_table = []
         self.li_counter = []
         self.last_caption = None
 
         # used if display_links is enabled
-        self.link_target = ''
+        self.link_target = ""
 
         # crawl the html tree
         self._parse_html_tree(html_tree)
@@ -133,8 +132,13 @@ def handle_starttag(self, tag, attrs):
         """
         # use the css to handle tags known to it :)
         cur = self.tags[-1].get_refined_html_element(
-            self.apply_attributes(attrs, html_element=self.css.get(
-                tag, DEFAULT_HTML_ELEMENT).__copy__().set_tag(tag)))
+            self.apply_attributes(
+                attrs,
+                html_element=self.css.get(tag, DEFAULT_HTML_ELEMENT)
+                .__copy__()
+                .set_tag(tag),
+            )
+        )
         self.tags.append(cur)
 
         handler = self.start_tag_handler_dict.get(tag)
@@ -161,25 +165,26 @@ def _end_ul(self):
         self.li_counter.pop()
 
     def _start_img(self, attrs):
-        image_text = attrs.get('alt', '') or attrs.get('title', '')
-        if image_text and not (self.config.deduplicate_captions
-                               and image_text == self.last_caption):
-            self.tags[-1].write(f'[{image_text}]')
+        image_text = attrs.get("alt", "") or attrs.get("title", "")
+        if image_text and not (
+            self.config.deduplicate_captions and image_text == self.last_caption
+        ):
+            self.tags[-1].write(f"[{image_text}]")
             self.last_caption = image_text
 
     def _start_a(self, attrs):
-        self.link_target = ''
+        self.link_target = ""
         if self.config.display_links:
-            self.link_target = attrs.get('href', '')
+            self.link_target = attrs.get("href", "")
         if self.config.display_anchors:
-            self.link_target = self.link_target or attrs.get('name', '')
+            self.link_target = self.link_target or attrs.get("name", "")
 
         if self.link_target:
-            self.tags[-1].write('[')
+            self.tags[-1].write("[")
 
     def _end_a(self):
         if self.link_target:
-            self.tags[-1].write(f']({self.link_target})')
+            self.tags[-1].write(f"]({self.link_target})")
 
     def _start_ol(self, _):
         self.li_counter.append(1)
@@ -188,20 +193,23 @@ def _end_ol(self):
         self.li_counter.pop()
 
     def _start_li(self, _):
-        bullet = self.li_counter[-1] if self.li_counter else '* '
+        bullet = self.li_counter[-1] if self.li_counter else "* "
         if isinstance(bullet, int):
             self.li_counter[-1] += 1
-            self.tags[-1].list_bullet = f'{bullet}. '
+            self.tags[-1].list_bullet = f"{bullet}. "
         else:
             self.tags[-1].list_bullet = bullet
 
-        self.tags[-1].write('')
+        self.tags[-1].write("")
 
     def _start_table(self, _):
         self.tags[-1].set_canvas(Canvas())
-        self.current_table.append(Table(
-            left_margin_len=self.tags[-1].canvas.left_margin,
-            cell_separator=self.config.table_cell_separator))
+        self.current_table.append(
+            Table(
+                left_margin_len=self.tags[-1].canvas.left_margin,
+                cell_separator=self.config.table_cell_separator,
+            )
+        )
 
     def _start_tr(self, _):
         if self.current_table:
@@ -210,8 +218,9 @@ def _start_tr(self, _):
     def _start_td(self, _):
         if self.current_table:
             # open td tag
-            table_cell = TableCell(align=self.tags[-1].align,
-                                   valign=self.tags[-1].valign)
+            table_cell = TableCell(
+                align=self.tags[-1].align, valign=self.tags[-1].valign
+            )
             self.tags[-1].canvas = table_cell
             self.current_table[-1].add_cell(table_cell)
 
@@ -239,17 +248,18 @@ def _end_table(self):
         if self.tags[-1].annotation:
             end_idx = self.tags[-2].canvas.current_block.idx
             for a in self.tags[-1].annotation:
-                self.tags[-2].canvas.annotations.append(Annotation(
-                    start_idx, end_idx, a))
+                self.tags[-2].canvas.annotations.append(
+                    Annotation(start_idx, end_idx, a)
+                )
 
         # transfer in-table annotations
         self.tags[-2].canvas.annotations.extend(
-            table.get_annotations(start_idx, self.tags[-2].canvas.left_margin))
+            table.get_annotations(start_idx, self.tags[-2].canvas.left_margin)
+        )
 
     def _newline(self, _):
         self.tags[-1].canvas.write_newline()
 
     def get_bullet(self) -> str:
         """Return the bullet that correspond to the given index."""
-        return Inscriptis.UL_COUNTER[
-            len(self.li_counter) % Inscriptis.UL_COUNTER_LEN]
+        return Inscriptis.UL_COUNTER[len(self.li_counter) % Inscriptis.UL_COUNTER_LEN]
diff --git a/src/inscriptis/html_properties.py b/src/inscriptis/html_properties.py
index b1d24ea..4dc9dea 100644
--- a/src/inscriptis/html_properties.py
+++ b/src/inscriptis/html_properties.py
@@ -39,11 +39,11 @@ class WhiteSpace(Enum):
 class HorizontalAlignment(Enum):
     """Specify the content's horizontal alignment."""
 
-    left = '<'
+    left = "<"
     """Left alignment of the block's content."""
-    right = '>'
+    right = ">"
     """Right alignment of the block's content."""
-    center = '^'
+    center = "^"
     """Center the block's content."""
 
 
diff --git a/src/inscriptis/metadata.py b/src/inscriptis/metadata.py
index 2b2a771..f7112f0 100644
--- a/src/inscriptis/metadata.py
+++ b/src/inscriptis/metadata.py
@@ -1,7 +1,7 @@
 """Inscriptis metadata information."""
 
-__author__ = 'Albert Weichselbraun, Fabian Odoni'
-__author_email__ = 'albert.weichselbraun@fhgr.ch, fabian.odoni@fhgr.ch'
-__copyright__ = '2016-2024 Albert Weichselbraun, Fabian Odoni'
-__license__ = 'Apache 2.0'
-__version__ = '2.3.3'
+__author__ = "Albert Weichselbraun, Fabian Odoni"
+__author_email__ = "albert.weichselbraun@fhgr.ch, fabian.odoni@fhgr.ch"
+__copyright__ = "2016-2024 Albert Weichselbraun, Fabian Odoni"
+__license__ = "Apache 2.0"
+__version__ = "2.3.3"
diff --git a/src/inscriptis/model/attribute.py b/src/inscriptis/model/attribute.py
index 0102e3f..f4f8efc 100644
--- a/src/inscriptis/model/attribute.py
+++ b/src/inscriptis/model/attribute.py
@@ -10,9 +10,9 @@
 from inscriptis.model.html_element import HtmlElement
 
 DEFAULT_ATTRIBUTE_MAP = {
-    'style': CssParse.attr_style,
-    'align': CssParse.attr_horizontal_align,
-    'valign': CssParse.attr_vertical_align
+    "style": CssParse.attr_style,
+    "align": CssParse.attr_horizontal_align,
+    "valign": CssParse.attr_vertical_align,
 }
 
 
@@ -26,9 +26,11 @@ def merge_function(func1, func2):
         func1: the first function
         func2: the second function
     """
+
     def merged(*args):
         func1(*args)
         func2(*args)
+
     return merged
 
 
@@ -46,16 +48,20 @@ class Attribute:
     def __init__(self):
         self.attribute_mapping = DEFAULT_ATTRIBUTE_MAP
 
-    def apply_attributes(self, attributes: Dict[str, str],
-                         html_element: HtmlElement) -> HtmlElement:
+    def apply_attributes(
+        self, attributes: Dict[str, str], html_element: HtmlElement
+    ) -> HtmlElement:
         """Apply the attributes to the given HTML element.
 
         Args:
             attributes: the list of attributes
             html_element: the HTML element for which the attributes are parsed
         """
-        supported_attributes = ((name, val) for name, val in attributes.items()
-                                if name in self.attribute_mapping)
+        supported_attributes = (
+            (name, val)
+            for name, val in attributes.items()
+            if name in self.attribute_mapping
+        )
         for attr_name, attr_value in supported_attributes:
             self.attribute_mapping[attr_name](attr_value, html_element)
         return html_element
@@ -63,6 +69,9 @@ def apply_attributes(self, attributes: Dict[str, str],
     def merge_attribute_map(self, annotations: List[ApplyAnnotation] = None):
         attributes = copy(self.attribute_mapping)
         for a in annotations:
-            attributes[a.attr] = a.apply if a.attr not in attributes \
+            attributes[a.attr] = (
+                a.apply
+                if a.attr not in attributes
                 else merge_function(attributes[a.attr], a.apply)
+            )
         self.attribute_mapping = attributes
diff --git a/src/inscriptis/model/canvas/__init__.py b/src/inscriptis/model/canvas/__init__.py
index ef41254..7cf5ca4 100644
--- a/src/inscriptis/model/canvas/__init__.py
+++ b/src/inscriptis/model/canvas/__init__.py
@@ -37,8 +37,13 @@ class Canvas:
         _open_annotations: a map of open tags that contain annotations.
     """
 
-    __slots__ = ('annotations', 'blocks', 'current_block', '_open_annotations',
-                 'margin')
+    __slots__ = (
+        "annotations",
+        "blocks",
+        "current_block",
+        "_open_annotations",
+        "margin",
+    )
 
     def __init__(self):
         self.margin = 1000  # margin to the previous block
@@ -64,15 +69,14 @@ def open_block(self, tag: HtmlElement):
         # write missing bullets, if no content has been written
         if not self._flush_inline() and tag.list_bullet:
             self.write_unconsumed_bullet()
-        self.current_block.prefix.register_prefix(tag.padding_inline,
-                                                  tag.list_bullet)
+        self.current_block.prefix.register_prefix(tag.padding_inline, tag.list_bullet)
 
         # write the block margin
         required_margin = max(tag.previous_margin_after, tag.margin_before)
         if required_margin > self.margin:
             required_newlines = required_margin - self.margin
             self.current_block.idx += required_newlines
-            self.blocks.append('\n' * (required_newlines - 1))
+            self.blocks.append("\n" * (required_newlines - 1))
             self.margin = required_margin
 
     def write_unconsumed_bullet(self):
@@ -84,8 +88,7 @@ def write_unconsumed_bullet(self):
             self.current_block = self.current_block.new_block()
             self.margin = 0
 
-    def write(self, tag: HtmlElement, text: str,
-              whitespace: WhiteSpace = None) -> None:
+    def write(self, tag: HtmlElement, text: str, whitespace: WhiteSpace = None) -> None:
         """Write the given text to the current block."""
         self.current_block.merge(text, whitespace or tag.whitespace)
 
@@ -110,7 +113,8 @@ def close_tag(self, tag: HtmlElement) -> None:
 
             for annotation in tag.annotation:
                 self.annotations.append(
-                    Annotation(start_idx, self.current_block.idx, annotation))
+                    Annotation(start_idx, self.current_block.idx, annotation)
+                )
 
     def close_block(self, tag: HtmlElement):
         """Close the given HtmlElement by writing its bottom margin.
@@ -121,18 +125,18 @@ def close_block(self, tag: HtmlElement):
         if tag.margin_after > self.margin:
             required_newlines = tag.margin_after - self.margin
             self.current_block.idx += required_newlines
-            self.blocks.append('\n' * (required_newlines - 1))
+            self.blocks.append("\n" * (required_newlines - 1))
             self.margin = tag.margin_after
 
     def write_newline(self):
         if not self._flush_inline():
-            self.blocks.append('')
+            self.blocks.append("")
             self.current_block = self.current_block.new_block()
 
     def get_text(self) -> str:
         """Provide a text representation of the Canvas."""
         self._flush_inline()
-        return '\n'.join(self.blocks)
+        return "\n".join(self.blocks)
 
     def _flush_inline(self) -> bool:
         """Attempt to flush the content in self.current_block into a new block.
diff --git a/src/inscriptis/model/canvas/block.py b/src/inscriptis/model/canvas/block.py
index 23c6906..59ba05f 100644
--- a/src/inscriptis/model/canvas/block.py
+++ b/src/inscriptis/model/canvas/block.py
@@ -17,12 +17,12 @@ class Block:
         prefix: prefix used within the current block.
     """
 
-    __slots__ = ('idx', 'prefix', '_content', 'collapsable_whitespace')
+    __slots__ = ("idx", "prefix", "_content", "collapsable_whitespace")
 
     def __init__(self, idx: int, prefix: str):
         self.idx = idx
         self.prefix = prefix
-        self._content = ''
+        self._content = ""
         self.collapsable_whitespace = True
 
     def merge(self, text: str, whitespace: WhiteSpace) -> None:
@@ -50,12 +50,15 @@ def merge_normal_text(self, text: str) -> None:
                 normalized_text.append(ch)
                 self.collapsable_whitespace = False
             elif not self.collapsable_whitespace:
-                normalized_text.append(' ')
+                normalized_text.append(" ")
                 self.collapsable_whitespace = True
 
         if normalized_text:
-            text = ''.join((self.prefix.first, *normalized_text)) if not \
-                self._content else ''.join(normalized_text)
+            text = (
+                "".join((self.prefix.first, *normalized_text))
+                if not self._content
+                else "".join(normalized_text)
+            )
             text = unescape(text)
             self._content += text
             self.idx += len(text)
@@ -66,8 +69,7 @@ def merge_pre_text(self, text: str) -> None:
         Args:
             text: the text to merge
         """
-        text = ''.join((self.prefix.first,
-                        text.replace('\n', '\n' + self.prefix.rest)))
+        text = "".join((self.prefix.first, text.replace("\n", "\n" + self.prefix.rest)))
         text = unescape(text)
         self._content += text
         self.idx += len(text)
@@ -81,12 +83,12 @@ def content(self):
         if not self.collapsable_whitespace:
             return self._content
 
-        if self._content.endswith(' '):
+        if self._content.endswith(" "):
             self._content = self._content[:-1]
             self.idx -= 1
         return self._content
 
-    def new_block(self) -> 'Block':
+    def new_block(self) -> "Block":
         """Return a new Block based on the current one."""
         self.prefix.consumed = False
         return Block(idx=self.idx + 1, prefix=self.prefix)
diff --git a/src/inscriptis/model/canvas/prefix.py b/src/inscriptis/model/canvas/prefix.py
index ca0b768..8a68066 100644
--- a/src/inscriptis/model/canvas/prefix.py
+++ b/src/inscriptis/model/canvas/prefix.py
@@ -14,7 +14,7 @@ class Prefix:
         consumed: whether the current bullet has already been consumed.
     """
 
-    __slots__ = ('current_padding', 'paddings', 'bullets', 'consumed')
+    __slots__ = ("current_padding", "paddings", "bullets", "consumed")
 
     def __init__(self):
         self.current_padding = 0
@@ -31,7 +31,7 @@ def register_prefix(self, padding_inline, bullet):
         """
         self.current_padding += padding_inline
         self.paddings.append(padding_inline)
-        self.bullets.append(bullet if bullet else '')
+        self.bullets.append(bullet if bullet else "")
 
     def remove_last_prefix(self):
         """Remove the last prefix from the list."""
@@ -41,15 +41,15 @@ def remove_last_prefix(self):
 
     def pop_next_bullet(self):
         """Pop the next bullet to use, if any bullet is available."""
-        next_bullet_idx = next((-idx for idx, val
-                                in enumerate(reversed(self.bullets))
-                                if val), 1) - 1
+        next_bullet_idx = (
+            next((-idx for idx, val in enumerate(reversed(self.bullets)) if val), 1) - 1
+        )
 
         if not next_bullet_idx:
-            return ''
+            return ""
 
         bullet = self.bullets[next_bullet_idx]
-        self.bullets[next_bullet_idx] = ''
+        self.bullets[next_bullet_idx] = ""
         return bullet
 
     @property
@@ -62,12 +62,11 @@ def first(self):
             further prefixes should be used for a line.
         """
         if self.consumed:
-            return ''
+            return ""
 
         self.consumed = True
         bullet = self.pop_next_bullet()
-        return ' ' * (self.current_padding - len(bullet)) \
-               + bullet
+        return " " * (self.current_padding - len(bullet)) + bullet
 
     @property
     def unconsumed_bullet(self):
@@ -78,15 +77,14 @@ def unconsumed_bullet(self):
             not been consumed yet.
         """
         if self.consumed:
-            return ''
+            return ""
 
         bullet = self.pop_next_bullet()
         if not bullet:
-            return ''
+            return ""
 
         padding = self.current_padding - self.paddings[-1]
-        return ' ' * (padding - len(bullet)) \
-               + bullet
+        return " " * (padding - len(bullet)) + bullet
 
     @property
     def rest(self):
@@ -96,4 +94,4 @@ def rest(self):
         need to be prefixed with the right padding to preserver the
         indentation.
         """
-        return ' ' * self.current_padding
+        return " " * self.current_padding
diff --git a/src/inscriptis/model/config.py b/src/inscriptis/model/config.py
index 9bc216d..0aaeb7a 100644
--- a/src/inscriptis/model/config.py
+++ b/src/inscriptis/model/config.py
@@ -9,19 +9,22 @@
 from inscriptis.model.attribute import Attribute
 from inscriptis.model.html_element import HtmlElement
 
-DEFAULT_CSS_PROFILE_NAME = 'relaxed'
+DEFAULT_CSS_PROFILE_NAME = "relaxed"
 
 
 class ParserConfig:
     """Encapsulate configuration options and CSS definitions."""
 
-    def __init__(self, css: Dict[str, HtmlElement] = None,
-                 display_images: bool = False,
-                 deduplicate_captions: bool = False,
-                 display_links: bool = False,
-                 display_anchors: bool = False,
-                 annotation_rules: Attribute = None,
-                 table_cell_separator: str = '  '):
+    def __init__(
+        self,
+        css: Dict[str, HtmlElement] = None,
+        display_images: bool = False,
+        deduplicate_captions: bool = False,
+        display_links: bool = False,
+        display_anchors: bool = False,
+        annotation_rules: Attribute = None,
+        table_cell_separator: str = "  ",
+    ):
         """Create a ParserConfig configuration.
 
         Args:
@@ -47,13 +50,11 @@ def __init__(self, css: Dict[str, HtmlElement] = None,
         if annotation_rules:
             # ensure that we do not modify the original model or its
             # members.
-            annotation_model = AnnotationModel(deepcopy(self.css),
-                                               annotation_rules)
+            annotation_model = AnnotationModel(deepcopy(self.css), annotation_rules)
             # css with annotation support
             self.css = annotation_model.css
             # attribute handler with annotation support
-            self.attribute_handler.merge_attribute_map(
-                annotation_model.css_attr)
+            self.attribute_handler.merge_attribute_map(annotation_model.css_attr)
 
     def parse_a(self) -> bool:
         """Indicate whether the text output should contain links or anchors.
diff --git a/src/inscriptis/model/css.py b/src/inscriptis/model/css.py
index 1610055..d9efa44 100644
--- a/src/inscriptis/model/css.py
+++ b/src/inscriptis/model/css.py
@@ -7,8 +7,12 @@
 """
 from contextlib import suppress
 from re import compile as re_compile
-from inscriptis.html_properties import (Display, WhiteSpace,
-                                        HorizontalAlignment, VerticalAlignment)
+from inscriptis.html_properties import (
+    Display,
+    WhiteSpace,
+    HorizontalAlignment,
+    VerticalAlignment,
+)
 from inscriptis.model.html_element import HtmlElement
 
 
@@ -20,7 +24,7 @@ class CssParse:
     """
 
     # used to separate value and unit from each other
-    RE_UNIT = re_compile(r'(-?[0-9.]+)(\w+)')
+    RE_UNIT = re_compile(r"(-?[0-9.]+)(\w+)")
 
     @staticmethod
     def attr_style(style_attribute: str, html_element: HtmlElement):
@@ -31,15 +35,15 @@ def attr_style(style_attribute: str, html_element: HtmlElement):
                            Example: display: none
           html_element: The HtmlElement to which the given style is applied.
         """
-        for style_directive in style_attribute.lower().split(';'):
-            if ':' not in style_directive:
+        for style_directive in style_attribute.lower().split(";"):
+            if ":" not in style_directive:
                 continue
-            key, value = (s.strip() for s in style_directive.split(':', 1))
+            key, value = (s.strip() for s in style_directive.split(":", 1))
 
             try:
-                apply_style = getattr(CssParse, 'attr_'
-                                      + key.replace('-webkit-', '')
-                                      .replace('-', '_'))
+                apply_style = getattr(
+                    CssParse, "attr_" + key.replace("-webkit-", "").replace("-", "_")
+                )
                 apply_style(value, html_element)
             except AttributeError:
                 pass
@@ -61,7 +65,7 @@ def _get_em(length: str) -> int:
         value = float(_m.group(1))
         unit = _m.group(2)
 
-        if unit not in ('em', 'qem', 'rem'):
+        if unit not in ("em", "qem", "rem"):
             return int(round(value / 8))
         return int(round(value))
 
@@ -75,9 +79,9 @@ def attr_display(value: str, html_element: HtmlElement):
         if html_element.display == Display.none:
             return
 
-        if value == 'block':
+        if value == "block":
             html_element.display = Display.block
-        elif value == 'none':
+        elif value == "none":
             html_element.display = Display.none
         else:
             html_element.display = Display.inline
@@ -85,9 +89,9 @@ def attr_display(value: str, html_element: HtmlElement):
     @staticmethod
     def attr_white_space(value: str, html_element: HtmlElement):
         """Apply the given white-space value."""
-        if value in ('normal', 'nowrap'):
+        if value in ("normal", "nowrap"):
             html_element.whitespace = WhiteSpace.normal
-        elif value in ('pre', 'pre-line', 'pre-wrap'):
+        elif value in ("pre", "pre-line", "pre-wrap"):
             html_element.whitespace = WhiteSpace.pre
 
     @staticmethod
diff --git a/src/inscriptis/model/html_element.py b/src/inscriptis/model/html_element.py
index 3ea95fe..5d16e9d 100644
--- a/src/inscriptis/model/html_element.py
+++ b/src/inscriptis/model/html_element.py
@@ -1,8 +1,12 @@
 """Data structures for handling HTML Elements."""
 from typing import Tuple
 
-from inscriptis.html_properties import Display, HorizontalAlignment, \
-    VerticalAlignment, WhiteSpace
+from inscriptis.html_properties import (
+    Display,
+    HorizontalAlignment,
+    VerticalAlignment,
+    WhiteSpace,
+)
 
 
 class HtmlElement:
@@ -28,22 +32,40 @@ class HtmlElement:
     - annotation: annotations associated with the HtmlElement.
     """
 
-    __slots__ = ('canvas', 'tag', 'prefix', 'suffix', 'display',
-                 'margin_before', 'margin_after', 'padding_inline',
-                 'list_bullet', 'whitespace', 'limit_whitespace_affixes',
-                 'align', 'valign', 'previous_margin_after', 'annotation')
-
-    def __init__(self, tag='default', prefix='', suffix='',
-                 display: Display = Display.inline,
-                 margin_before: int = 0,
-                 margin_after: int = 0,
-                 padding_inline: int = 0,
-                 list_bullet: str = '',
-                 whitespace: WhiteSpace = None,
-                 limit_whitespace_affixes: bool = False,
-                 align: HorizontalAlignment = HorizontalAlignment.left,
-                 valign: VerticalAlignment = VerticalAlignment.middle,
-                 annotation: Tuple[str] = ()):
+    __slots__ = (
+        "canvas",
+        "tag",
+        "prefix",
+        "suffix",
+        "display",
+        "margin_before",
+        "margin_after",
+        "padding_inline",
+        "list_bullet",
+        "whitespace",
+        "limit_whitespace_affixes",
+        "align",
+        "valign",
+        "previous_margin_after",
+        "annotation",
+    )
+
+    def __init__(
+        self,
+        tag="default",
+        prefix="",
+        suffix="",
+        display: Display = Display.inline,
+        margin_before: int = 0,
+        margin_after: int = 0,
+        padding_inline: int = 0,
+        list_bullet: str = "",
+        whitespace: WhiteSpace = None,
+        limit_whitespace_affixes: bool = False,
+        align: HorizontalAlignment = HorizontalAlignment.left,
+        valign: VerticalAlignment = VerticalAlignment.middle,
+        annotation: Tuple[str] = (),
+    ):
         self.canvas = None
         self.tag = tag
         self.prefix = prefix
@@ -60,7 +82,7 @@ def __init__(self, tag='default', prefix='', suffix='',
         self.previous_margin_after = 0
         self.annotation = annotation
 
-    def __copy__(self) -> 'HtmlElement':
+    def __copy__(self) -> "HtmlElement":
         """Performance-optimized copy implementation."""
         copy = self.__class__.__new__(self.__class__)
         for attr in self.__slots__:
@@ -71,14 +93,13 @@ def write(self, text: str):
         """Write the given HTML text to the element's canvas."""
         if not text or self.display == Display.none:
             return
-        self.canvas.write(self, ''.join(
-            (self.prefix, text, self.suffix)))
+        self.canvas.write(self, "".join((self.prefix, text, self.suffix)))
 
-    def set_canvas(self, canvas) -> 'HtmlElement':
+    def set_canvas(self, canvas) -> "HtmlElement":
         self.canvas = canvas
         return self
 
-    def set_tag(self, tag: str) -> 'HtmlElement':
+    def set_tag(self, tag: str) -> "HtmlElement":
         self.tag = tag
         return self
 
@@ -99,7 +120,7 @@ def write_verbatim_text(self, text: str):
         if self.display == Display.block:
             self.canvas.close_block(self)
 
-    def get_refined_html_element(self, new: 'HtmlElement') -> 'HtmlElement':
+    def get_refined_html_element(self, new: "HtmlElement") -> "HtmlElement":
         """Compute the new HTML element based on the previous one.
 
         Adaptations:
@@ -124,12 +145,11 @@ def get_refined_html_element(self, new: 'HtmlElement') -> 'HtmlElement':
 
         # do not display whitespace only affixes in Whitespace.pre areas
         # if `limit_whitespace_affixes` is set.
-        if (new.limit_whitespace_affixes
-                and self.whitespace == WhiteSpace.pre):
+        if new.limit_whitespace_affixes and self.whitespace == WhiteSpace.pre:
             if new.prefix.isspace():
-                new.prefix = ''
+                new.prefix = ""
             if new.suffix.isspace():
-                new.suffix = ''
+                new.suffix = ""
 
         if new.display == Display.block and self.display == Display.block:
             new.previous_margin_after = self.margin_after
@@ -138,13 +158,13 @@ def get_refined_html_element(self, new: 'HtmlElement') -> 'HtmlElement':
 
     def __str__(self):
         return (
-            '<{self.tag} prefix={self.prefix}, suffix={self.suffix}, '
-            'display={self.display}, margin_before={self.margin_before}, '
-            'margin_after={self.margin_after}, '
-            'padding_inline={self.padding_inline}, '
-            'list_bullet={self.list_bullet}, '
-            'whitespace={self.whitespace}, align={self.align}, '
-            'valign={self.valign}, annotation={self.annotation}>'
+            "<{self.tag} prefix={self.prefix}, suffix={self.suffix}, "
+            "display={self.display}, margin_before={self.margin_before}, "
+            "margin_after={self.margin_after}, "
+            "padding_inline={self.padding_inline}, "
+            "list_bullet={self.list_bullet}, "
+            "whitespace={self.whitespace}, align={self.align}, "
+            "valign={self.valign}, annotation={self.annotation}>"
         ).format(self=self)
 
     __repr__ = __str__
diff --git a/src/inscriptis/model/table.py b/src/inscriptis/model/table.py
index 559aa79..073c626 100644
--- a/src/inscriptis/model/table.py
+++ b/src/inscriptis/model/table.py
@@ -20,9 +20,19 @@ class TableCell(Canvas):
                           vertical formatting rules.
     """
 
-    __slots__ = ('annotations', 'block_annotations', 'blocks', 'current_block',
-                 'margin', 'annotation_counter', 'align', 'valign', '_width',
-                 'line_width', 'vertical_padding')
+    __slots__ = (
+        "annotations",
+        "block_annotations",
+        "blocks",
+        "current_block",
+        "margin",
+        "annotation_counter",
+        "align",
+        "valign",
+        "_width",
+        "line_width",
+        "vertical_padding",
+    )
 
     def __init__(self, align: HorizontalAlignment, valign: VerticalAlignment):
         super().__init__()
@@ -39,9 +49,9 @@ def normalize_blocks(self) -> int:
             The height of the normalized cell.
         """
         self._flush_inline()
-        self.blocks = list(chain(*(line.split('\n') for line in self.blocks)))
+        self.blocks = list(chain(*(line.split("\n") for line in self.blocks)))
         if not self.blocks:
-            self.blocks = ['']
+            self.blocks = [""]
         return len(self.blocks)
 
     @property
@@ -62,8 +72,9 @@ def width(self):
         """
         if self._width:
             return self._width
-        return max((len(line) for line in chain(*(block.split('\n')
-                                                  for block in self.blocks))))
+        return max(
+            (len(line) for line in chain(*(block.split("\n") for block in self.blocks)))
+        )
 
     @width.setter
     def width(self, width):
@@ -77,8 +88,7 @@ def width(self, width):
 
         # record new width and start reformatting
         self._width = width
-        format_spec = '{{:{align}{width}}}'.format(align=self.align.value,
-                                                   width=width)
+        format_spec = "{{:{align}{width}}}".format(align=self.align.value, width=width)
         self.blocks = [format_spec.format(b) for b in self.blocks]
 
     @height.setter
@@ -91,14 +101,17 @@ def height(self, height: int):
         """
         rows = len(self.blocks)
         if rows < height:
-            empty_line = ['']
+            empty_line = [""]
             if self.valign == VerticalAlignment.bottom:
-                self.vertical_padding = (height - rows)
+                self.vertical_padding = height - rows
                 self.blocks = self.vertical_padding * empty_line + self.blocks
             elif self.valign == VerticalAlignment.middle:
                 self.vertical_padding = (height - rows) // 2
-                self.blocks = self.vertical_padding * empty_line + \
-                    self.blocks + ((height - rows + 1) // 2 * empty_line)
+                self.blocks = (
+                    self.vertical_padding * empty_line
+                    + self.blocks
+                    + ((height - rows + 1) // 2 * empty_line)
+                )
             else:
                 self.blocks = self.blocks + ((height - rows) * empty_line)
 
@@ -116,9 +129,9 @@ def get_annotations(self, idx: int, row_width: int) -> List[Annotation]:
         # the easy case - the cell has only one line :)
         if len(self.blocks) == 1:
             self.line_width[0] = self.width
-            return horizontal_shift(self.annotations,
-                                    self.line_width[0],
-                                    self.width, self.align, idx)
+            return horizontal_shift(
+                self.annotations, self.line_width[0], self.width, self.align, idx
+            )
 
         # the more challenging one - multiple cell lines
         line_break_pos = list(accumulate(self.line_width))
@@ -127,17 +140,19 @@ def get_annotations(self, idx: int, row_width: int) -> List[Annotation]:
         # assign annotations to the corresponding line
         for a in self.annotations:
             for no, line_break in enumerate(line_break_pos):
-                if a.start <= (line_break + no):         # consider newline
+                if a.start <= (line_break + no):  # consider newline
                     annotation_lines[no + self.vertical_padding].append(a)
                     break
 
         # compute the annotation index based on its line and delta :)
         result = []
-        idx += self.vertical_padding   # newlines introduced by the padding
-        for line_annotations, line_len in zip(annotation_lines,
-                                              self.line_width):
-            result.extend(horizontal_shift(line_annotations, line_len,
-                                           self.width, self.align, idx))
+        idx += self.vertical_padding  # newlines introduced by the padding
+        for line_annotations, line_len in zip(annotation_lines, self.line_width):
+            result.extend(
+                horizontal_shift(
+                    line_annotations, line_len, self.width, self.align, idx
+                )
+            )
             idx += row_width - line_len
         self.line_width = [self.width for _ in self.line_width]
         return result
@@ -151,7 +166,7 @@ class TableRow:
         cell_separator: string used for separating columns from each other.
     """
 
-    __slots__ = ('columns', 'cell_separator')
+    __slots__ = ("columns", "cell_separator")
 
     def __init__(self, cell_separator):
         self.columns: List[TableCell] = []
@@ -162,10 +177,11 @@ def __len__(self):
 
     def get_text(self) -> str:
         """Return a text representation of the TableRow."""
-        row_lines = [self.cell_separator.join(line)
-                     for line in zip(*[column.blocks
-                                       for column in self.columns])]
-        return '\n'.join(row_lines)
+        row_lines = [
+            self.cell_separator.join(line)
+            for line in zip(*[column.blocks for column in self.columns])
+        ]
+        return "\n".join(row_lines)
 
     @property
     def width(self):
@@ -173,8 +189,9 @@ def width(self):
         if not self.columns:
             return 0
 
-        return sum((cell.width for cell in self.columns)) + len(
-            self.cell_separator) * (len(self.columns) - 1)
+        return sum((cell.width for cell in self.columns)) + len(self.cell_separator) * (
+            len(self.columns) - 1
+        )
 
 
 class Table:
@@ -186,7 +203,7 @@ class Table:
         cell_separator: string used for separating cells from each other.
     """
 
-    __slots__ = ('rows', 'left_margin_len', 'cell_separator')
+    __slots__ = ("rows", "left_margin_len", "cell_separator")
 
     def __init__(self, left_margin_len: int, cell_separator):
         self.rows = []
@@ -210,9 +227,11 @@ def add_cell(self, table_cell: TableCell):
     def _set_row_height(self):
         """Set the cell height for all :class:`TableCell`s in the table."""
         for row in self.rows:
-            max_row_height = max((cell.normalize_blocks()
-                                  for cell in row.columns)) \
-                if row.columns else 0
+            max_row_height = (
+                max((cell.normalize_blocks() for cell in row.columns))
+                if row.columns
+                else 0
+            )
             for cell in row.columns:
                 cell.height = max_row_height
 
@@ -223,9 +242,13 @@ def _set_column_width(self):
 
         for cur_column_idx in range(max_columns):
             # determine the required column width for the current column
-            max_column_width = max((row.columns[cur_column_idx].width
-                                    for row in self.rows
-                                    if len(row) > cur_column_idx))
+            max_column_width = max(
+                (
+                    row.columns[cur_column_idx].width
+                    for row in self.rows
+                    if len(row) > cur_column_idx
+                )
+            )
 
             # set column width for all TableCells in the current column
             for row in self.rows:
@@ -235,14 +258,13 @@ def _set_column_width(self):
     def get_text(self):
         """Return and render the text of the given table."""
         if not self.rows:
-            return '\n'
+            return "\n"
 
         self._set_row_height()
         self._set_column_width()
-        return '\n'.join((row.get_text() for row in self.rows)) + '\n'
+        return "\n".join((row.get_text() for row in self.rows)) + "\n"
 
-    def get_annotations(self, idx: int,
-                        left_margin_len: int) -> List[Annotation]:
+    def get_annotations(self, idx: int, left_margin_len: int) -> List[Annotation]:
         r"""Return all annotations in the given table.
 
         Args:
@@ -270,6 +292,6 @@ def get_annotations(self, idx: int,
                 annotations += cell.get_annotations(cell_idx, row_width)
                 cell_idx += cell.width + len(row.cell_separator)
 
-            idx += (row_width + 1) * row_height   # linebreak
+            idx += (row_width + 1) * row_height  # linebreak
 
         return annotations
diff --git a/src/inscriptis/service/web.py b/src/inscriptis/service/web.py
index f654b26..bb54665 100755
--- a/src/inscriptis/service/web.py
+++ b/src/inscriptis/service/web.py
@@ -10,35 +10,39 @@
 from inscriptis.model.config import ParserConfig
 
 app = Flask(__name__)
-CONFIG = ParserConfig(css=RELAXED_CSS_PROFILE, display_images=True,
-                      deduplicate_captions=True, display_links=False)
+CONFIG = ParserConfig(
+    css=RELAXED_CSS_PROFILE,
+    display_images=True,
+    deduplicate_captions=True,
+    display_links=False,
+)
 
 
-@app.route('/')
+@app.route("/")
 def index():
     """Print a short status message for the Web service's base URL."""
-    return 'Inscriptis text to HTML Web service.'
+    return "Inscriptis text to HTML Web service."
 
 
-@app.route('/get_text', methods=['POST'])
+@app.route("/get_text", methods=["POST"])
 def get_text_call():
     """Return the text representation of the given HTML content."""
-    content_type = request.headers['Content-type']
-    if '; encoding=' in content_type:
-        encoding = content_type.split('; encoding=')[1]
+    content_type = request.headers["Content-type"]
+    if "; encoding=" in content_type:
+        encoding = content_type.split("; encoding=")[1]
     else:
-        encoding = 'UTF-8'
-    html_content = request.data.decode(encoding, errors='ignore')
+        encoding = "UTF-8"
+    html_content = request.data.decode(encoding, errors="ignore")
     text = get_text(html_content, CONFIG)
-    return Response(text, mimetype='text/plain')
+    return Response(text, mimetype="text/plain")
 
 
-@app.route('/version', methods=['GET'])
+@app.route("/version", methods=["GET"])
 def get_version_call():
     """Return the used inscriptis version."""
-    return Response(__version__ + '\n', mimetype='text/plain')
+    return Response(__version__ + "\n", mimetype="text/plain")
 
 
-if __name__ == '__main__':
-    print('Starting Web service based on Inscriptis', __version__)
-    app.run(threaded=True, host='127.0.0.1', port=5000)
+if __name__ == "__main__":
+    print("Starting Web service based on Inscriptis", __version__)
+    app.run(threaded=True, host="127.0.0.1", port=5000)
diff --git a/tests/test_annotation.py b/tests/test_annotation.py
index b19ddeb..c3518b3 100644
--- a/tests/test_annotation.py
+++ b/tests/test_annotation.py
@@ -11,57 +11,61 @@
 
 
 def test_horizontal_shift():
-    a = [Annotation(0, 4, 'test')]
+    a = [Annotation(0, 4, "test")]
 
     # no shift
-    assert horizontal_shift(a,
-                            content_width=5,
-                            line_width=10,
-                            align=HorizontalAlignment.left,
-                            shift=0).pop() == Annotation(0, 4, 'test')
+    assert horizontal_shift(
+        a, content_width=5, line_width=10, align=HorizontalAlignment.left, shift=0
+    ).pop() == Annotation(0, 4, "test")
 
     # shift
-    assert horizontal_shift(a,
-                            content_width=5,
-                            line_width=10,
-                            align=HorizontalAlignment.left,
-                            shift=3).pop() == Annotation(3, 7, 'test')
+    assert horizontal_shift(
+        a, content_width=5, line_width=10, align=HorizontalAlignment.left, shift=3
+    ).pop() == Annotation(3, 7, "test")
 
     # realignment to the right
-    assert horizontal_shift(a,
-                            content_width=len('test'),
-                            line_width=10,
-                            align=HorizontalAlignment.right,
-                            shift=0).pop() == Annotation(6, 10, 'test')
-    assert '{:>10}'.format('test')[6:10] == 'test'
-
+    assert horizontal_shift(
+        a,
+        content_width=len("test"),
+        line_width=10,
+        align=HorizontalAlignment.right,
+        shift=0,
+    ).pop() == Annotation(6, 10, "test")
+    assert "{:>10}".format("test")[6:10] == "test"
 
     # shift + realignment to the right
-    assert horizontal_shift(a,
-                            content_width=len('test'),
-                            line_width=10,
-                            align=HorizontalAlignment.right,
-                            shift=3).pop() == Annotation(9, 13, 'test')
+    assert horizontal_shift(
+        a,
+        content_width=len("test"),
+        line_width=10,
+        align=HorizontalAlignment.right,
+        shift=3,
+    ).pop() == Annotation(9, 13, "test")
 
     # realignment to the center
-    assert horizontal_shift(a,
-                            content_width=len('test'),
-                            line_width=10,
-                            align=HorizontalAlignment.center,
-                            shift=0).pop() == Annotation(3, 7, 'test')
-    assert '{:^10}'.format('test')[3:7] == 'test'
+    assert horizontal_shift(
+        a,
+        content_width=len("test"),
+        line_width=10,
+        align=HorizontalAlignment.center,
+        shift=0,
+    ).pop() == Annotation(3, 7, "test")
+    assert "{:^10}".format("test")[3:7] == "test"
 
-    assert horizontal_shift(a,
-                            content_width=len('test'),
-                            line_width=11,
-                            align=HorizontalAlignment.center,
-                            shift=0).pop() == Annotation(3, 7, 'test')
-    assert '{:^11}'.format('test')[3:7] == 'test'
+    assert horizontal_shift(
+        a,
+        content_width=len("test"),
+        line_width=11,
+        align=HorizontalAlignment.center,
+        shift=0,
+    ).pop() == Annotation(3, 7, "test")
+    assert "{:^11}".format("test")[3:7] == "test"
 
     # realignment + shift
-    assert horizontal_shift(a,
-                            content_width=len('test'),
-                            line_width=11,
-                            align=HorizontalAlignment.center,
-                            shift=7).pop() == Annotation(10, 14, 'test')
-
+    assert horizontal_shift(
+        a,
+        content_width=len("test"),
+        line_width=11,
+        align=HorizontalAlignment.center,
+        shift=7,
+    ).pop() == Annotation(10, 14, "test")
diff --git a/tests/test_annotation_output_processor.py b/tests/test_annotation_output_processor.py
index c80a654..82fdc7a 100644
--- a/tests/test_annotation_output_processor.py
+++ b/tests/test_annotation_output_processor.py
@@ -11,12 +11,12 @@
 from inscriptis.annotation.output.surface import SurfaceExtractor
 from inscriptis.annotation.output.xml import XmlExtractor
 
-EXAMPLE_OUTPUT = {'text': 'Chur\n\nChur is the capital and largest town of '
-                          'the Swiss canton of the Grisons and lies in the '
-                          'Grisonian Rhine Valley.',
-                  'label': [[0, 4, 'heading'],
-                            [0, 4, 'h1'],
-                            [6, 10, 'emphasis']]}
+EXAMPLE_OUTPUT = {
+    "text": "Chur\n\nChur is the capital and largest town of "
+    "the Swiss canton of the Grisons and lies in the "
+    "Grisonian Rhine Valley.",
+    "label": [[0, 4, "heading"], [0, 4, "h1"], [6, 10, "emphasis"]],
+}
 
 
 def test_abstract_class():
@@ -31,13 +31,15 @@ def test_surface_annotator():
     result = processor(EXAMPLE_OUTPUT)
 
     # the old keys haven't been changed
-    assert 'text' in result
-    assert 'label' in result
+    assert "text" in result
+    assert "label" in result
 
     # and we have additional information on surface forms :)
-    assert result['surface'] == [('heading', 'Chur'),
-                                 ('h1', 'Chur'),
-                                 ('emphasis', 'Chur')]
+    assert result["surface"] == [
+        ("heading", "Chur"),
+        ("h1", "Chur"),
+        ("emphasis", "Chur"),
+    ]
 
 
 def test_xml_annotator():
@@ -45,35 +47,40 @@ def test_xml_annotator():
     result = processor(EXAMPLE_OUTPUT)
 
     # and we have additional information on surface forms :)
-    assert result == ('<?xml version="1.0" encoding="UTF-8" ?>\n'
-                      '<h1><heading>Chur</heading></h1>\n\n<emphasis>'
-                      'Chur</emphasis> is the capital and largest town '
-                      'of the Swiss canton of the Grisons and lies in '
-                      'the Grisonian Rhine Valley.')
+    assert result == (
+        '<?xml version="1.0" encoding="UTF-8" ?>\n'
+        "<h1><heading>Chur</heading></h1>\n\n<emphasis>"
+        "Chur</emphasis> is the capital and largest town "
+        "of the Swiss canton of the Grisons and lies in "
+        "the Grisonian Rhine Valley."
+    )
 
 
 def test_html_annotator():
     processor = HtmlExtractor()
     result = processor(EXAMPLE_OUTPUT)
 
-    assert result.startswith('<html><head><style>')
-    assert result.endswith('</style></head>'
-                           '<body><pre><span class="heading-label">heading'
-                           '</span><span class="heading">'
-                           '<span class="h1-label">h1</span><span class="h1">'
-                           'Chur</span></span></pre>\n'
-                           '<pre></pre>\n'
-                           '<pre><span class="emphasis-label">emphasis</span>'
-                           '<span class="emphasis">Chur</span> is the capital '
-                           'and largest town of the Swiss canton of the '
-                            'Grisons and lies in the Grisonian Rhine Valley.'
-                           '</pre></body></html>')
+    assert result.startswith("<html><head><style>")
+    assert result.endswith(
+        "</style></head>"
+        '<body><pre><span class="heading-label">heading'
+        '</span><span class="heading">'
+        '<span class="h1-label">h1</span><span class="h1">'
+        "Chur</span></span></pre>\n"
+        "<pre></pre>\n"
+        '<pre><span class="emphasis-label">emphasis</span>'
+        '<span class="emphasis">Chur</span> is the capital '
+        "and largest town of the Swiss canton of the "
+        "Grisons and lies in the Grisonian Rhine Valley."
+        "</pre></body></html>"
+    )
 
 
 def test_trailing_tag_annotation():
     processor = XmlExtractor()
-    result = processor({'text': 'Ehre sei Gott!',
-                        'label': [[9, 14, 'emphasis']]})
+    result = processor({"text": "Ehre sei Gott!", "label": [[9, 14, "emphasis"]]})
 
-    assert result == ('<?xml version="1.0" encoding="UTF-8" ?>\n'
-                      'Ehre sei <emphasis>Gott!</emphasis>')
+    assert result == (
+        '<?xml version="1.0" encoding="UTF-8" ?>\n'
+        "Ehre sei <emphasis>Gott!</emphasis>"
+    )
diff --git a/tests/test_annotation_rule_parsing.py b/tests/test_annotation_rule_parsing.py
index fef265a..5893831 100644
--- a/tests/test_annotation_rule_parsing.py
+++ b/tests/test_annotation_rule_parsing.py
@@ -18,59 +18,58 @@ def test_parse():
     """
     basic rule parsing.
     """
-    rules = {'table#border=1': ['table'],
-             'hr': ['horizontal-line']}
+    rules = {"table#border=1": ["table"], "hr": ["horizontal-line"]}
     tags, attrs = AnnotationModel._parse(rules)
 
-    assert tags == {'hr': ['horizontal-line']}
+    assert tags == {"hr": ["horizontal-line"]}
 
-    apply_annotation= attrs[0]
-    assert apply_annotation.match_tag == 'table'
-    assert apply_annotation.match_value == '1'
-    assert apply_annotation.attr == 'border'
+    apply_annotation = attrs[0]
+    assert apply_annotation.match_tag == "table"
+    assert apply_annotation.match_value == "1"
+    assert apply_annotation.attr == "border"
 
-    e = HtmlElement(tag='table')
-    apply_annotation.apply('1', e)
-    assert e.annotation == ('table', )
+    e = HtmlElement(tag="table")
+    apply_annotation.apply("1", e)
+    assert e.annotation == ("table",)
 
 
 def test_apply_annotation():
     """
     rule application.
     """
-    rules = {'table#border=1': ['table'],
-             'hr': ['horizontal-line'],
-             '#color=red': ['red'],
-             '#bgcolor': ['bgcolor']}
-
-    css = deepcopy(CSS_PROFILES['strict'])
+    rules = {
+        "table#border=1": ["table"],
+        "hr": ["horizontal-line"],
+        "#color=red": ["red"],
+        "#bgcolor": ["bgcolor"],
+    }
+
+    css = deepcopy(CSS_PROFILES["strict"])
     annotation_model = AnnotationModel(css, rules)
-    assert annotation_model.css['hr'].annotation == ('horizontal-line', )
+    assert annotation_model.css["hr"].annotation == ("horizontal-line",)
 
     attribute_handler = Attribute()
     attribute_handler.merge_attribute_map(annotation_model.css_attr)
-    assert 'table#border=1' in str(attribute_handler.attribute_mapping['border'])
-    assert '{any}#color=red' in str(attribute_handler.attribute_mapping['color'])
-    assert '{any}#bgcolor={any}' in str(attribute_handler.attribute_mapping['bgcolor'])
+    assert "table#border=1" in str(attribute_handler.attribute_mapping["border"])
+    assert "{any}#color=red" in str(attribute_handler.attribute_mapping["color"])
+    assert "{any}#bgcolor={any}" in str(attribute_handler.attribute_mapping["bgcolor"])
+
 
 def test_merged_attribute():
     """
     test multiple rules per attribute
     """
-    rules = {'#color=white': ['white'],
-             '#color=yellow': ['yellow']}
-    css = deepcopy(CSS_PROFILES['strict'])
+    rules = {"#color=white": ["white"], "#color=yellow": ["yellow"]}
+    css = deepcopy(CSS_PROFILES["strict"])
     annotation_model = AnnotationModel(css, rules)
 
     attribute_handler = Attribute()
     attribute_handler.merge_attribute_map(annotation_model.css_attr)
 
     e = HtmlElement()
-    attribute_handler.attribute_mapping['color']('green', e)
+    attribute_handler.attribute_mapping["color"]("green", e)
     assert e.annotation == ()
-    attribute_handler.attribute_mapping['color']('yellow', e)
-    assert e.annotation == ('yellow', )
-    attribute_handler.attribute_mapping['color']('white', e)
-    assert e.annotation == ('yellow', 'white')
-
-
+    attribute_handler.attribute_mapping["color"]("yellow", e)
+    assert e.annotation == ("yellow",)
+    attribute_handler.attribute_mapping["color"]("white", e)
+    assert e.annotation == ("yellow", "white")
diff --git a/tests/test_block.py b/tests/test_block.py
index 21ac592..8aacc93 100644
--- a/tests/test_block.py
+++ b/tests/test_block.py
@@ -11,25 +11,25 @@ def test_merge_normal_text_collapsable_whitespaces():
     """
     b = Block(0, Prefix())
     b.merge_normal_text("Hallo")
-    assert b._content == 'Hallo'
+    assert b._content == "Hallo"
     assert not b.collapsable_whitespace
 
     b = Block(0, Prefix())
     b.merge_normal_text(" Hallo ")
-    assert b._content == 'Hallo '
+    assert b._content == "Hallo "
     assert b.collapsable_whitespace
 
     b = Block(0, Prefix())
-    b.merge_normal_text('')
-    assert b._content == ''
+    b.merge_normal_text("")
+    assert b._content == ""
     assert b.collapsable_whitespace
 
-    b.merge_normal_text(' ')
-    assert b._content == ''
+    b.merge_normal_text(" ")
+    assert b._content == ""
     assert b.collapsable_whitespace
 
-    b.merge_normal_text('  ')
-    assert b._content == ''
+    b.merge_normal_text("  ")
+    assert b._content == ""
     assert b.collapsable_whitespace
 
 
@@ -37,29 +37,29 @@ def test_merge_normal_non_collapsable_whitespaces():
     b = Block(0, Prefix())
     b.collapsable_whitespace = False
     b.merge_normal_text("Hallo")
-    assert b._content == 'Hallo'
+    assert b._content == "Hallo"
     assert not b.collapsable_whitespace
 
     b = Block(0, Prefix())
     b.collapsable_whitespace = False
     b.merge_normal_text(" Hallo ")
-    assert b._content == ' Hallo '
+    assert b._content == " Hallo "
     assert b.collapsable_whitespace
 
     b = Block(0, Prefix())
     b.collapsable_whitespace = False
-    b.merge_normal_text('')
-    assert b._content == ''
+    b.merge_normal_text("")
+    assert b._content == ""
     assert not b.collapsable_whitespace
 
     b = Block(0, Prefix())
     b.collapsable_whitespace = False
-    b.merge_normal_text(' ')
-    assert b._content == ' '
+    b.merge_normal_text(" ")
+    assert b._content == " "
     assert b.collapsable_whitespace
 
     b = Block(0, Prefix())
     b.collapsable_whitespace = False
-    b.merge_normal_text('  ')
-    assert b._content == ' '
+    b.merge_normal_text("  ")
+    assert b._content == " "
     assert b.collapsable_whitespace
diff --git a/tests/test_broken_table_handling.py b/tests/test_broken_table_handling.py
index bd210e9..dee75f3 100644
--- a/tests/test_broken_table_handling.py
+++ b/tests/test_broken_table_handling.py
@@ -9,21 +9,16 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-config = ParserConfig(css=CSS_PROFILES['strict'])
+config = ParserConfig(css=CSS_PROFILES["strict"])
 
 
 def test_forgotten_td_close_tag():
     # one line (i.e., missing </td> before the next <td> and the next </tr>
-    html = ('<body>hallo<table>'
-            '<tr><td>1<td>2</tr>'
-            '</table>echo</body>')
+    html = "<body>hallo<table>" "<tr><td>1<td>2</tr>" "</table>echo</body>"
     print(html)
     # assert get_text(html, config) == u'hallo\n1  2\necho'
 
     # two lines (i.e. missing </td> before the <tr> and before the </table>
-    html = ('<body>hallo<table>'
-            '<tr><td>1<td>2'
-            '<tr><td>3<td>4'
-            '</table>echo</body>')
+    html = "<body>hallo<table>" "<tr><td>1<td>2" "<tr><td>3<td>4" "</table>echo</body>"
     print(html)
-    assert get_text(html, config) == u'hallo\n1  2\n3  4\n\necho'
+    assert get_text(html, config) == "hallo\n1  2\n3  4\n\necho"
diff --git a/tests/test_double_a.py b/tests/test_double_a.py
index 24623bd..a16ceb7 100644
--- a/tests/test_double_a.py
+++ b/tests/test_double_a.py
@@ -9,10 +9,14 @@
 
 
 def test_successive_a():
-    html = '<html><body><a href="first">first</a>' \
-           '<a href="second">second</a></body></html>'
-    assert get_text(html) == 'firstsecond'
+    html = (
+        '<html><body><a href="first">first</a>'
+        '<a href="second">second</a></body></html>'
+    )
+    assert get_text(html) == "firstsecond"
 
-    html = '<html><body><a href="first">first</a>\n' \
-           '<a href="second">second</a></body></html>'
-    assert get_text(html) == 'first second'
+    html = (
+        '<html><body><a href="first">first</a>\n'
+        '<a href="second">second</a></body></html>'
+    )
+    assert get_text(html) == "first second"
diff --git a/tests/test_empty_string.py b/tests/test_empty_string.py
index dd46353..9f7987c 100644
--- a/tests/test_empty_string.py
+++ b/tests/test_empty_string.py
@@ -9,9 +9,8 @@
 
 
 def test_empty_and_corrupt():
-    assert get_text('test').strip() == 'test'
-    assert get_text('  ') == ''
-    assert get_text('') == ''
+    assert get_text("test").strip() == "test"
+    assert get_text("  ") == ""
+    assert get_text("") == ""
     # test for the behaviour of older and recent lxml versions.
-    assert get_text('<<<').strip() in ('<<<', '<<', '')
-
+    assert get_text("<<<").strip() in ("<<<", "<<", "")
diff --git a/tests/test_engine.py b/tests/test_engine.py
index 728191b..519c1ee 100644
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -4,8 +4,8 @@
 
 
 def test_text_from_empty_content():
-    assert get_text('') == ''
+    assert get_text("") == ""
 
 
 def test_annotations_from_empty_content():
-    assert get_annotated_text('') == {}
+    assert get_annotated_text("") == {}
diff --git a/tests/test_html_conversion_options.py b/tests/test_html_conversion_options.py
index fdc69be..c9bb878 100644
--- a/tests/test_html_conversion_options.py
+++ b/tests/test_html_conversion_options.py
@@ -9,69 +9,70 @@
 
 
 def test_display_links():
-    html = '''<html>
+    html = """<html>
                  <body>
                    <a href="first">first</a>
                    <a href="second">second</a>
                    <a name="third">third</a>
                  </body>
                 </html>
-            '''
+            """
     config = ParserConfig(display_links=True)
-    assert get_text(html, config).strip() == \
-        '[first](first) [second](second) third'
+    assert get_text(html, config).strip() == "[first](first) [second](second) third"
 
 
 def test_display_anchors():
-    html = '''<html>
+    html = """<html>
                  <body>
                    <a name="first">first</a>
                    <a href="second">second</a>
                  </body>
                 </html>
-            '''
+            """
     config = ParserConfig(display_anchors=True)
-    assert get_text(html, config).strip() == \
-        '[first](first) second'
+    assert get_text(html, config).strip() == "[first](first) second"
 
 
 def test_display_links_and_anchors():
-    html = '''<html>
+    html = """<html>
                  <body>
                    <a href="first">first</a>
                    <a href="second">second</a>
                    <a name="third">third</a>
                  </body>
                 </html>
-            '''
+            """
     config = ParserConfig(display_links=True, display_anchors=True)
-    assert get_text(html, config).strip() == \
-        '[first](first) [second](second) [third](third)'
+    assert (
+        get_text(html, config).strip()
+        == "[first](first) [second](second) [third](third)"
+    )
 
 
 def test_display_images():
-    html = '''<html>
+    html = """<html>
                  <body>
                    <img src="test1" alt="Ein Test Bild" title="Hallo" />
                    <img src="test2" alt="Ein Test Bild" title="Juhu" />
                    <img src="test3" alt="Ein zweites Bild" title="Echo" />
                  </body>
                 </html>
-            '''
+            """
     config = ParserConfig(display_images=True)
-    assert get_text(html, config).strip() == \
-        '[Ein Test Bild] [Ein Test Bild] [Ein zweites Bild]'
+    assert (
+        get_text(html, config).strip()
+        == "[Ein Test Bild] [Ein Test Bild] [Ein zweites Bild]"
+    )
 
 
 def test_display_images_deduplicated():
-    html = '''<html>
+    html = """<html>
                  <body>
                    <img src="test1" alt="Ein Test Bild" title="Hallo" />
                    <img src="test2" alt="Ein Test Bild" title="Juhu" />
                    <img src="test3" alt="Ein zweites Bild" title="Echo" />
                  </body>
                 </html>
-            '''
+            """
     config = ParserConfig(display_images=True, deduplicate_captions=True)
-    assert get_text(html, config).strip() == \
-        '[Ein Test Bild] [Ein zweites Bild]'
+    assert get_text(html, config).strip() == "[Ein Test Bild] [Ein zweites Bild]"
diff --git a/tests/test_html_snippets.py b/tests/test_html_snippets.py
index 9e7197f..9df864d 100644
--- a/tests/test_html_snippets.py
+++ b/tests/test_html_snippets.py
@@ -11,10 +11,10 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-TESTCASE_PATTERN = join(dirname(__file__), 'html/*.txt')
+TESTCASE_PATTERN = join(dirname(__file__), "html/*.txt")
 
 
-def test_html_snippets(filter_str=''):
+def test_html_snippets(filter_str=""):
     for testcase_txt in glob(TESTCASE_PATTERN):
         if filter_str not in testcase_txt:
             continue
@@ -22,26 +22,30 @@ def test_html_snippets(filter_str=''):
         with open(testcase_txt) as f:
             reference_txt = f.read().rstrip()
 
-        with open(testcase_txt.replace('.txt', '.html')) as f:
+        with open(testcase_txt.replace(".txt", ".html")) as f:
             print(f.name)
-            html = '<html><body>{}</body></html>'.format(f.read())
+            html = "<html><body>{}</body></html>".format(f.read())
 
-        converted_txt = get_text(html, ParserConfig(
-            css=CSS_PROFILES['strict'])).rstrip()
+        converted_txt = get_text(
+            html, ParserConfig(css=CSS_PROFILES["strict"])
+        ).rstrip()
 
         if converted_txt != reference_txt:
-            print('File:{}\nHTML:\n{}\n\nReference:\n{}\n\nConverted:\n{}'
-                  .format(testcase_txt, html, reference_txt, converted_txt))
-            print('HTML file:', testcase_txt.replace('.txt', '.html'))
-            print("Visualize differences with `vimdiff reference.txt "
-                  "converted.txt`")
+            print(
+                "File:{}\nHTML:\n{}\n\nReference:\n{}\n\nConverted:\n{}".format(
+                    testcase_txt, html, reference_txt, converted_txt
+                )
+            )
+            print("HTML file:", testcase_txt.replace(".txt", ".html"))
+            print("Visualize differences with `vimdiff reference.txt " "converted.txt`")
             open("reference.txt", "w").write(reference_txt)
             open("converted.txt", "w").write(converted_txt)
 
         assert converted_txt == reference_txt
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     from sys import argv
-    filter_str = argv[1] if len(argv) > 1 else ''
+
+    filter_str = argv[1] if len(argv) > 1 else ""
     test_html_snippets(filter_str)
diff --git a/tests/test_html_snippets_annotations.py b/tests/test_html_snippets_annotations.py
index 9655afa..6c481a1 100644
--- a/tests/test_html_snippets_annotations.py
+++ b/tests/test_html_snippets_annotations.py
@@ -12,18 +12,18 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-TESTCASE_PATTERN = os.path.join(os.path.dirname(__file__), 'html/*.json')
+TESTCASE_PATTERN = os.path.join(os.path.dirname(__file__), "html/*.json")
 
 
-def assert_equal_ignoring_whitespace(reference: List[str],
-                                     converted: List[str]) -> bool:
+def assert_equal_ignoring_whitespace(
+    reference: List[str], converted: List[str]
+) -> bool:
     for (ref_tag, ref_str), (conv_tag, conv_str) in zip(reference, converted):
-
         assert ref_tag == conv_tag
-        assert ''.join(ref_str.split()) == ''.join(conv_str.split())
+        assert "".join(ref_str.split()) == "".join(conv_str.split())
 
 
-def test_html_annotations(filter_str=''):
+def test_html_annotations(filter_str=""):
     for annotation_file in glob(TESTCASE_PATTERN):
         if filter_str not in annotation_file:
             continue
@@ -31,33 +31,39 @@ def test_html_annotations(filter_str=''):
         with open(annotation_file) as f:
             reference = load(f)
 
-        with open(annotation_file.replace('.json', '.html')) as f:
+        with open(annotation_file.replace(".json", ".html")) as f:
             print(f.name)
-            html = '<html><body>{}</body></html>'.format(f.read())
+            html = "<html><body>{}</body></html>".format(f.read())
 
-        for indentation_strategy in ('strict', 'relaxed'):
-            result = get_annotated_text(html, ParserConfig(
-                css=CSS_PROFILES[indentation_strategy],
-                annotation_rules=reference['annotation_rules']))
+        for indentation_strategy in ("strict", "relaxed"):
+            result = get_annotated_text(
+                html,
+                ParserConfig(
+                    css=CSS_PROFILES[indentation_strategy],
+                    annotation_rules=reference["annotation_rules"],
+                ),
+            )
 
-            converted = [[a[2], result['text'][a[0]:a[1]]]
-                        for a in result['label']]
+            converted = [[a[2], result["text"][a[0] : a[1]]] for a in result["label"]]
 
-            if reference['result'] != converted:
+            if reference["result"] != converted:
                 print("Reference:")
-                print(reference['result'])
-                print("\nConverted (indentation strategy: {})".format(indentation_strategy))
+                print(reference["result"])
+                print(
+                    "\nConverted (indentation strategy: {})".format(
+                        indentation_strategy
+                    )
+                )
                 print(converted)
 
-            if indentation_strategy == 'strict':
-                assert reference['result'] == converted
+            if indentation_strategy == "strict":
+                assert reference["result"] == converted
             else:
-                assert_equal_ignoring_whitespace(reference['result'],
-                                                 converted)
+                assert_equal_ignoring_whitespace(reference["result"], converted)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     from sys import argv
 
-    filter_str = argv[1] if len(argv) > 1 else ''
+    filter_str = argv[1] if len(argv) > 1 else ""
     test_html_annotations(filter_str)
diff --git a/tests/test_limit_whitespace_affixes.py b/tests/test_limit_whitespace_affixes.py
index 20d6666..53e97fd 100644
--- a/tests/test_limit_whitespace_affixes.py
+++ b/tests/test_limit_whitespace_affixes.py
@@ -13,36 +13,41 @@
 
 
 def test_html_element_refinement():
-    new = HtmlElement('span', display=Display.inline, prefix=' ', suffix=' ',
-                      limit_whitespace_affixes=True)
-    pre = HtmlElement('pre', display=Display.block, whitespace=WhiteSpace.pre)
-    code = HtmlElement('code')
+    new = HtmlElement(
+        "span",
+        display=Display.inline,
+        prefix=" ",
+        suffix=" ",
+        limit_whitespace_affixes=True,
+    )
+    pre = HtmlElement("pre", display=Display.block, whitespace=WhiteSpace.pre)
+    code = HtmlElement("code")
 
     # refinement with pre and whitespaces
     refined = pre.get_refined_html_element(copy(new))
-    assert refined.prefix == ''
-    assert refined.suffix == ''
+    assert refined.prefix == ""
+    assert refined.suffix == ""
 
     # refinement with code and whitespaces
     refined = code.get_refined_html_element(copy(new))
-    assert refined.prefix == ' '
-    assert refined.suffix == ' '
+    assert refined.prefix == " "
+    assert refined.suffix == " "
 
     # refinement with pre and non-whitespaces
-    new.prefix = ' 1. '
-    new.suffix = '<'
+    new.prefix = " 1. "
+    new.suffix = "<"
     refined = pre.get_refined_html_element(copy(new))
-    assert refined.prefix == ' 1. '
-    assert refined.suffix == '<'
+    assert refined.prefix == " 1. "
+    assert refined.suffix == "<"
 
     # refinement with code and non-whitespaces
     refined = code.get_refined_html_element(copy(new))
-    assert refined.prefix == ' 1. '
-    assert refined.suffix == '<'
+    assert refined.prefix == " 1. "
+    assert refined.suffix == "<"
 
 
 def test_limit_whitespace_affixes():
-    html = '''<html>
+    html = """<html>
                  <body>
                    hallo<span>echo</span>
                    <pre>
@@ -51,9 +56,10 @@ def <span>hallo</span>():
                    </pre>
                  </body>
                 </html>
-            '''
+            """
     config = ParserConfig(css=RELAXED_CSS_PROFILE)
-    assert get_text(html, config).strip() == \
-        'hallo echo\n\n' \
-        'def hallo():\n' \
+    assert (
+        get_text(html, config).strip() == "hallo echo\n\n"
+        "def hallo():\n"
         '   print("echo")'
+    )
diff --git a/tests/test_list_div.py b/tests/test_list_div.py
index 07ae5d1..44c1ef5 100644
--- a/tests/test_list_div.py
+++ b/tests/test_list_div.py
@@ -10,21 +10,21 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-config = ParserConfig(css=CSS_PROFILES['strict'])
+config = ParserConfig(css=CSS_PROFILES["strict"])
 
 
 def test_divs():
-    html = u'<body>Thomas<div>Anton</div>Maria</body>'
-    assert get_text(html, config) == u'Thomas\nAnton\nMaria'
+    html = "<body>Thomas<div>Anton</div>Maria</body>"
+    assert get_text(html, config) == "Thomas\nAnton\nMaria"
 
-    html = u'<body>Thomas<div>Anna <b>läuft</b> weit weg.</div>'
-    assert get_text(html, config) == u'Thomas\nAnna läuft weit weg.'
+    html = "<body>Thomas<div>Anna <b>läuft</b> weit weg.</div>"
+    assert get_text(html, config) == "Thomas\nAnna läuft weit weg."
 
-    html = u'<body>Thomas <ul><li><div>Anton</div>Maria</ul></body>'
-    assert get_text(html, config) == u'Thomas\n  * Anton\n    Maria'
+    html = "<body>Thomas <ul><li><div>Anton</div>Maria</ul></body>"
+    assert get_text(html, config) == "Thomas\n  * Anton\n    Maria"
 
-    html = u'<body>Thomas <ul><li>  <div>Anton</div>Maria</ul></body>'
-    assert get_text(html, config) == u'Thomas\n  * Anton\n    Maria'
+    html = "<body>Thomas <ul><li>  <div>Anton</div>Maria</ul></body>"
+    assert get_text(html, config) == "Thomas\n  * Anton\n    Maria"
 
-    html = u'<body>Thomas <ul><li> a  <div>Anton</div>Maria</ul></body>'
-    assert get_text(html, config) == u'Thomas\n  * a\n    Anton\n    Maria'
+    html = "<body>Thomas <ul><li> a  <div>Anton</div>Maria</ul></body>"
+    assert get_text(html, config) == "Thomas\n  * a\n    Anton\n    Maria"
diff --git a/tests/test_margin_before_at_start.py b/tests/test_margin_before_at_start.py
index bcadbc5..870c076 100644
--- a/tests/test_margin_before_at_start.py
+++ b/tests/test_margin_before_at_start.py
@@ -9,20 +9,18 @@
 
 
 def test_content():
-    html = '<html><body>first</body></html>'
-    assert get_text(html) == 'first'
+    html = "<html><body>first</body></html>"
+    assert get_text(html) == "first"
 
 
 def test_margin_before():
-    html = '<html><body><p>first</p></body></html>'
-    assert get_text(html) == 'first\n'
+    html = "<html><body><p>first</p></body></html>"
+    assert get_text(html) == "first\n"
 
-    html = '<html><body>first<p>' \
-           'second</p></body></html>'
-    assert get_text(html) == 'first\n\nsecond\n'
+    html = "<html><body>first<p>" "second</p></body></html>"
+    assert get_text(html) == "first\n\nsecond\n"
 
 
 def test_br():
-    html = '<html><body><br>' \
-           'first</p></body></html>'
-    assert get_text(html) == '\nfirst'
+    html = "<html><body><br>" "first</p></body></html>"
+    assert get_text(html) == "\nfirst"
diff --git a/tests/test_margin_handling.py b/tests/test_margin_handling.py
index c09d944..c6a9906 100644
--- a/tests/test_margin_handling.py
+++ b/tests/test_margin_handling.py
@@ -9,29 +9,29 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-config = ParserConfig(css=CSS_PROFILES['strict'])
+config = ParserConfig(css=CSS_PROFILES["strict"])
 
 
 def test_margin_handling():
-    html = u'''<body>Hallo
+    html = """<body>Hallo
                      <div style="margin-top: 1em; margin-bottom: 1em">Echo
                          <div style="margin-top: 2em">Mecho</div>
                      </div>
                      sei Gott
-               </body>'''
-    assert get_text(html, config) == u'Hallo\n\nEcho\n\n\nMecho\n\nsei Gott'
+               </body>"""
+    assert get_text(html, config) == "Hallo\n\nEcho\n\n\nMecho\n\nsei Gott"
 
-    html = u'''<body>Hallo
+    html = """<body>Hallo
                      <div style="margin-top: 1em; margin-bottom: 1em">Echo</div>
                          <div style="margin-top: 2em">Mecho</div>
                      sei Gott
-               </body>'''
-    assert get_text(html, config) == u'Hallo\n\nEcho\n\n\nMecho\nsei Gott'
+               </body>"""
+    assert get_text(html, config) == "Hallo\n\nEcho\n\n\nMecho\nsei Gott"
 
-    html = u'''<body>Hallo
+    html = """<body>Hallo
                      <div style="margin-top: 1em; margin-bottom: 1em">
                          <div style="margin-top: 2em">Ehre</div>
                     </div>
                     sei Gott
-               </body>'''
-    assert get_text(html, config) == u'Hallo\n\n\nEhre\n\nsei Gott'
+               </body>"""
+    assert get_text(html, config) == "Hallo\n\n\nEhre\n\nsei Gott"
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 9ffe217..2094695 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -1,19 +1,24 @@
-from inscriptis.metadata import (__author__, __author_email__, __copyright__,
-                                 __license__, __version__)
+from inscriptis.metadata import (
+    __author__,
+    __author_email__,
+    __copyright__,
+    __license__,
+    __version__,
+)
 
 
 def test_metadata():
     """Test inscriptis package metadata."""
-    assert 'Albert Weichselbraun' in __author__
-    assert 'Fabian Odoni' in __author__
+    assert "Albert Weichselbraun" in __author__
+    assert "Fabian Odoni" in __author__
 
-    assert '@' in __author_email__
+    assert "@" in __author_email__
 
-    assert '2016-' in __copyright__
-    assert 'Albert Weichselbraun' in __copyright__
-    assert 'Fabian Odoni' in __copyright__
+    assert "2016-" in __copyright__
+    assert "Albert Weichselbraun" in __copyright__
+    assert "Fabian Odoni" in __copyright__
 
-    assert __license__ == 'Apache 2.0'
+    assert __license__ == "Apache 2.0"
 
     assert __version__[0].isnumeric()
-    assert '.' in __version__
+    assert "." in __version__
diff --git a/tests/test_model_html_element_canvas.py b/tests/test_model_html_element_canvas.py
index 574c047..e0d8c66 100644
--- a/tests/test_model_html_element_canvas.py
+++ b/tests/test_model_html_element_canvas.py
@@ -26,31 +26,31 @@ def _get_text(html_element):
 
     HtmlElement().set_canvas(c).write("last")
     c._flush_inline()
-    return '\n'.join(c.blocks)
+    return "\n".join(c.blocks)
 
 
 def test_formatting():
     # standard line
 
     h = HtmlElement()
-    assert _get_text(h) == 'firstEhre sei Gott!last'
+    assert _get_text(h) == "firstEhre sei Gott!last"
 
     h.display = Display.block
     h.margin_before = 1
     h.margin_after = 2
     print(h)
     print(_get_text(h))
-    assert _get_text(h) == 'first\n\nEhre sei Gott!\n\n\nlast'
+    assert _get_text(h) == "first\n\nEhre sei Gott!\n\n\nlast"
 
     # list bullet without padding_inline
     h.list_bullet = "* "
-    assert _get_text(h) == 'first\n\n* Ehre sei Gott!\n\n\nlast'
+    assert _get_text(h) == "first\n\n* Ehre sei Gott!\n\n\nlast"
 
     # add a padding_inline
     h.padding_inline = 3
-    assert _get_text(h) == 'first\n\n * Ehre sei Gott!\n\n\nlast'
+    assert _get_text(h) == "first\n\n * Ehre sei Gott!\n\n\nlast"
 
     # and prefixes + suffixes
-    h.prefix = '>>'
-    h.suffix = '<<'
-    assert _get_text(h)== 'first\n\n * >>Ehre sei Gott!<<\n\n\nlast'
+    h.prefix = ">>"
+    h.suffix = "<<"
+    assert _get_text(h) == "first\n\n * >>Ehre sei Gott!<<\n\n\nlast"
diff --git a/tests/test_model_prefix.py b/tests/test_model_prefix.py
index 6682bbb..f5e3f8c 100644
--- a/tests/test_model_prefix.py
+++ b/tests/test_model_prefix.py
@@ -11,46 +11,45 @@
 def test_simple_prefix():
     p = Prefix()
 
-    p.register_prefix(5, '1. ')
+    p.register_prefix(5, "1. ")
 
     # first use
-    assert p.first == '  1. '
+    assert p.first == "  1. "
 
     # the prefix has been consumed
-    assert p.first == ''
+    assert p.first == ""
 
     # prefix used to indent lines separated with newlines
-    assert p.rest == '     '
+    assert p.rest == "     "
 
 
 def test_combined_prefix():
     p = Prefix()
 
-    p.register_prefix(5, '1. ')
-    p.register_prefix(2, '')
+    p.register_prefix(5, "1. ")
+    p.register_prefix(2, "")
 
-    assert p.first == '    1. '
-    assert p.first == ''
+    assert p.first == "    1. "
+    assert p.first == ""
 
     p.remove_last_prefix()
-    assert p.first == ''
+    assert p.first == ""
 
     p.remove_last_prefix()
     # final consumption - no prefix
-    assert p.first == ''
+    assert p.first == ""
 
     # ensure that there are no interactions between different runs with
     # bullets
     p.consumed = False
-    p.register_prefix(5, '2. ')
-    p.register_prefix(2, '- ')
+    p.register_prefix(5, "2. ")
+    p.register_prefix(2, "- ")
 
-    assert p.first == '     - '
-    assert p.first == ''
-    assert p.rest == '       '
+    assert p.first == "     - "
+    assert p.first == ""
+    assert p.rest == "       "
 
     p.consumed = False
     p.remove_last_prefix()
-    assert p.first == '  2. '
-    assert p.rest == '     '
-
+    assert p.first == "  2. "
+    assert p.rest == "     "
diff --git a/tests/test_parse_css.py b/tests/test_parse_css.py
index 9822644..8b26bf5 100644
--- a/tests/test_parse_css.py
+++ b/tests/test_parse_css.py
@@ -7,54 +7,61 @@
 
 from copy import copy
 from inscriptis.css_profiles import CSS_PROFILES
-from inscriptis.html_properties import (Display, WhiteSpace, VerticalAlignment,
-                                        HorizontalAlignment)
+from inscriptis.html_properties import (
+    Display,
+    WhiteSpace,
+    VerticalAlignment,
+    HorizontalAlignment,
+)
 from inscriptis.model.css import CssParse
 from inscriptis.model.html_element import HtmlElement
 
 
 def test_css_parsing():
-    html_element = copy(CSS_PROFILES['strict']['div'])
-    CssParse.attr_style('padding_left: 8px; display: block', html_element)
+    html_element = copy(CSS_PROFILES["strict"]["div"])
+    CssParse.attr_style("padding_left: 8px; display: block", html_element)
     assert html_element.padding_inline == 1
     assert html_element.display == Display.block
 
-    CssParse.attr_style('margin_before: 8em; display: inline', html_element)
+    CssParse.attr_style("margin_before: 8em; display: inline", html_element)
     assert html_element.margin_before == 8
     assert html_element.display == Display.inline
 
 
 def test_html_element_str():
-    '''
+    """
     Tests the string representation of an HtmlElement.
-    '''
-    html_element = HtmlElement('div', '', '', Display.inline, 0, 0, 0,
-                               '', WhiteSpace.pre)
-    assert str(html_element) == ('<div prefix=, suffix=, '
-                                 'display=Display.inline, margin_before=0, '
-                                 'margin_after=0, padding_inline=0, '
-                                 'list_bullet=, '
-                                 'whitespace=WhiteSpace.pre, '
-                                 'align=HorizontalAlignment.left, '
-                                 'valign=VerticalAlignment.middle, '
-                                 'annotation=()>')
+    """
+    html_element = HtmlElement(
+        "div", "", "", Display.inline, 0, 0, 0, "", WhiteSpace.pre
+    )
+    assert str(html_element) == (
+        "<div prefix=, suffix=, "
+        "display=Display.inline, margin_before=0, "
+        "margin_after=0, padding_inline=0, "
+        "list_bullet=, "
+        "whitespace=WhiteSpace.pre, "
+        "align=HorizontalAlignment.left, "
+        "valign=VerticalAlignment.middle, "
+        "annotation=()>"
+    )
 
 
 def test_parse_vertical_align():
     html_element = HtmlElement()
-    CssParse.attr_vertical_align('top', html_element)
+    CssParse.attr_vertical_align("top", html_element)
     assert html_element.valign == VerticalAlignment.top
 
     # invalid value
-    CssParse.attr_vertical_align('unknown', html_element)
+    CssParse.attr_vertical_align("unknown", html_element)
     assert html_element.valign == VerticalAlignment.top
 
 
 def test_parse_horizontal_align():
     html_element = HtmlElement()
-    CssParse.attr_horizontal_align('center', html_element)
+    CssParse.attr_horizontal_align("center", html_element)
     assert html_element.align == HorizontalAlignment.center
 
     # invalid value
-    CssParse.attr_horizontal_align('unknown', html_element)
+    CssParse.attr_horizontal_align("unknown", html_element)
     assert html_element.align == HorizontalAlignment.center
diff --git a/tests/test_strip_xml_header.py b/tests/test_strip_xml_header.py
index b2e8e44..cc28f05 100644
--- a/tests/test_strip_xml_header.py
+++ b/tests/test_strip_xml_header.py
@@ -4,7 +4,7 @@
 
 from inscriptis import get_text
 
-def test_successive_a():
-    html = u'<?xml version="1.0" encoding="UTF-8" ?> Hallo?>'
-    assert get_text(html).strip() == 'Hallo?>'
 
+def test_successive_a():
+    html = '<?xml version="1.0" encoding="UTF-8" ?> Hallo?>'
+    assert get_text(html).strip() == "Hallo?>"
diff --git a/tests/test_style_parsing.py b/tests/test_style_parsing.py
index 8efce8f..d23ae48 100644
--- a/tests/test_style_parsing.py
+++ b/tests/test_style_parsing.py
@@ -10,7 +10,8 @@
 
 def test_style_unit_parsing():
     html_element = HtmlElement()
-    CssParse.attr_style("margin-top:2.666666667em;margin-bottom: 2.666666667em",
-                        html_element)
+    CssParse.attr_style(
+        "margin-top:2.666666667em;margin-bottom: 2.666666667em", html_element
+    )
     assert html_element.margin_before == 3
     assert html_element.margin_after == 3
diff --git a/tests/test_table_cell.py b/tests/test_table_cell.py
index 8c728b2..597af19 100644
--- a/tests/test_table_cell.py
+++ b/tests/test_table_cell.py
@@ -9,39 +9,40 @@
 from inscriptis.model.table import TableCell
 from inscriptis.html_properties import HorizontalAlignment, VerticalAlignment
 
+
 def test_height():
     cell = TableCell(HorizontalAlignment.left, VerticalAlignment.top)
 
-    cell.blocks = ['hallo']
+    cell.blocks = ["hallo"]
     cell.normalize_blocks()
-    assert cell.height == len('\n'.join(cell.blocks).split('\n'))
+    assert cell.height == len("\n".join(cell.blocks).split("\n"))
 
-    cell.blocks = ['hallo', 'echo']
+    cell.blocks = ["hallo", "echo"]
     cell.normalize_blocks()
     assert cell.height == 2
 
-    cell.blocks = ['hallo\necho']
+    cell.blocks = ["hallo\necho"]
     cell.normalize_blocks()
     assert cell.height == 2
 
-    cell.blocks = ['hallo\necho', 'Ehre sei Gott', 'Jump\n&\nRun!\n\n\n']
+    cell.blocks = ["hallo\necho", "Ehre sei Gott", "Jump\n&\nRun!\n\n\n"]
     cell.normalize_blocks()
     assert cell.height == 9
-    assert cell.height == len('\n'.join(cell.blocks).split('\n'))
+    assert cell.height == len("\n".join(cell.blocks).split("\n"))
+
 
 def test_width():
     cell = TableCell(HorizontalAlignment.left, VerticalAlignment.top)
 
-    cell.blocks = ['hallo']
+    cell.blocks = ["hallo"]
     cell.normalize_blocks()
     assert cell.width == len(cell.blocks[0])
 
-    cell.blocks = ['hallo\necho', 'Ehre sei Gott', 'Jump\n&\nRun!\n\n\n']
+    cell.blocks = ["hallo\necho", "Ehre sei Gott", "Jump\n&\nRun!\n\n\n"]
     cell.normalize_blocks()
-    assert cell.width == len('Ehre sei Gott')
+    assert cell.width == len("Ehre sei Gott")
 
     # fixed set width
     cell.width = 95
     cell.normalize_blocks()
     assert cell.width == 95
-
diff --git a/tests/test_table_cell_formatting.py b/tests/test_table_cell_formatting.py
index 7062d78..f8d6de8 100644
--- a/tests/test_table_cell_formatting.py
+++ b/tests/test_table_cell_formatting.py
@@ -11,50 +11,38 @@
 
 
 def test_horizontal_cell_formatting():
-
-    cell = TableCell(align=HorizontalAlignment.left,
-                     valign=VerticalAlignment.top)
+    cell = TableCell(align=HorizontalAlignment.left, valign=VerticalAlignment.top)
     # left alignment
-    cell.blocks = ['Ehre sei Gott!']
+    cell.blocks = ["Ehre sei Gott!"]
     cell.width = 16
-    assert cell.blocks == ['Ehre sei Gott!  ']
+    assert cell.blocks == ["Ehre sei Gott!  "]
 
     # right alignment
     cell.align = HorizontalAlignment.right
-    cell.blocks = ['Ehre sei Gott!']
+    cell.blocks = ["Ehre sei Gott!"]
     cell.width = 16
-    assert cell.blocks == ['  Ehre sei Gott!']
+    assert cell.blocks == ["  Ehre sei Gott!"]
 
 
 def test_vertical_cell_formatting():
-    cell = TableCell(align=HorizontalAlignment.left,
-                     valign=VerticalAlignment.top)
+    cell = TableCell(align=HorizontalAlignment.left, valign=VerticalAlignment.top)
 
     # default top alignment
-    cell.blocks = ['Ehre sei Gott!']
+    cell.blocks = ["Ehre sei Gott!"]
     cell.width = 16
     cell.height = 4
-    assert cell.blocks == ['Ehre sei Gott!  ',
-                           '',
-                           '',
-                           '']
+    assert cell.blocks == ["Ehre sei Gott!  ", "", "", ""]
 
     # bottom alignment
-    cell.blocks = ['Ehre sei Gott!']
+    cell.blocks = ["Ehre sei Gott!"]
     cell.valign = VerticalAlignment.bottom
     cell.width = 16
     cell.height = 4
-    assert cell.blocks == ['',
-                           '',
-                           '',
-                           'Ehre sei Gott!  ']
+    assert cell.blocks == ["", "", "", "Ehre sei Gott!  "]
 
     # middle alignment
-    cell.blocks = ['Ehre sei Gott!']
+    cell.blocks = ["Ehre sei Gott!"]
     cell.valign = VerticalAlignment.middle
     cell.width = 16
     cell.height = 4
-    assert cell.blocks == ['',
-                           'Ehre sei Gott!  ',
-                           '',
-                           '']
+    assert cell.blocks == ["", "Ehre sei Gott!  ", "", ""]
diff --git a/tests/test_table_row.py b/tests/test_table_row.py
index dc2f5f6..b0ea143 100644
--- a/tests/test_table_row.py
+++ b/tests/test_table_row.py
@@ -11,17 +11,17 @@
 
 
 def test_empty_row():
-    tr = TableRow(cell_separator='   ')
+    tr = TableRow(cell_separator="   ")
 
     assert tr.width == 0
-    assert tr.get_text() == ''
+    assert tr.get_text() == ""
 
 
 def test_table_cell_separator():
-    html = '<html><body><table><tr><td>Hallo<br>Eins</td><td>Echo<br>Zwei</td></tr></table></html>'
+    html = "<html><body><table><tr><td>Hallo<br>Eins</td><td>Echo<br>Zwei</td></tr></table></html>"
 
     config = ParserConfig()
-    assert get_text(html, config) == 'Hallo  Echo\nEins   Zwei\n'
+    assert get_text(html, config) == "Hallo  Echo\nEins   Zwei\n"
 
-    config = ParserConfig(table_cell_separator='\t')
-    assert get_text(html, config) == 'Hallo\tEcho\nEins \tZwei\n'
+    config = ParserConfig(table_cell_separator="\t")
+    assert get_text(html, config) == "Hallo\tEcho\nEins \tZwei\n"
diff --git a/tests/test_white_space_handling.py b/tests/test_white_space_handling.py
index cf43d4d..b8b8e28 100644
--- a/tests/test_white_space_handling.py
+++ b/tests/test_white_space_handling.py
@@ -9,29 +9,24 @@
 from inscriptis.css_profiles import CSS_PROFILES
 from inscriptis.model.config import ParserConfig
 
-config = ParserConfig(css=CSS_PROFILES['strict'])
+config = ParserConfig(css=CSS_PROFILES["strict"])
 
 
 def test_white_space():
-    html = (u'<body><span style="white-space: normal"><i>1</i>2\n3</span>'
-            u'</body>')
-    assert get_text(html, config) == u'12 3'
+    html = '<body><span style="white-space: normal"><i>1</i>2\n3</span>' "</body>"
+    assert get_text(html, config) == "12 3"
 
-    html = (u'<body><span style="white-space: nowrap"><i>1</i>2\n3</span>'
-            u'</body>')
-    assert get_text(html, config) == u'12 3'
+    html = '<body><span style="white-space: nowrap"><i>1</i>2\n3</span>' "</body>"
+    assert get_text(html, config) == "12 3"
 
-    html = (u'<body><span style="white-space: pre"><i>1</i>2\n3</span>'
-            u'</body>')
-    assert get_text(html, config) == u'12\n3'
+    html = '<body><span style="white-space: pre"><i>1</i>2\n3</span>' "</body>"
+    assert get_text(html, config) == "12\n3"
 
-    html = (u'<body><span style="white-space: pre-line"><i>1</i>2\n3</span>'
-            u'</body>')
-    assert get_text(html, config) == u'12\n3'
+    html = '<body><span style="white-space: pre-line"><i>1</i>2\n3</span>' "</body>"
+    assert get_text(html, config) == "12\n3"
 
-    html = (u'<body><span style="white-space: pre-wrap"><i>1</i>2\n3</span>'
-            u'</body>')
-    assert get_text(html, config) == u'12\n3'
+    html = '<body><span style="white-space: pre-wrap"><i>1</i>2\n3</span>' "</body>"
+    assert get_text(html, config) == "12\n3"
 
 
 def test_borderline_cases():
@@ -41,39 +36,38 @@ def test_borderline_cases():
     """
     # change of whitespace handling between terms; no whitespace
     # between the terms
-    html = u'<body>Hallo<span style="white-space: pre">echo</span> versus'
-    assert get_text(html, config) == u'Halloecho versus'
+    html = '<body>Hallo<span style="white-space: pre">echo</span> versus'
+    assert get_text(html, config) == "Halloecho versus"
 
     # change of whitespace handling between terms; one whitespace
     # between the terms; option 1
-    html = u'<body>Hallo<span style="white-space: pre"> echo</span> versus'
-    assert get_text(html, config) == u'Hallo echo versus'
+    html = '<body>Hallo<span style="white-space: pre"> echo</span> versus'
+    assert get_text(html, config) == "Hallo echo versus"
 
     # change of whitespace handling between terms; one whitespace
     # between the terms; option 2
-    html = u'<body>Hallo <span style="white-space: pre">echo</span> versus'
-    assert get_text(html, config) == u'Hallo echo versus'
+    html = '<body>Hallo <span style="white-space: pre">echo</span> versus'
+    assert get_text(html, config) == "Hallo echo versus"
 
     # change of whitespace handling between terms; two whitespaces
     # between the terms
-    html = u'<body>Hallo <span style="white-space: pre"> echo</span> versus'
-    assert get_text(html, config) == u'Hallo  echo versus'
+    html = '<body>Hallo <span style="white-space: pre"> echo</span> versus'
+    assert get_text(html, config) == "Hallo  echo versus"
 
     # change of whitespace handling between terms; multiple whitespaces
     # between the terms
-    html = u'<body>Hallo   <span style="white-space: pre"> echo</span> versus'
-    assert get_text(html, config) == u'Hallo  echo versus'
+    html = '<body>Hallo   <span style="white-space: pre"> echo</span> versus'
+    assert get_text(html, config) == "Hallo  echo versus"
 
     # change of whitespace handling between terms; multiple whitespaces
     # between the terms
-    html = u'<body>Hallo   <span style="white-space: pre">   echo</span> versus'
-    assert get_text(html, config) == u'Hallo    echo versus'
+    html = '<body>Hallo   <span style="white-space: pre">   echo</span> versus'
+    assert get_text(html, config) == "Hallo    echo versus"
 
 
 def test_tail():
     """
     ensure that the tail elements are formated based on the container element.
     """
-    html = (u'<body>Hi<span style="white-space: pre"> 1   3 </span>'
-            u' versus 1   3')
-    assert get_text(html, config) == u'Hi 1   3  versus 1 3'
+    html = '<body>Hi<span style="white-space: pre"> 1   3 </span>' " versus 1   3"
+    assert get_text(html, config) == "Hi 1   3  versus 1 3"

From 8329a305ea0ef6ac629c77f08e9c56d6701071a2 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Thu, 11 Jan 2024 17:31:42 +0100
Subject: [PATCH 15/29] chg: ignore black reformatting in 'git blame'.

---
 .git-blame-ignore-revs | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .git-blame-ignore-revs

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
new file mode 100644
index 0000000..e768d6d
--- /dev/null
+++ b/.git-blame-ignore-revs
@@ -0,0 +1 @@
+55fa29ca39f9ed5895f9e88b2eb0f17e4d84245f

From 255615337d976686c0f88ae2e1b2ea8c3de73ca1 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Thu, 11 Jan 2024 18:08:46 +0100
Subject: [PATCH 16/29] chg: improved dependencies.

---
 pyproject.toml | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 53f64cd..92f8ba6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,15 +1,36 @@
 [tool.poetry]
 name = "inscriptis"
 version = "2.4.0"
-description = "inscriptis - HTML to text converter."
 authors = ["Albert Weichselbraun <albert.weichselbraun@fhgr.ch>", "Fabian Odoni <fabian.odoni@fhgr.ch>"]
-license = "Apache 2.0"
+description = "inscriptis - HTML to text converter."
+keywords = ["HTML", "converter", "text"]
+classifiers = [
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: Apache Software License',
+        'Topic :: Text Processing',
+        'Topic :: Text Processing :: Markup :: HTML',
+        'Topic :: Utilities',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
+    ]
+homepage = "https://github.com/weblyzard/inscriptis"
+repository = "https://github.com/weblyzard/inscriptis"
+documentation = "https://inscriptis.readthedocs.io/en"
+license = "Apache-2.0"
 readme = "README.rst"
 
+# [tool.poetry.scripts]
+# inscript = "scripts/inscript.py"
+
 [tool.poetry.dependencies]
 python = "^3.8 || ^3.9 || ^3.10 || ^3.11 || ^3.12"
-requests = "^2.31.0"
-lxml = "^5.1.0"
+requests = ">=2.23.0"
+lxml = ">=4.5.0"
 
 [build-system]
 requires = ["poetry-core"]

From e68329bd4eac5473dd335b4874a1aceb1d56b73a Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Fri, 12 Jan 2024 16:07:53 +0100
Subject: [PATCH 17/29] wip: move inscript CLI into the inscriptis.cli package.

---
 pyproject.toml                              | 11 ++++--
 src/inscriptis/cli/__init__.py              |  1 +
 {scripts => src/inscriptis/cli}/inscript.py | 40 ++++++++++++++-------
 3 files changed, 38 insertions(+), 14 deletions(-)
 create mode 100644 src/inscriptis/cli/__init__.py
 rename {scripts => src/inscriptis/cli}/inscript.py (85%)

diff --git a/pyproject.toml b/pyproject.toml
index 92f8ba6..2b86e63 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,14 +24,21 @@ documentation = "https://inscriptis.readthedocs.io/en"
 license = "Apache-2.0"
 readme = "README.rst"
 
-# [tool.poetry.scripts]
-# inscript = "scripts/inscript.py"
+packages = [
+    { include = "inscriptis" }
+]
+
+[tool.poetry.scripts]
+inscript = "inscriptis.cli.inscript:cli"
 
 [tool.poetry.dependencies]
 python = "^3.8 || ^3.9 || ^3.10 || ^3.11 || ^3.12"
 requests = ">=2.23.0"
 lxml = ">=4.5.0"
 
+[tool.poetry.group.test.dependencies]
+pytest = ">=7.2.0"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
diff --git a/src/inscriptis/cli/__init__.py b/src/inscriptis/cli/__init__.py
new file mode 100644
index 0000000..e37fc78
--- /dev/null
+++ b/src/inscriptis/cli/__init__.py
@@ -0,0 +1 @@
+"""Inscriptis command line interface clients."""
diff --git a/scripts/inscript.py b/src/inscriptis/cli/inscript.py
similarity index 85%
rename from scripts/inscript.py
rename to src/inscriptis/cli/inscript.py
index 2e694a5..58a047f 100755
--- a/scripts/inscript.py
+++ b/src/inscriptis/cli/inscript.py
@@ -5,6 +5,7 @@
 import argparse
 import sys
 from json import load, dumps
+from typing import Optional
 from pathlib import Path
 
 import requests
@@ -121,7 +122,32 @@ def get_parser():
     return parser
 
 
-if __name__ == "__main__":
+def get_html_content(url: str, timeout: int, encoding: str = None) -> Optional[str]:
+    """
+    Return the HTML content to convert.
+
+    Args:
+        url: URL to the HTML content, or None if the content is obtained from stdin.
+        encoding: used encoding.
+
+    Returns:
+        The html_content or None, if no content could be extracted.
+
+    """
+    if not url:
+        return sys.stdin.read()
+    elif Path(url).is_file():
+        with Path(url).open(
+            encoding=encoding or DEFAULT_ENCODING, errors="ignore"
+        ) as f:
+            return f.read()
+    elif url.startswith("http://") or url.startswith("https://"):
+        req = requests.get(url, timeout=timeout)
+        return req.content.decode(encoding or req.encoding)
+
+
+def cli():
+    """Run the inscript command line client."""
     parser = get_parser()
     args = parser.parse_args()
 
@@ -138,17 +164,7 @@ def get_parser():
         )
         sys.exit(0)
 
-    if not args.input:
-        html_content = sys.stdin.read()
-    elif Path(args.input).is_file():
-        with Path(args.input).open(
-            encoding=args.encoding or DEFAULT_ENCODING, errors="ignore"
-        ) as f:
-            html_content = f.read()
-    elif args.input.startswith("http://") or args.input.startswith("https://"):
-        req = requests.get(args.input, timeout=args.timeout)
-        html_content = req.content.decode(args.encoding or req.encoding)
-    else:
+    if not (html_content := get_html_content(args.input, args.timeout, args.encoding)):
         print("ERROR: Cannot open input file '{0}'.\n".format(args.input))
         parser.print_help()
         sys.exit(-1)

From 7ba651326d8c26f4796421a9f71cd0d088f5b3b9 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 15 Jan 2024 08:23:00 +0100
Subject: [PATCH 18/29] fix: metadata handling.

---
 pyproject.toml                 |  4 ++--
 src/inscriptis/cli/inscript.py | 41 +++++++++++++++++++---------------
 src/inscriptis/metadata.py     | 10 ++++++---
 tests/test_metadata.py         |  7 ++----
 4 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2b86e63..a6aef78 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ license = "Apache-2.0"
 readme = "README.rst"
 
 packages = [
-    { include = "inscriptis" }
+   {include = "inscriptis", from="src"},
 ]
 
 [tool.poetry.scripts]
@@ -49,5 +49,5 @@ line-length = 88
 target-version = ["py38", "py39", "py310", "py311", "py312"]
 extend-exclude = '\.html$|\.json$|\.txt$'
 include = '''
-  ^/scripts/|^/src/|^/tests/
+  ^/src/|^/tests/
 '''
diff --git a/src/inscriptis/cli/inscript.py b/src/inscriptis/cli/inscript.py
index 58a047f..a0d7fb7 100755
--- a/src/inscriptis/cli/inscript.py
+++ b/src/inscriptis/cli/inscript.py
@@ -33,8 +33,12 @@ def get_postprocessor(name):
     return getattr(mod, pp_class)()
 
 
-def get_parser():
-    """Parse the arguments if script is run via console."""
+def parse_command_line() -> argparse.Namespace:
+    """Parse the command line arguments.
+
+    Returns:
+        The parsed command line arguments.
+    """
     parser = argparse.ArgumentParser(
         description="Convert the given HTML document to text."
     )
@@ -119,7 +123,22 @@ def get_parser():
         default=False,
         help="display version information",
     )
-    return parser
+
+    # parse command line arguments
+    args = parser.parse_args()
+    if args.version:
+        print(
+            "Inscript HTML to text conversion (based on the inscriptis "
+            "library version {0})".format(__version__)
+        )
+        print("Copyright (C)", __copyright__)
+        print("\nInscript comes with ABSOLUTELY NO WARRANTY.")
+        print(
+            "This is free software and you are welcome to redistribute it "
+            "under the terms of the {0}.".format(__license__)
+        )
+        sys.exit(0)
+    return args
 
 
 def get_html_content(url: str, timeout: int, encoding: str = None) -> Optional[str]:
@@ -149,21 +168,7 @@ def get_html_content(url: str, timeout: int, encoding: str = None) -> Optional[s
 def cli():
     """Run the inscript command line client."""
     parser = get_parser()
-    args = parser.parse_args()
-
-    if args.version:
-        print(
-            "Inscript HTML to text conversion (based on the inscriptis "
-            "library version {0})".format(__version__)
-        )
-        print("Copyright (C)", __copyright__)
-        print("\nInscript comes with ABSOLUTELY NO WARRANTY.")
-        print(
-            "This is free software and you are welcome to redistribute it "
-            "under the terms of the {0}.".format(__license__)
-        )
-        sys.exit(0)
-
+    args = parse_command_line()
     if not (html_content := get_html_content(args.input, args.timeout, args.encoding)):
         print("ERROR: Cannot open input file '{0}'.\n".format(args.input))
         parser.print_help()
diff --git a/src/inscriptis/metadata.py b/src/inscriptis/metadata.py
index f7112f0..f1c3747 100644
--- a/src/inscriptis/metadata.py
+++ b/src/inscriptis/metadata.py
@@ -1,7 +1,11 @@
 """Inscriptis metadata information."""
 
+import importlib.metadata as metadata
+
+PACKAGE = "inscriptis"
+
 __author__ = "Albert Weichselbraun, Fabian Odoni"
 __author_email__ = "albert.weichselbraun@fhgr.ch, fabian.odoni@fhgr.ch"
-__copyright__ = "2016-2024 Albert Weichselbraun, Fabian Odoni"
-__license__ = "Apache 2.0"
-__version__ = "2.3.3"
+__copyright__ = f"{metadata.metadata(PACKAGE)['Name']} {metadata.metadata(PACKAGE)['Version']} © 2016-2023 {__author__}"
+__license__ = metadata.metadata(PACKAGE)["License"]
+__version__ = metadata.metadata(PACKAGE)["Version"]
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index 2094695..d0f5a9a 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -12,13 +12,10 @@ def test_metadata():
     assert "Albert Weichselbraun" in __author__
     assert "Fabian Odoni" in __author__
 
-    assert "@" in __author_email__
-
-    assert "2016-" in __copyright__
     assert "Albert Weichselbraun" in __copyright__
     assert "Fabian Odoni" in __copyright__
 
-    assert __license__ == "Apache 2.0"
-
+    assert "@" in __author_email__
+    assert __license__ == "Apache-2.0"
     assert __version__[0].isnumeric()
     assert "." in __version__

From dec34b567369dcd9e3be0907451d71e424fc46ea Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 15 Jan 2024 10:30:30 +0100
Subject: [PATCH 19/29] chg: improved README and benchmark totals; minor cleanups.

---
 README.rst                           |  2 +-
 benchmarking/run_benchmarking.py     | 13 +++++++++++++
 src/inscriptis/cli/inscript.py       |  4 +---
 src/inscriptis/metadata.py           |  5 ++++-
 src/inscriptis/model/attribute.py    |  2 +-
 src/inscriptis/model/canvas/block.py |  4 ++++
 src/inscriptis/model/html_element.py | 16 ++++++++--------
 src/inscriptis/model/table.py        |  8 ++++----
 8 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/README.rst b/README.rst
index a6fac94..116843e 100644
--- a/README.rst
+++ b/README.rst
@@ -315,7 +315,7 @@ The Flask Web Service translates HTML pages to the corresponding plain text.
 
 Run the Web Service on your host system
 ---------------------------------------
-Provide additional requirement `python3-flask <https://flask.palletsprojects.com/en/2.2.x/>`_, then start the inscriptis Web service with the following command::
+Install the additional requirement `python3-flask <https://flask.palletsprojects.com/en/2.2.x/>`_, then start the inscriptis Web service with the following command::
 
   $ export FLASK_APP="inscriptis.service.web"
   $ python3 -m flask run
diff --git a/benchmarking/run_benchmarking.py b/benchmarking/run_benchmarking.py
index dd09ce5..2de67f3 100755
--- a/benchmarking/run_benchmarking.py
+++ b/benchmarking/run_benchmarking.py
@@ -368,6 +368,7 @@ def benchmark(args, source_list):
     _setup_benchmarking_directories(args)
 
     output = []
+    total_times = {}
     for source in source_list:
         source_name, html = _fetch_url(source, args.cache)
 
@@ -383,10 +384,22 @@ def benchmark(args, source_list):
                 save_to_file(converter.name, source_name, text,
                              args.benchmarking_results)
 
+        for converter, conversion_time in times.items():
+            total_times[converter] = total_times.get(converter, 0) + conversion_time
         speed_table = get_speed_table(times)
         print(speed_table)
         output.append(speed_table)
 
+    print('\nTotal')
+    output.append('\nTotal\n')
+    speed_table = get_speed_table(total_times)
+    print(speed_table)
+    output.append(speed_table)
+
+
+
+
+
     with open(os.path.join(args.benchmarking_results,
                            OUTFILE), 'w') as output_file:
         output_file.write('\n'.join(output) + '\n')
diff --git a/src/inscriptis/cli/inscript.py b/src/inscriptis/cli/inscript.py
index a0d7fb7..c2861dc 100755
--- a/src/inscriptis/cli/inscript.py
+++ b/src/inscriptis/cli/inscript.py
@@ -167,11 +167,9 @@ def get_html_content(url: str, timeout: int, encoding: str = None) -> Optional[s
 
 def cli():
     """Run the inscript command line client."""
-    parser = get_parser()
     args = parse_command_line()
     if not (html_content := get_html_content(args.input, args.timeout, args.encoding)):
-        print("ERROR: Cannot open input file '{0}'.\n".format(args.input))
-        parser.print_help()
+        print("ERROR: Cannot open input file '{0}'.".format(args.input))
         sys.exit(-1)
 
     if args.annotation_rules:
diff --git a/src/inscriptis/metadata.py b/src/inscriptis/metadata.py
index f1c3747..c86c482 100644
--- a/src/inscriptis/metadata.py
+++ b/src/inscriptis/metadata.py
@@ -6,6 +6,9 @@
 
 __author__ = "Albert Weichselbraun, Fabian Odoni"
 __author_email__ = "albert.weichselbraun@fhgr.ch, fabian.odoni@fhgr.ch"
-__copyright__ = f"{metadata.metadata(PACKAGE)['Name']} {metadata.metadata(PACKAGE)['Version']} © 2016-2023 {__author__}"
+__copyright__ = (
+    f"{metadata.metadata(PACKAGE)['Name']} "
+    + f"{metadata.metadata(PACKAGE)['Version']} © 2016-2023 {__author__}"
+)
 __license__ = metadata.metadata(PACKAGE)["License"]
 __version__ = metadata.metadata(PACKAGE)["Version"]
diff --git a/src/inscriptis/model/attribute.py b/src/inscriptis/model/attribute.py
index f4f8efc..d8cf3f6 100644
--- a/src/inscriptis/model/attribute.py
+++ b/src/inscriptis/model/attribute.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # encoding: utf-8
 
 """HTML attribute handling."""
diff --git a/src/inscriptis/model/canvas/block.py b/src/inscriptis/model/canvas/block.py
index 59ba05f..6dc1361 100644
--- a/src/inscriptis/model/canvas/block.py
+++ b/src/inscriptis/model/canvas/block.py
@@ -42,6 +42,10 @@ def merge_normal_text(self, text: str) -> None:
 
         Args:
             text: the text to merge
+
+        Note:
+            If the previous text ended with a whitespace and text starts with one, both
+            will automatically collapse into a single whitespace.
         """
         normalized_text = []
 
diff --git a/src/inscriptis/model/html_element.py b/src/inscriptis/model/html_element.py
index 5d16e9d..91e9585 100644
--- a/src/inscriptis/model/html_element.py
+++ b/src/inscriptis/model/html_element.py
@@ -158,14 +158,14 @@ def get_refined_html_element(self, new: "HtmlElement") -> "HtmlElement":
 
     def __str__(self):
         return (
-            "<{self.tag} prefix={self.prefix}, suffix={self.suffix}, "
-            "display={self.display}, margin_before={self.margin_before}, "
-            "margin_after={self.margin_after}, "
-            "padding_inline={self.padding_inline}, "
-            "list_bullet={self.list_bullet}, "
-            "whitespace={self.whitespace}, align={self.align}, "
-            "valign={self.valign}, annotation={self.annotation}>"
-        ).format(self=self)
+            f"<{self.tag} prefix={self.prefix}, suffix={self.suffix}, "
+            f"display={self.display}, margin_before={self.margin_before}, "
+            f"margin_after={self.margin_after}, "
+            f"padding_inline={self.padding_inline}, "
+            f"list_bullet={self.list_bullet}, "
+            f"whitespace={self.whitespace}, align={self.align}, "
+            f"valign={self.valign}, annotation={self.annotation}>"
+        )
 
     __repr__ = __str__
 
diff --git a/src/inscriptis/model/table.py b/src/inscriptis/model/table.py
index 073c626..75a2cd3 100644
--- a/src/inscriptis/model/table.py
+++ b/src/inscriptis/model/table.py
@@ -55,7 +55,7 @@ def normalize_blocks(self) -> int:
         return len(self.blocks)
 
     @property
-    def height(self):
+    def height(self) -> int:
         """Compute the table cell's height.
 
         Returns:
@@ -64,7 +64,7 @@ def height(self):
         return max(1, len(self.blocks))
 
     @property
-    def width(self):
+    def width(self) -> int:
         """Compute the table cell's width.
 
         Returns:
@@ -184,7 +184,7 @@ def get_text(self) -> str:
         return "\n".join(row_lines)
 
     @property
-    def width(self):
+    def width(self) -> int:
         """Compute and return the width of the current row."""
         if not self.columns:
             return 0
@@ -255,7 +255,7 @@ def _set_column_width(self):
                 if len(row) > cur_column_idx:
                     row.columns[cur_column_idx].width = max_column_width
 
-    def get_text(self):
+    def get_text(self) -> str:
         """Return and render the text of the given table."""
         if not self.rows:
             return "\n"

From 161035dd0bf915861c35b5aded8eb7eb5f8d7f69 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Mon, 15 Jan 2024 20:06:06 +0100
Subject: [PATCH 20/29] chg: removed setup.py.

---
 setup.py | 60 --------------------------------------------------------
 1 file changed, 60 deletions(-)
 delete mode 100644 setup.py

diff --git a/setup.py b/setup.py
deleted file mode 100644
index d3ec856..0000000
--- a/setup.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-
-"""Inscriptis setup script."""
-
-from pathlib import Path
-from setuptools import setup, find_packages
-from os import path
-
-
-here = Path(path.dirname(__file__)).resolve()
-# get version information
-with here.joinpath('src/inscriptis/metadata.py').open() as f:
-    exec(f.read())
-
-# Get the long description from the README.md file
-with here.joinpath('README.rst').open() as f:  # , encoding='utf-8'
-    long_description = f.read()
-
-setup(
-    # Metadata
-    name='inscriptis',
-    version=__version__,
-    description='inscriptis - HTML to text converter.',
-    long_description=long_description,
-    author=__author__,
-    author_email=__author_email__,
-    python_requires='>=3.8',
-    classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: Apache Software License',
-        'Topic :: Text Processing',
-        'Topic :: Text Processing :: Markup :: HTML',
-        'Topic :: Utilities',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-        'Programming Language :: Python :: 3.10',
-        'Programming Language :: Python :: 3.11',
-        'Programming Language :: Python :: 3.12',
-    ],
-    keywords='HTML,converter,text',
-    url='https://github.com/weblyzard/inscriptis',
-    license=__license__,
-    package_dir={'': 'src'},
-
-    # Package List
-    packages=find_packages('src'),
-
-    # Scripts
-    scripts=[
-        'scripts/inscript.py'
-    ],
-
-    # Requirements
-    install_requires=[
-        'lxml',
-        'requests'
-    ],
-)

From 4dc04d5d1b194dd9774c3162b85e5079737d19f7 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 08:13:11 +0100
Subject: [PATCH 21/29] add: benchmarking.py to black config.

---
 benchmarking/run_benchmarking.py | 270 +++++++++++++++++--------------
 pyproject.toml                   |   4 +-
 2 files changed, 152 insertions(+), 122 deletions(-)

diff --git a/benchmarking/run_benchmarking.py b/benchmarking/run_benchmarking.py
index 2de67f3..f0f44af 100755
--- a/benchmarking/run_benchmarking.py
+++ b/benchmarking/run_benchmarking.py
@@ -21,17 +21,19 @@
 # any installed module versions).
 #
 
-LYNX_BIN = '/usr/bin/lynx'
-LINKS_BIN = '/usr/bin/links'
+LYNX_BIN = "/usr/bin/lynx"
+LINKS_BIN = "/usr/bin/links"
 BENCHMARKING_ROOT = os.path.dirname(os.path.abspath(__file__))
-SRC_DIR = os.path.join(BENCHMARKING_ROOT, '../src')
+SRC_DIR = os.path.join(BENCHMARKING_ROOT, "../src")
 sys.path.insert(0, os.path.abspath(SRC_DIR))
 
 try:
     import inscriptis
 except ImportError:
-    print('Inscriptis is not available. Please install it in order to '
-          'compare with inscriptis.')
+    print(
+        "Inscriptis is not available. Please install it in order to "
+        "compare with inscriptis."
+    )
 
 #
 # Import third-party HTML 2 text converters.
@@ -39,22 +41,28 @@
 try:
     from bs4 import BeautifulSoup
 except ImportError:
-    print('BeautifulSoup is not available. Please install it in order to '
-          'compare with BeautifulSoup.')
+    print(
+        "BeautifulSoup is not available. Please install it in order to "
+        "compare with BeautifulSoup."
+    )
 try:
     import html2text
 except ImportError:
-    print('html2text is not available. Please install it in order to '
-          'compare with html2text.')
+    print(
+        "html2text is not available. Please install it in order to "
+        "compare with html2text."
+    )
 try:
     import justext
 except ImportError:
-    print('justext is not available. Please install it in order to compare '
-          'with justext.')
+    print(
+        "justext is not available. Please install it in order to compare "
+        "with justext."
+    )
 
 
 TRIES = 7
-OUTFILE = 'speed_comparisons.txt'
+OUTFILE = "speed_comparisons.txt"
 
 
 class AbstractHtmlConverter:
@@ -86,22 +94,22 @@ class BeautifulSoupHtmlConverter(AbstractHtmlConverter):
     """
     Converts HTML to text using BeautifulSoup.
     """
-    name = 'BeautifulSoup'
+
+    name = "BeautifulSoup"
 
     def __init__(self):
-        self.available = 'bs4' in sys.modules
+        self.available = "bs4" in sys.modules
 
     def get_text(self, html):
-        soup = BeautifulSoup(html, 'lxml')
+        soup = BeautifulSoup(html, "lxml")
 
-        for script in soup(['script', 'style']):
+        for script in soup(["script", "style"]):
             script.extract()
 
         text = soup.get_text()
         lines = (line.strip() for line in text.splitlines())
-        chunks = (phrase.strip() for line in lines
-                  for phrase in line.split('  '))
-        result = '\n'.join(chunk for chunk in chunks if chunk)
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        result = "\n".join(chunk for chunk in chunks if chunk)
         return result
 
 
@@ -109,67 +117,74 @@ class JustextConverter(AbstractHtmlConverter):
     """
     Converts HTML to text using Justtext.
     """
-    name = 'Justtext'
+
+    name = "Justtext"
 
     def __init__(self):
-        self.available = 'justext' in sys.modules
+        self.available = "justext" in sys.modules
 
     def get_text(self, html):
-        paragraphs = justext.justext(html, stoplist='English')
+        paragraphs = justext.justext(html, stoplist="English")
         result = [paragraph.text for paragraph in paragraphs]
-        return '\n'.join(result)
+        return "\n".join(result)
 
 
 class Html2TextConverter(AbstractHtmlConverter):
     """
     Converts HTML to text using Html2Text.
     """
-    name = 'Html2Text'
+
+    name = "Html2Text"
 
     def __init__(self):
-        self.available = 'html2text' in sys.modules
+        self.available = "html2text" in sys.modules
 
     def get_text(self, html):
         converter = html2text.HTML2Text()
         converter.ignore_links = True
         result = converter.handle(str(html))
 
-        return ''.join(result)
+        return "".join(result)
 
 
 class LynxConverter(AbstractHtmlConverter):
     """
     Converts HTML to text using lynx.
     """
-    name = 'Lynx'
+
+    name = "Lynx"
 
     def __init__(self):
         try:
-            subprocess.call([LYNX_BIN, '-dump \'www.google.com\''],
-                            stdout=subprocess.PIPE)
+            subprocess.call(
+                [LYNX_BIN, "-dump 'www.google.com'"], stdout=subprocess.PIPE
+            )
             self.available = True
         except OSError:
-            print('lynx can not be called. Please check in order to compare '
-                  'with lynx.')
+            print(
+                "lynx can not be called. Please check in order to compare " "with lynx."
+            )
             self.available = False
 
     def get_text(self, html):
-
         def kill_lynx(pid):
             os.kill(pid, signal.SIGKILL)
             os.waitpid(-1, os.WNOHANG)
-            print('lynx killed')
-
-        lynx_args = '-stdin -width=20000 -force_html -nocolor -dump -nolist ' \
-                    '-nobold -display_charset=utf8'
-        cmd = [LYNX_BIN, ] + lynx_args.split(' ')
-        lynx = subprocess.Popen(cmd, stdin=subprocess.PIPE,
-                                stdout=subprocess.PIPE)
-        lynx.stdin.write(html.encode('utf8'))
+            print("lynx killed")
+
+        lynx_args = (
+            "-stdin -width=20000 -force_html -nocolor -dump -nolist "
+            "-nobold -display_charset=utf8"
+        )
+        cmd = [
+            LYNX_BIN,
+        ] + lynx_args.split(" ")
+        lynx = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        lynx.stdin.write(html.encode("utf8"))
         lynx.stdin.close()
         _t = threading.Timer(200.0, kill_lynx, args=[lynx.pid])
         _t.start()
-        text = lynx.stdout.read().decode('utf-8', 'replace')
+        text = lynx.stdout.read().decode("utf-8", "replace")
         _t.cancel()
         return text
 
@@ -178,34 +193,38 @@ class LinksConverter(AbstractHtmlConverter):
     """
     Converts HTML to text using links.
     """
-    name = 'Links'
+
+    name = "Links"
 
     def __init__(self):
         try:
-            subprocess.call([LINKS_BIN, '-dump \'www.google.com\''],
-                            stdout=subprocess.PIPE)
+            subprocess.call(
+                [LINKS_BIN, "-dump 'www.google.com'"], stdout=subprocess.PIPE
+            )
             self.available = True
         except OSError:
-            print('links can not be called. Please check in order to compare '
-                  'with links.')
+            print(
+                "links can not be called. Please check in order to compare "
+                "with links."
+            )
             self.available = False
 
     def get_text(self, html):
-
         def kill_links(pid):
             os.kill(pid, signal.SIGKILL)
             os.waitpid(-1, os.WNOHANG)
-            print('links killed')
-
-        links_args= '-dump '
-        cmd = [LINKS_BIN, ] + links_args.split(' ')
-        links = subprocess.Popen(cmd, stdin=subprocess.PIPE,
-                                 stdout=subprocess.PIPE)
-        links.stdin.write(html.encode('utf8'))
+            print("links killed")
+
+        links_args = "-dump "
+        cmd = [
+            LINKS_BIN,
+        ] + links_args.split(" ")
+        links = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+        links.stdin.write(html.encode("utf8"))
         links.stdin.close()
         _t = threading.Timer(200.0, kill_links, args=[links.pid])
         _t.start()
-        text = links.stdout.read().decode('utf-8', 'replace')
+        text = links.stdout.read().decode("utf-8", "replace")
         _t.cancel()
         return text
 
@@ -214,32 +233,33 @@ class InscriptisHtmlConverter(AbstractHtmlConverter):
     """
     Converts HTML to text using Inscriptis.
     """
-    name = 'Inscriptis'
+
+    name = "Inscriptis"
 
     def __init__(self):
-        self.available = 'inscriptis' in sys.modules
+        self.available = "inscriptis" in sys.modules
         if self.available:
             from inscriptis import get_text
+
             self.get_text = get_text
 
     def get_text(self, html):
         return self.get_text(html)
 
 
-timestamp = str(datetime.now()).replace(' ', '_').replace(':', '-')\
-                                                 .split('.')[0]
-DEFAULT_RESULT_DIR = os.path.join(BENCHMARKING_ROOT, 'benchmarking_results',
-                                  timestamp)
-DEFAULT_CACHE_DIR = os.path.join(BENCHMARKING_ROOT, 'html_cache')
+timestamp = str(datetime.now()).replace(" ", "_").replace(":", "-").split(".")[0]
+DEFAULT_RESULT_DIR = os.path.join(BENCHMARKING_ROOT, "benchmarking_results", timestamp)
+DEFAULT_CACHE_DIR = os.path.join(BENCHMARKING_ROOT, "html_cache")
 
 
 def save_to_file(algorithm, url, data, benchmarking_results_dir):
     """
     Saves a benchmarking result to the given file.
     """
-    result_file = os.path.join(benchmarking_results_dir,
-                               '{}_{}.txt'.format(algorithm, url))
-    with open(result_file, 'w') as output_file:
+    result_file = os.path.join(
+        benchmarking_results_dir, "{}_{}.txt".format(algorithm, url)
+    )
+    with open(result_file, "w") as output_file:
         output_file.write(data)
 
 
@@ -251,19 +271,22 @@ def get_speed_table(times):
     longest_key = max(len(key) for key, _ in times.items())
     longest_value = max(len(str(value)) for _, value in times.items())
 
-    result = ''
+    result = ""
     for key, value in sorted(times.items(), key=operator.itemgetter(1)):
         difference = value - fastest
         if difference == 0:
-            difference = '--> fastest'
+            difference = "--> fastest"
         else:
-            difference = '{0:+f}'.format(difference)
+            difference = "{0:+f}".format(difference)
 
-        output = '{}{}: {}{} {}'.format(key, ' ' * (longest_key - len(key)),
-                                        value, ' ' * (longest_value -
-                                                      len(str(value))),
-                                        difference)
-        result += output + '\n'
+        output = "{}{}: {}{} {}".format(
+            key,
+            " " * (longest_key - len(key)),
+            value,
+            " " * (longest_value - len(str(value))),
+            difference,
+        )
+        result += output + "\n"
 
     return result
 
@@ -272,46 +295,54 @@ def get_fname(url) -> str:
     """
     Transforms a URL to a file name.
     """
-    trash = (('http://', ''),
-             ('https://', ''),
-             ('/', '-'),
-             (':', '-'),
-             ('%', ''))
+    trash = (("http://", ""), ("https://", ""), ("/", "-"), (":", "-"), ("%", ""))
 
     for key, value in trash:
         url = url.replace(key, value)
     return url[0:100]
 
 
-CONVERTER = (BeautifulSoupHtmlConverter(),
-             JustextConverter(),
-             Html2TextConverter(),
-             LynxConverter(),
-             LinksConverter(),
-             InscriptisHtmlConverter())
+CONVERTER = (
+    BeautifulSoupHtmlConverter(),
+    JustextConverter(),
+    Html2TextConverter(),
+    LynxConverter(),
+    LinksConverter(),
+    InscriptisHtmlConverter(),
+)
 
 
 def parse_args():
     """
     Parse optional benchmarking arguments.
     """
-    parser = argparse.ArgumentParser(description='Inscriptis benchmarking '
-                                     'suite')
-    parser.add_argument('converter', type=str, nargs='*',
-                        help='The list of converters to benchmark (options:'
-                             'BeautifulSoup, Justext, Html2Text, Lynx, '
-                             'Inscriptis; default: all)')
-    parser.add_argument('-u', '--benchmarking-urls',
-                        default=os.path.join(BENCHMARKING_ROOT,
-                                             'url_list.txt'),
-                        help='A list of URLs to use in the benchmark.')
-    parser.add_argument('-r', '--benchmarking-results',
-                        default=DEFAULT_RESULT_DIR,
-                        help='Optional directory for saving the benchmarking '
-                        'results.')
-    parser.add_argument('-c', '--cache', default=DEFAULT_CACHE_DIR,
-                        help='Optional cache directory for the retrieved Web '
-                        'pages.')
+    parser = argparse.ArgumentParser(description="Inscriptis benchmarking " "suite")
+    parser.add_argument(
+        "converter",
+        type=str,
+        nargs="*",
+        help="The list of converters to benchmark (options:"
+        "BeautifulSoup, Justext, Html2Text, Lynx, "
+        "Inscriptis; default: all)",
+    )
+    parser.add_argument(
+        "-u",
+        "--benchmarking-urls",
+        default=os.path.join(BENCHMARKING_ROOT, "url_list.txt"),
+        help="A list of URLs to use in the benchmark.",
+    )
+    parser.add_argument(
+        "-r",
+        "--benchmarking-results",
+        default=DEFAULT_RESULT_DIR,
+        help="Optional directory for saving the benchmarking " "results.",
+    )
+    parser.add_argument(
+        "-c",
+        "--cache",
+        default=DEFAULT_CACHE_DIR,
+        help="Optional cache directory for the retrieved Web " "pages.",
+    )
     return parser.parse_args()
 
 
@@ -348,10 +379,10 @@ def _fetch_url(url, cache_dir):
     else:
         req = urllib.request.Request(url)
         try:
-            html = urllib.request.urlopen(req).read().decode('utf-8')
+            html = urllib.request.urlopen(req).read().decode("utf-8")
         except UnicodeDecodeError:
-            html = urllib.request.urlopen(req).read().decode('latin1')
-        open(source_cache_path, 'w').write(html)
+            html = urllib.request.urlopen(req).read().decode("latin1")
+        open(source_cache_path, "w").write(html)
 
     return source_name, html
 
@@ -372,17 +403,21 @@ def benchmark(args, source_list):
     for source in source_list:
         source_name, html = _fetch_url(source, args.cache)
 
-        print('\nURL: {}'.format(source_name))
-        output.append('\nURL: {}\n'.format(source_name))
+        print("\nURL: {}".format(source_name))
+        output.append("\nURL: {}\n".format(source_name))
 
         times = {}
         for converter in CONVERTER:
-            if converter.available and not args.converter or converter.name \
-                    in args.converter:
+            if (
+                converter.available
+                and not args.converter
+                or converter.name in args.converter
+            ):
                 time_required, text = converter.benchmark(html)
                 times[converter.name] = time_required
-                save_to_file(converter.name, source_name, text,
-                             args.benchmarking_results)
+                save_to_file(
+                    converter.name, source_name, text, args.benchmarking_results
+                )
 
         for converter, conversion_time in times.items():
             total_times[converter] = total_times.get(converter, 0) + conversion_time
@@ -390,22 +425,17 @@ def benchmark(args, source_list):
         print(speed_table)
         output.append(speed_table)
 
-    print('\nTotal')
-    output.append('\nTotal\n')
+    print("\nTotal")
+    output.append("\nTotal\n")
     speed_table = get_speed_table(total_times)
     print(speed_table)
     output.append(speed_table)
 
+    with open(os.path.join(args.benchmarking_results, OUTFILE), "w") as output_file:
+        output_file.write("\n".join(output) + "\n")
 
 
-
-
-    with open(os.path.join(args.benchmarking_results,
-                           OUTFILE), 'w') as output_file:
-        output_file.write('\n'.join(output) + '\n')
-
-
-if __name__ == '__main__':
+if __name__ == "__main__":
     # These are a few predefined urls the script will
     cmdline_args = parse_args()
     with open(cmdline_args.benchmarking_urls) as url_list:
diff --git a/pyproject.toml b/pyproject.toml
index a6aef78..010dd1d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,7 +47,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.black]
 line-length = 88
 target-version = ["py38", "py39", "py310", "py311", "py312"]
-extend-exclude = '\.html$|\.json$|\.txt$'
+extend-exclude = '\.html$|\.json$|\.txt$|/a$|/b$'
 include = '''
-  ^/src/|^/tests/
+  ^/src/|^/tests/|^/benchmarking/
 '''

From 61b93f4fc9752b19fc91bfbf5f4fc5682085833d Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:24:30 +0100
Subject: [PATCH 22/29] chg: migrate webservice to fastapi.

---
 .gitignore                    |  1 +
 pyproject.toml                | 18 ++++++++++++++----
 requirements.txt              |  2 --
 src/inscriptis/service/web.py | 33 ++++++++++++++++++++-------------
 4 files changed, 35 insertions(+), 19 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
index 18e246b..53ece72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ tests/reference.txt
 *.c
 docs/paper/*.pdf
 htmlcov/
+poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
index 010dd1d..e43429c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,21 +28,31 @@ packages = [
    {include = "inscriptis", from="src"},
 ]
 
+
 [tool.poetry.scripts]
 inscript = "inscriptis.cli.inscript:cli"
+inscriptis-api = "inscriptis.service.web:start"
+
+
+[tool.poetry.extras]
+web-service = ["fastapi", "uvicorn"]
+
 
 [tool.poetry.dependencies]
 python = "^3.8 || ^3.9 || ^3.10 || ^3.11 || ^3.12"
-requests = ">=2.23.0"
-lxml = ">=4.5.0"
+requests = ">=2.31.0"
+lxml = ">=4.9.3"
+
+# optional dependencies
+fastapi = { version = "^0.109.0", optional = true }
+uvicorn = { version = "^0.25.0", optional = true }
 
-[tool.poetry.group.test.dependencies]
-pytest = ">=7.2.0"
 
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
+
 # code formatting with black
 [tool.black]
 line-length = 88
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index a3596c0..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-lxml
-requests
diff --git a/src/inscriptis/service/web.py b/src/inscriptis/service/web.py
index bb54665..fdf47ca 100755
--- a/src/inscriptis/service/web.py
+++ b/src/inscriptis/service/web.py
@@ -2,14 +2,15 @@
 # coding:utf-8
 """Inscriptis Web Service."""
 
-from flask import request, Response, Flask
+from fastapi import FastAPI, Request
+from fastapi.responses import PlainTextResponse
 
 from inscriptis import get_text
 from inscriptis.metadata import __version__
 from inscriptis.css_profiles import RELAXED_CSS_PROFILE
 from inscriptis.model.config import ParserConfig
 
-app = Flask(__name__)
+app = FastAPI()
 CONFIG = ParserConfig(
     css=RELAXED_CSS_PROFILE,
     display_images=True,
@@ -18,31 +19,37 @@
 )
 
 
-@app.route("/")
+@app.get("/")
 def index():
     """Print a short status message for the Web service's base URL."""
     return "Inscriptis text to HTML Web service."
 
 
-@app.route("/get_text", methods=["POST"])
-def get_text_call():
+@app.post("/get_text", response_class=PlainTextResponse)
+async def get_text_call(request: Request):
     """Return the text representation of the given HTML content."""
-    content_type = request.headers["Content-type"]
+    content_type = request.headers.get("Content-type", "")  # header may be absent
     if "; encoding=" in content_type:
         encoding = content_type.split("; encoding=")[1]
     else:
         encoding = "UTF-8"
-    html_content = request.data.decode(encoding, errors="ignore")
-    text = get_text(html_content, CONFIG)
-    return Response(text, mimetype="text/plain")
+    html_content = await request.body()
+    return get_text(html_content.decode(encoding, errors="ignore"), CONFIG)
 
 
-@app.route("/version", methods=["GET"])
+@app.get("/version", response_class=PlainTextResponse)
 def get_version_call():
     """Return the used inscriptis version."""
-    return Response(__version__ + "\n", mimetype="text/plain")
+    return __version__
 
 
-if __name__ == "__main__":
+def start():
+    """Start the webservice."""
+    import uvicorn
+
     print("Starting Web service based on Inscriptis", __version__)
-    app.run(threaded=True, host="127.0.0.1", port=5000)
+    uvicorn.run(app, host="127.0.0.1", port=5000)
+
+
+if __name__ == "__main__":
+    start()

From 789e0bc58ee418b0e44cea35b157453b221ea354 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:25:07 +0100
Subject: [PATCH 23/29] chg: use inscriptis package rather than local build.

---
 Dockerfile | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 70e30e5..089e929 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,10 +4,8 @@
 FROM python:3.11-slim-bullseye AS builder
 
 WORKDIR /inscriptis
-COPY requirements.txt .
 RUN python -m venv .venv && .venv/bin/python -m pip install --upgrade pip
-RUN .venv/bin/pip install --no-cache-dir -r requirements.txt && \
-    .venv/bin/pip install --no-cache-dir Flask waitress && \
+RUN .venv/bin/pip install --no-cache-dir inscriptis[web-service] && \
     find /inscriptis/.venv \( -type d -a -name test -o -name tests \) -o \( -type f -a -name '*.pyc' -o -name '*.pyo' \) -exec rm -rf '{}' \+
 
 #
@@ -18,10 +16,9 @@ LABEL maintainer="albert@weichselbraun.net"
 
 # Note: only copy the src directory, to prevent bloating the image with 
 #       irrelevant files from the project directory.
-WORKDIR /inscriptis/src
+WORKDIR /inscriptis
 COPY --from=builder /inscriptis /inscriptis
-COPY ./src /inscriptis/src
 
 ENV PATH="/inscriptis/.venv/bin:$PATH"
-CMD ["waitress-serve", "inscriptis.service.web:app", "--port=5000", "--host=0.0.0.0"]
+CMD ["uvicorn", "inscriptis.service.web:app", "--port=5000", "--host=0.0.0.0"]
 EXPOSE 5000

From 49fd08b685cddc694a897d5d8b794e96b90799ff Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:31:40 +0100
Subject: [PATCH 24/29] chg: updated documentation on the Inscriptis Web
 service.

---
 README.rst | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index 116843e..7fc6ef2 100644
--- a/README.rst
+++ b/README.rst
@@ -311,14 +311,18 @@ Currently, inscriptis supports the following postprocessors:
 Web Service
 ===========
 
-The Flask Web Service translates HTML pages to the corresponding plain text.
+The FastAPI-based Web Service uses Inscriptis to translate HTML pages to plain text.
 
 Run the Web Service on your host system
 ---------------------------------------
-Install the additional requirement `python3-flask <https://flask.palletsprojects.com/en/2.2.x/>`_, then start the inscriptis Web service with the following command::
+Install the optional ``web-service`` extra for Inscriptis::
+  
+  $ pip install "inscriptis[web-service]"
+
+Start the Inscriptis Web service with the following command::
+
+  $ uvicorn inscriptis.service.web:app --port 5000 --host 127.0.0.1
 
-  $ export FLASK_APP="inscriptis.service.web"
-  $ python3 -m flask run
 
 Run the Web Service with Docker
 -------------------------------

From 44461512aa906c3151c0dbe2af50fc5fbd093643 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:42:27 +0100
Subject: [PATCH 25/29] fix: package builds.

---
 .github/workflows/python-package.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 1498480..7613ee1 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,9 +22,9 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install tox setuptools pytest pytest-cov codecov
+        python -m pip install tox setuptools pytest pytest-cov codecov poetry
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        python setup.py install
+        poetry build
     - name: Lint with tox
       run: |
         tox

From a60df16810f77cefd0f7837532a6812461594b1c Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:43:16 +0100
Subject: [PATCH 26/29] chg: adapted documentation to inscript.

---
 README.rst | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.rst b/README.rst
index 7fc6ef2..87eca1e 100644
--- a/README.rst
+++ b/README.rst
@@ -131,9 +131,9 @@ the corresponding text representation.
 Command line parameters
 -----------------------
 
-The inscript.py command line client supports the following parameters::
+The inscript command line client supports the following parameters::
 
-    usage: inscript.py [-h] [-o OUTPUT] [-e ENCODING] [-i] [-d] [-l] [-a] [-r ANNOTATION_RULES] [-p POSTPROCESSOR] [--indentation INDENTATION]
+    usage: inscript [-h] [-o OUTPUT] [-e ENCODING] [-i] [-d] [-l] [-a] [-r ANNOTATION_RULES] [-p POSTPROCESSOR] [--indentation INDENTATION]
                        [--table-cell-separator TABLE_CELL_SEPARATOR] [-v]
                        [input]
 
@@ -172,19 +172,19 @@ HTML to text conversion
 -----------------------
 convert the given page to text and output the result to the screen::
 
-  $ inscript.py https://www.fhgr.ch
+  $ inscript https://www.fhgr.ch
    
 convert the file to text and save the output to fhgr.txt::
 
-  $ inscript.py fhgr.html -o fhgr.txt
+  $ inscript fhgr.html -o fhgr.txt
 
 convert the file using strict indentation (i.e., minimize indentation and extra spaces) and save the output to fhgr-layout-optimized.txt::
 
-  $ inscript.py --indentation strict fhgr.html -o fhgr-layout-optimized.txt
+  $ inscript --indentation strict fhgr.html -o fhgr-layout-optimized.txt
    
 convert HTML provided via stdin and save the output to output.txt::
 
-  $ echo "<body><p>Make it so!</p></body>" | inscript.py -o output.txt 
+  $ echo "<body><p>Make it so!</p></body>" | inscript -o output.txt 
 
 
 HTML to annotated text conversion
@@ -193,7 +193,7 @@ convert and annotate HTML from a Web page using the provided annotation rules.
 
 Download the example `annotation-profile.json <https://github.com/weblyzard/inscriptis/blob/master/examples/annotation-profile.json>`_ and save it to your working directory::
 
-  $ inscript.py https://www.fhgr.ch -r annotation-profile.json
+  $ inscript https://www.fhgr.ch -r annotation-profile.json
 
 The annotation rules are specified in `annotation-profile.json`:
 
@@ -241,7 +241,7 @@ Annotation postprocessors enable the post processing of annotations to formats
 that are suitable for your particular application. Post processors can be
 specified with the ``-p`` or ``--postprocessor`` command line argument::
 
-  $ inscript.py https://www.fhgr.ch \
+  $ inscript https://www.fhgr.ch \
           -r ./examples/annotation-profile.json \
           -p surface
 
@@ -286,7 +286,7 @@ Currently, inscriptis supports the following postprocessors:
 
    .. code-block:: bash
 
-      inscript.py --annotation-rules ./wikipedia.json \
+      inscript --annotation-rules ./wikipedia.json \
                   --postprocessor html \
                   https://en.wikipedia.org/wiki/Chur.html
 
@@ -503,7 +503,7 @@ The following options are available for fine tuning inscriptis' HTML rendering:
 1. **More rigorous indentation:** call ``inscriptis.get_text()`` with the
    parameter ``indentation='extended'`` to also use indentation for tags such as
    ``<div>`` and ``<span>`` that do not provide indentation in their standard
-   definition. This strategy is the default in ``inscript.py`` and many other
+   definition. This strategy is the default in ``inscript`` and many other
    tools such as Lynx. If you do not want extended indentation you can use the
    parameter ``indentation='standard'`` instead.
 

From 2b6c4494de4e2ce86c36d79f14f4b110caf26b81 Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:46:39 +0100
Subject: [PATCH 27/29] fix: build process.

---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 7613ee1..4364f97 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -24,7 +24,7 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install tox setuptools pytest pytest-cov codecov poetry
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        poetry build
+        poetry install
     - name: Lint with tox
       run: |
         tox

From 34eeb7875c2e3a508c5884b1623f80a191700bed Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:53:10 +0100
Subject: [PATCH 28/29] chg: improved build process.

---
 .github/workflows/python-package.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 4364f97..bae9208 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -22,12 +22,9 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install tox setuptools pytest pytest-cov codecov poetry
+        python -m pip install tox setuptools pytest pytest-cov codecov
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
         poetry install
     - name: Lint with tox
       run: |
         tox
-    - name: Test with pytest
-      run: |
-        py.test --cov=inscripits ./tests && codecov

From c0630ffb487e0972336c4c88a4b53f6800d3a41d Mon Sep 17 00:00:00 2001
From: Albert Weichselbraun <albert@weichselbraun.net>
Date: Tue, 16 Jan 2024 15:56:44 +0100
Subject: [PATCH 29/29] fix: improved build process.

---
 .github/workflows/python-package.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index bae9208..c2cb2d5 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -19,12 +19,10 @@ jobs:
       uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
+    - name: Install build environment
       run: |
         python -m pip install --upgrade pip
         python -m pip install tox setuptools pytest pytest-cov codecov
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        poetry install
-    - name: Lint with tox
+    - name: Build and test with tox
       run: |
         tox