diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
index 8f45e22e6..f80c0c6eb 100644
--- a/.github/workflows/tox.yml
+++ b/.github/workflows/tox.yml
@@ -71,7 +71,7 @@ jobs:
fail-fast: false
max-parallel: 4
matrix:
- tox-env: [flake8, pep517check, checkspelling]
+ tox-env: [mypy, flake8, pep517check, checkspelling]
env:
TOXENV: ${{ matrix.tox-env }}
diff --git a/markdown/__main__.py b/markdown/__main__.py
index c323aaac4..b907108c1 100644
--- a/markdown/__main__.py
+++ b/markdown/__main__.py
@@ -24,6 +24,11 @@
import codecs
import warnings
import markdown
+import logging
+from logging import DEBUG, WARNING, CRITICAL
+from typing import Any, Callable, IO, Mapping
+
+yaml_load: Callable[[IO], Any]
try:
# We use `unsafe_load` because users may need to pass in actual Python
# objects. As this is only available from the CLI, the user has much
@@ -32,18 +37,16 @@
except ImportError: # pragma: no cover
try:
# Fall back to PyYAML <5.1
- from yaml import load as yaml_load
+ from yaml import load as yaml_load # type: ignore
except ImportError:
# Fall back to JSON
from json import load as yaml_load
-import logging
-from logging import DEBUG, WARNING, CRITICAL
logger = logging.getLogger('MARKDOWN')
-def parse_options(args=None, values=None):
+def parse_options(args=None, values=None) -> tuple[Mapping[str, Any], bool]:
"""
Define and parse `optparse` options for command-line usage.
"""
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
index d2020b9b6..798977789 100644
--- a/markdown/blockprocessors.py
+++ b/markdown/blockprocessors.py
@@ -121,7 +121,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
parent: An `etree` element which will be the parent of the block.
block: A block of text from the source which has been split at blank lines.
"""
- pass # pragma: no cover
+ raise NotImplementedError() # pragma: no cover
def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
""" Run processor. Must be overridden by subclasses.
@@ -147,7 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
parent: An `etree` element which is the parent of the current block.
blocks: A list of all remaining blocks of the document.
"""
- pass # pragma: no cover
+ raise NotImplementedError() # pragma: no cover
class ListIndentProcessor(BlockProcessor):
@@ -167,7 +167,7 @@ class ListIndentProcessor(BlockProcessor):
LIST_TYPES = ['ul', 'ol']
""" Types of lists this processor can operate on. """
- def __init__(self, *args):
+ def __init__(self, *args) -> None:
super().__init__(*args)
self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)
@@ -175,7 +175,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
return block.startswith(' '*self.tab_length) and \
not self.parser.state.isstate('detabbed') and \
(parent.tag in self.ITEM_TYPES or
- (len(parent) and parent[-1] is not None and
+ (len(parent) > 0 and parent[-1] is not None and
(parent[-1].tag in self.LIST_TYPES)))
def run(self, parent: etree.Element, blocks: list[str]) -> None:
@@ -417,7 +417,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> None:
def get_items(self, block: str) -> list[str]:
""" Break a block into list items. """
- items = []
+ items: list[str] = []
for line in block.split('\n'):
m = self.CHILD_RE.match(line)
if m:
@@ -426,7 +426,9 @@ def get_items(self, block: str) -> list[str]:
if not items and self.TAG == 'ol':
# Detect the integer value of first list item
INTEGER_RE = re.compile(r'(\d+)')
- self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
+ int_match = INTEGER_RE.match(m.group(1))
+ assert int_match is not None
+ self.STARTSWITH = int_match.group()
# Append to the list
items.append(m.group(3))
elif self.INDENT_RE.match(line):
diff --git a/markdown/core.py b/markdown/core.py
index 6c7a21be9..92b729119 100644
--- a/markdown/core.py
+++ b/markdown/core.py
@@ -85,7 +85,7 @@ class Markdown:
callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
"""
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs: Any):
"""
Creates a new Markdown instance.
@@ -183,7 +183,7 @@ def registerExtensions(
'Successfully loaded extension "%s.%s".'
% (ext.__class__.__module__, ext.__class__.__name__)
)
- elif ext is not None:
+ elif ext is not None: # type: ignore[unreachable]
raise TypeError(
'Extension "{}.{}" must be of type: "{}.{}"'.format(
ext.__class__.__module__, ext.__class__.__name__,
@@ -417,11 +417,11 @@ def convertFile(
# Read the source
if input:
if isinstance(input, str):
- input_file = codecs.open(input, mode="r", encoding=encoding)
+ with codecs.open(input, mode="r", encoding=encoding) as input_file:
+ text = input_file.read()
else:
- input_file = codecs.getreader(encoding)(input)
- text = input_file.read()
- input_file.close()
+ with codecs.getreader(encoding)(input) as input_file:
+ text = input_file.read()
else:
text = sys.stdin.read()
@@ -440,13 +440,13 @@ def convertFile(
output_file.close()
else:
writer = codecs.getwriter(encoding)
- output_file = writer(output, errors="xmlcharrefreplace")
- output_file.write(html)
+ output_writer = writer(output, errors="xmlcharrefreplace")
+ output_writer.write(html)
# Don't close here. User may want to write more.
else:
# Encode manually and write bytes to stdout.
- html = html.encode(encoding, "xmlcharrefreplace")
- sys.stdout.buffer.write(html)
+ html_bytes = html.encode(encoding, "xmlcharrefreplace")
+ sys.stdout.buffer.write(html_bytes)
return self
@@ -482,7 +482,13 @@ def markdown(text: str, **kwargs: Any) -> str:
return md.convert(text)
-def markdownFromFile(**kwargs: Any):
+def markdownFromFile(
+ *,
+ input: str | BinaryIO | None = None,
+ output: str | BinaryIO | None = None,
+ encoding: str | None = None,
+ **kwargs: Any
+) -> None:
"""
Read Markdown text from a file and write output to a file or a stream.
@@ -491,13 +497,11 @@ def markdownFromFile(**kwargs: Any):
[`convert`][markdown.Markdown.convert].
Keyword arguments:
- input (str | BinaryIO): A file name or readable object.
- output (str | BinaryIO): A file name or writable object.
- encoding (str): Encoding of input and output.
+ input: A file name or readable object.
+ output: A file name or writable object.
+ encoding: Encoding of input and output.
**kwargs: Any arguments accepted by the `Markdown` class.
"""
md = Markdown(**kwargs)
- md.convertFile(kwargs.get('input', None),
- kwargs.get('output', None),
- kwargs.get('encoding', None))
+ md.convertFile(input, output, encoding)
diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py
index a5ec07b27..32ba55658 100644
--- a/markdown/extensions/__init__.py
+++ b/markdown/extensions/__init__.py
@@ -53,7 +53,7 @@ class Extension:
if a default is not set for each option.
"""
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
""" Initiate Extension and set up configs. """
self.setConfigs(kwargs)
diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py
index 738368afe..3d89fefa5 100644
--- a/markdown/extensions/abbr.py
+++ b/markdown/extensions/abbr.py
@@ -28,12 +28,16 @@
from ..util import AtomicString
import re
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
class AbbrExtension(Extension):
""" Abbreviation Extension for Python-Markdown. """
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """
md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)
diff --git a/markdown/extensions/admonition.py b/markdown/extensions/admonition.py
index d0e97002d..9346f3c28 100644
--- a/markdown/extensions/admonition.py
+++ b/markdown/extensions/admonition.py
@@ -33,13 +33,14 @@
from typing import TYPE_CHECKING
if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
from markdown import blockparser
class AdmonitionExtension(Extension):
""" Admonition extension for Python-Markdown. """
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add Admonition to Markdown instance. """
md.registerExtension(self)
@@ -59,7 +60,7 @@ def __init__(self, parser: blockparser.BlockParser):
super().__init__(parser)
self.current_sibling: etree.Element | None = None
- self.content_indention = 0
+ self.content_indent = 0
def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]:
"""Get sibling admonition.
@@ -74,11 +75,11 @@ def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Elemen
# We already acquired the block via test
if self.current_sibling is not None:
- sibling = self.current_sibling
+ prev_sibling = self.current_sibling
block, the_rest = self.detab(block, self.content_indent)
self.current_sibling = None
self.content_indent = 0
- return sibling, block, the_rest
+ return prev_sibling, block, the_rest
sibling = self.lastChild(parent)
@@ -147,6 +148,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> None:
p.text = title
p.set('class', self.CLASSNAME_TITLE)
else:
+ assert sibling is not None
            # Sibling is a list item, but we need to wrap it's content should be wrapped in <p>
if sibling.tag in ('li', 'dd') and sibling.text:
text = sibling.text
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
index 7ce3f9925..a7276e747 100644
--- a/markdown/extensions/attr_list.py
+++ b/markdown/extensions/attr_list.py
@@ -32,6 +32,7 @@
if TYPE_CHECKING: # pragma: no cover
from xml.etree.ElementTree import Element
+ from markdown import Markdown
def _handle_double_quote(s, t):
@@ -56,7 +57,7 @@ def _handle_word(s, t):
return t, t
-_scanner = re.Scanner([
+_scanner = re.Scanner([ # type: ignore[attr-defined]
(r'[^ =]+=".*?"', _handle_double_quote),
(r"[^ =]+='.*?'", _handle_single_quote),
(r'[^ =]+=[^ =]+', _handle_key_value),
@@ -86,6 +87,8 @@ class AttrListTreeprocessor(Treeprocessor):
r'\uf900-\ufdcf\ufdf0-\ufffd'
r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
+ md: Markdown
+
def run(self, doc: Element) -> None:
for elem in doc.iter():
if self.md.is_block_level(elem.tag):
@@ -102,18 +105,18 @@ def run(self, doc: Element) -> None:
if child.tag in ['ul', 'ol']:
pos = i
break
- if pos is None and elem[-1].tail:
+ if pos is None and (tail := elem[-1].tail):
# use tail of last child. no `ul` or `ol`.
- m = RE.search(elem[-1].tail)
+ m = RE.search(tail)
if m:
self.assign_attrs(elem, m.group(1))
- elem[-1].tail = elem[-1].tail[:m.start()]
- elif pos is not None and pos > 0 and elem[pos-1].tail:
+ elem[-1].tail = tail[:m.start()]
+ elif pos is not None and pos > 0 and (tail := elem[pos-1].tail):
# use tail of last child before `ul` or `ol`
- m = RE.search(elem[pos-1].tail)
+ m = RE.search(tail)
if m:
self.assign_attrs(elem, m.group(1))
- elem[pos-1].tail = elem[pos-1].tail[:m.start()]
+ elem[pos-1].tail = tail[:m.start()]
elif elem.text:
# use text. `ul` is first child.
m = RE.search(elem.text)
@@ -170,7 +173,7 @@ def sanitize_name(self, name: str) -> str:
class AttrListExtension(Extension):
""" Attribute List extension for Python-Markdown """
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
md.registerExtension(self)
diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py
index 0114908f6..a33e7be19 100644
--- a/markdown/extensions/codehilite.py
+++ b/markdown/extensions/codehilite.py
@@ -27,6 +27,7 @@
from typing import TYPE_CHECKING, Callable, Any
if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
import xml.etree.ElementTree as etree
try: # pragma: no cover
@@ -150,7 +151,7 @@ def hilite(self, shebang: bool = True) -> str:
if pygments and self.use_pygments:
try:
- lexer = get_lexer_by_name(self.lang, **self.options)
+ lexer = get_lexer_by_name(self.lang or '', **self.options)
except ValueError:
try:
if self.guess_lang:
@@ -161,7 +162,7 @@ def hilite(self, shebang: bool = True) -> str:
lexer = get_lexer_by_name('text', **self.options)
if not self.lang:
# Use the guessed lexer's language instead
- self.lang = lexer.aliases[0]
+ self.lang = lexer.aliases[0] # type: ignore[attr-defined]
lang_str = f'{self.lang_prefix}{self.lang}'
if isinstance(self.pygments_formatter, str):
try:
@@ -254,6 +255,7 @@ class HiliteTreeprocessor(Treeprocessor):
""" Highlight source code in code blocks. """
config: dict[str, Any]
+ md: Markdown
def code_unescape(self, text: str) -> str:
"""Unescape code."""
@@ -270,8 +272,10 @@ def run(self, root: etree.Element) -> None:
for block in blocks:
if len(block) == 1 and block[0].tag == 'code':
local_config = self.config.copy()
+ text = block[0].text
+ assert text is not None
code = CodeHilite(
- self.code_unescape(block[0].text),
+ self.code_unescape(text),
tab_length=self.md.tab_length,
style=local_config.pop('pygments_style', 'default'),
**local_config
@@ -288,7 +292,7 @@ def run(self, root: etree.Element) -> None:
class CodeHiliteExtension(Extension):
""" Add source code highlighting to markdown code blocks. """
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
# define default configs
self.config = {
'linenums': [
@@ -331,7 +335,7 @@ def __init__(self, **kwargs):
pass # Assume it's not a boolean value. Use as-is.
self.config[key] = [value, '']
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add `HilitePostprocessor` to Markdown instance. """
hiliter = HiliteTreeprocessor(md)
hiliter.config = self.getConfigs()
diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py
index 5324bf193..8fb5c3db8 100644
--- a/markdown/extensions/def_list.py
+++ b/markdown/extensions/def_list.py
@@ -25,6 +25,10 @@
from ..blockprocessors import BlockProcessor, ListIndentProcessor
import xml.etree.ElementTree as etree
import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
class DefListProcessor(BlockProcessor):
@@ -40,6 +44,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
raw_block = blocks.pop(0)
m = self.RE.search(raw_block)
+ assert m is not None
terms = [term.strip() for term in
raw_block[:m.start()].split('\n') if term.strip()]
block = raw_block[m.end():]
@@ -53,20 +58,21 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
else:
d = m.group(2)
sibling = self.lastChild(parent)
- if not terms and sibling is None:
- # This is not a definition item. Most likely a paragraph that
- # starts with a colon at the beginning of a document or list.
- blocks.insert(0, raw_block)
- return False
- if not terms and sibling.tag == 'p':
- # The previous paragraph contains the terms
- state = 'looselist'
- terms = sibling.text.split('\n')
- parent.remove(sibling)
- # Acquire new sibling
- sibling = self.lastChild(parent)
- else:
- state = 'list'
+ state = 'list'
+ if not terms:
+ if sibling is None:
+ # This is not a definition item. Most likely a paragraph that
+ # starts with a colon at the beginning of a document or list.
+ blocks.insert(0, raw_block)
+ return False
+ if sibling.tag == 'p':
+ # The previous paragraph contains the terms
+ state = 'looselist'
+ assert sibling.text is not None
+ terms = sibling.text.split('\n')
+ parent.remove(sibling)
+ # Acquire new sibling
+ sibling = self.lastChild(parent)
if sibling is not None and sibling.tag == 'dl':
# This is another item on an existing list
@@ -88,6 +94,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
if theRest:
blocks.insert(0, theRest)
+ return None
class DefListIndentProcessor(ListIndentProcessor):
@@ -99,7 +106,7 @@ class DefListIndentProcessor(ListIndentProcessor):
LIST_TYPES = ['dl', 'ol', 'ul']
""" Include `dl` is list types. """
- def create_item(self, parent: etree.Element, block: str) -> None:
+ def create_item(self, parent: etree.Element, block: str):
""" Create a new `dd` or `li` (depending on parent) and parse the block with it as the parent. """
dd = etree.SubElement(parent, 'dd')
@@ -109,7 +116,7 @@ def create_item(self, parent: etree.Element, block: str) -> None:
class DefListExtension(Extension):
""" Add definition lists to Markdown. """
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add an instance of `DefListProcessor` to `BlockParser`. """
md.parser.blockprocessors.register(DefListIndentProcessor(md.parser), 'defindent', 85)
md.parser.blockprocessors.register(DefListProcessor(md.parser), 'deflist', 25)
diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py
index 74ebc192c..d33a09697 100644
--- a/markdown/extensions/extra.py
+++ b/markdown/extensions/extra.py
@@ -53,7 +53,7 @@
class ExtraExtension(Extension):
""" Add various extensions to Markdown class."""
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
""" `config` is a dumb holder which gets passed to the actual extension later. """
self.config = kwargs
diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
index da1a9be1e..7cd33c85b 100644
--- a/markdown/extensions/fenced_code.py
+++ b/markdown/extensions/fenced_code.py
@@ -36,14 +36,14 @@
class FencedCodeExtension(Extension):
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
self.config = {
'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"']
}
""" Default configuration options. """
super().__init__(**kwargs)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add `FencedBlockPreprocessor` to the Markdown instance. """
md.registerExtension(self)
@@ -53,6 +53,8 @@ def extendMarkdown(self, md):
class FencedBlockPreprocessor(Preprocessor):
""" Find and extract fenced code blocks. """
+ md: Markdown
+
FENCED_BLOCK_RE = re.compile(
dedent(r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
@@ -97,12 +99,13 @@ def run(self, lines: list[str]) -> list[str]:
while 1:
m = self.FENCED_BLOCK_RE.search(text)
if m:
- lang, id, classes, config = None, '', [], {}
+ lang = None
if m.group('attrs'):
id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
if len(classes):
lang = classes.pop(0)
else:
+ id, classes, config = '', [], {}
if m.group('lang'):
lang = m.group('lang')
if m.group('hl_lines'):
@@ -159,7 +162,7 @@ def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str]
""" Return tuple: `(id, [list, of, classes], {configs})` """
id = ''
classes = []
- configs = {}
+ configs: dict[str, Any] = {}
for k, v in attrs:
if k == 'id':
id = v
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
index 30c081138..ac35cf484 100644
--- a/markdown/extensions/footnotes.py
+++ b/markdown/extensions/footnotes.py
@@ -29,6 +29,10 @@
import re
import copy
import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
@@ -38,7 +42,7 @@
class FootnoteExtension(Extension):
""" Footnote Extension. """
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
""" Setup configs. """
self.config = {
@@ -73,7 +77,7 @@ def __init__(self, **kwargs):
self.reset()
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add pieces to Markdown. """
md.registerExtension(self)
self.parser = md.parser
@@ -130,7 +134,7 @@ def findFootnotesPlaceholder(
self, root: etree.Element
) -> tuple[etree.Element, etree.Element, bool] | None:
""" Return ElementTree Element that contains Footnote placeholder. """
- def finder(element):
+ def finder(element: etree.Element) -> tuple[etree.Element, etree.Element, bool] | None:
for child in element:
if child.text:
if child.text.find(self.getConfig("PLACE_MARKER")) > -1:
@@ -205,6 +209,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None:
if len(li):
node = li[-1]
if node.tag == "p":
+ assert node.text is not None
node.text = node.text + NBSP_PLACEHOLDER
node.append(backlink)
else:
@@ -290,7 +295,7 @@ def detectTabbed(self, blocks: list[str]) -> list[str]:
break
return fn_blocks
- def detab(self, block: str) -> str:
+ def detab(self, block: str) -> str: # type: ignore[override]
""" Remove one level of indent from a block.
Preserve lazily indented blocks by only removing indent from indented lines.
diff --git a/markdown/extensions/legacy_attrs.py b/markdown/extensions/legacy_attrs.py
index 6641e6ea6..3f1c7a6dc 100644
--- a/markdown/extensions/legacy_attrs.py
+++ b/markdown/extensions/legacy_attrs.py
@@ -37,6 +37,7 @@
if TYPE_CHECKING: # pragma: no cover
import xml.etree.ElementTree as etree
+ from markdown import Markdown
ATTR_RE = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123}
@@ -56,13 +57,14 @@ def run(self, doc: etree.Element) -> None:
def handleAttributes(self, el: etree.Element, txt: str) -> str:
""" Set attributes and return text without definitions. """
- def attributeCallback(match: re.Match[str]):
+ def attributeCallback(match: re.Match[str]) -> str:
el.set(match.group(1), match.group(2).replace('\n', ' '))
+ return ''
return ATTR_RE.sub(attributeCallback, txt)
class LegacyAttrExtension(Extension):
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add `LegacyAttrs` to Markdown instance. """
md.treeprocessors.register(LegacyAttrs(md), 'legacyattrs', 15)
diff --git a/markdown/extensions/legacy_em.py b/markdown/extensions/legacy_em.py
index a6f67b7ef..6fbff4a06 100644
--- a/markdown/extensions/legacy_em.py
+++ b/markdown/extensions/legacy_em.py
@@ -16,6 +16,10 @@
from . import Extension
from ..inlinepatterns import UnderscoreProcessor, EmStrongItem, EM_STRONG2_RE, STRONG_EM2_RE
import re
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
# _emphasis_
EMPHASIS_RE = r'(_)([^_]+)\1'
@@ -42,7 +46,7 @@ class LegacyUnderscoreProcessor(UnderscoreProcessor):
class LegacyEmExtension(Extension):
""" Add legacy_em extension to Markdown class."""
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Modify inline patterns. """
md.inlinePatterns.register(LegacyUnderscoreProcessor(r'_'), 'em_strong2', 50)
diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py
index 64b84a5f4..ebcea86c2 100644
--- a/markdown/extensions/md_in_html.py
+++ b/markdown/extensions/md_in_html.py
@@ -28,7 +28,7 @@
from .. import util
from ..htmlparser import HTMLExtractor, blank_line_re
import xml.etree.ElementTree as etree
-from typing import TYPE_CHECKING, Literal, Mapping
+from typing import TYPE_CHECKING, Literal, Mapping, Sequence
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
@@ -56,14 +56,14 @@ def __init__(self, md: Markdown, *args, **kwargs):
self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
self.span_and_blocks_tags = self.block_tags | self.span_tags
- def reset(self):
+ def reset(self) -> None:
"""Reset this instance. Loses all unprocessed data."""
self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags
self.treebuilder = etree.TreeBuilder()
self.mdstate: list[Literal['block', 'span', 'off', None]] = []
super().reset()
- def close(self):
+ def close(self) -> None:
"""Handle any buffered data."""
super().close()
# Handle any unclosed tags.
@@ -98,13 +98,13 @@ def get_state(self, tag, attrs: Mapping[str, str]) -> Literal['block', 'span', '
else: # pragma: no cover
return None
- def handle_starttag(self, tag, attrs):
+ def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None:
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags and (self.at_line_start() or self.intail):
- attrs = {key: value if value is not None else key for key, value in attrs}
- if "markdown" in attrs:
- attrs.pop('markdown')
- element = etree.Element(tag, attrs)
+ attrs_dict = {key: value if value is not None else key for key, value in attrs}
+ if "markdown" in attrs_dict:
+ attrs_dict.pop('markdown')
+ element = etree.Element(tag, attrs_dict)
data = etree.tostring(element, encoding='unicode', method='html')
else:
data = self.get_starttag_text()
@@ -114,20 +114,20 @@ def handle_starttag(self, tag, attrs):
if tag in self.block_level_tags and (self.at_line_start() or self.intail):
# Valueless attribute (ex: `<input checked>`) results in `[('checked', None)]`.
# Convert to `{'checked': 'checked'}`.
- attrs = {key: value if value is not None else key for key, value in attrs}
- state = self.get_state(tag, attrs)
- if self.inraw or (state in [None, 'off'] and not self.mdstack):
+ attrs_dict = {key: value if value is not None else key for key, value in attrs}
+ state = self.get_state(tag, attrs_dict)
+ if self.inraw or ((state is None or state == 'off') and not self.mdstack):
# fall back to default behavior
- attrs.pop('markdown', None)
- super().handle_starttag(tag, attrs)
+ attrs_dict.pop('markdown', None)
+ super().handle_starttag(tag, attrs_dict) # type: ignore[arg-type]
else:
if 'p' in self.mdstack and tag in self.block_level_tags:
# Close unclosed 'p' tag
self.handle_endtag('p')
self.mdstate.append(state)
self.mdstack.append(tag)
- attrs['markdown'] = state
- self.treebuilder.start(tag, attrs)
+ attrs_dict['markdown'] = state # type: ignore[assignment]
+ self.treebuilder.start(tag, attrs_dict)
else:
# Span level tag
if self.inraw:
@@ -142,7 +142,7 @@ def handle_starttag(self, tag, attrs):
# This is presumably a standalone tag in a code span (see #1036).
self.clear_cdata_mode()
- def handle_endtag(self, tag):
+ def handle_endtag(self, tag: str) -> None:
if tag in self.block_level_tags:
if self.inraw:
super().handle_endtag(tag)
@@ -165,7 +165,7 @@ def handle_endtag(self, tag):
self.cleandoc.append('\n')
self.cleandoc.append(self.md.htmlStash.store(element))
self.cleandoc.append('\n\n')
- self.state = []
+ self.state: list = []
# Check if element has a tail
if not blank_line_re.match(
self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]):
@@ -189,12 +189,12 @@ def handle_endtag(self, tag):
else:
self.handle_data(text)
- def handle_startendtag(self, tag, attrs):
+ def handle_startendtag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None:
if tag in self.empty_tags:
- attrs = {key: value if value is not None else key for key, value in attrs}
- if "markdown" in attrs:
- attrs.pop('markdown')
- element = etree.Element(tag, attrs)
+ attrs_dict = {key: value if value is not None else key for key, value in attrs}
+ if "markdown" in attrs_dict:
+ attrs_dict.pop('markdown')
+ element = etree.Element(tag, attrs_dict)
data = etree.tostring(element, encoding='unicode', method='html')
else:
data = self.get_starttag_text()
@@ -202,7 +202,7 @@ def handle_startendtag(self, tag, attrs):
data = self.get_starttag_text()
self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
- def handle_data(self, data):
+ def handle_data(self, data: str) -> None:
if self.intail and '\n' in data:
self.intail = False
if self.inraw or not self.mdstack:
@@ -210,7 +210,7 @@ def handle_data(self, data):
else:
self.treebuilder.data(data)
- def handle_empty_tag(self, data, is_block):
+ def handle_empty_tag(self, data: str, is_block: bool) -> None:
if self.inraw or not self.mdstack:
super().handle_empty_tag(data, is_block)
else:
@@ -243,6 +243,8 @@ def parse_html_declaration(self, i: int) -> int:
class HtmlBlockPreprocessor(Preprocessor):
"""Remove html blocks from the text and store them for later retrieval."""
+ md: Markdown
+
def run(self, lines: list[str]) -> list[str]:
source = '\n'.join(lines)
parser = HTMLExtractorExtra(self.md)
@@ -359,7 +361,7 @@ def stash_to_string(self, text: str | etree.Element) -> str:
class MarkdownInHtmlExtension(Extension):
"""Add Markdown parsing in HTML to Markdown class."""
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Register extension instances. """
# Replace raw HTML preprocessor
diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py
index cb703399b..e43698c28 100644
--- a/markdown/extensions/meta.py
+++ b/markdown/extensions/meta.py
@@ -25,7 +25,10 @@
from ..preprocessors import Preprocessor
import re
import logging
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
log = logging.getLogger('MARKDOWN')
@@ -39,19 +42,21 @@
class MetaExtension (Extension):
""" Meta-Data extension for Python-Markdown. """
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add `MetaPreprocessor` to Markdown instance. """
md.registerExtension(self)
self.md = md
md.preprocessors.register(MetaPreprocessor(md), 'meta', 27)
def reset(self) -> None:
- self.md.Meta = {}
+ self.md.Meta = {} # type: ignore[attr-defined]
class MetaPreprocessor(Preprocessor):
""" Get Meta-Data. """
+ md: Markdown
+
def run(self, lines: list[str]) -> list[str]:
""" Parse Meta-Data and store in Markdown.Meta. """
meta: dict[str, Any] = {}
@@ -78,7 +83,7 @@ def run(self, lines: list[str]) -> list[str]:
else:
lines.insert(0, line)
break # no meta data - done
- self.md.Meta = meta
+ self.md.Meta = meta # type: ignore[attr-defined]
return lines
diff --git a/markdown/extensions/nl2br.py b/markdown/extensions/nl2br.py
index 177df1ee4..eb715defc 100644
--- a/markdown/extensions/nl2br.py
+++ b/markdown/extensions/nl2br.py
@@ -25,13 +25,17 @@
from . import Extension
from ..inlinepatterns import SubstituteTagInlineProcessor
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
BR_RE = r'\n'
class Nl2BrExtension(Extension):
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add a `SubstituteTagInlineProcessor` to Markdown. """
br_tag = SubstituteTagInlineProcessor(BR_RE, 'br')
md.inlinePatterns.register(br_tag, 'nl', 5)
diff --git a/markdown/extensions/sane_lists.py b/markdown/extensions/sane_lists.py
index be421f943..cf2b5ccc7 100644
--- a/markdown/extensions/sane_lists.py
+++ b/markdown/extensions/sane_lists.py
@@ -27,6 +27,7 @@
from typing import TYPE_CHECKING
if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
from .. import blockparser
@@ -59,7 +60,7 @@ def __init__(self, parser: blockparser.BlockParser):
class SaneListExtension(Extension):
""" Add sane lists to Markdown. """
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Override existing Processors. """
md.parser.blockprocessors.register(SaneOListProcessor(md.parser), 'olist', 40)
md.parser.blockprocessors.register(SaneUListProcessor(md.parser), 'ulist', 30)
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
index 0ce7772a7..14abb8422 100644
--- a/markdown/extensions/smarty.py
+++ b/markdown/extensions/smarty.py
@@ -179,7 +179,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
class SmartyExtension(Extension):
""" Add Smarty to Markdown. """
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
self.config = {
'smart_quotes': [True, 'Educate quotes'],
'smart_angled_quotes': [False, 'Educate angled quotes'],
@@ -198,10 +198,9 @@ def _addPatterns(
patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]],
serie: str,
priority: int,
- ):
- for ind, pattern in enumerate(patterns):
- pattern += (md,)
- pattern = SubstituteTextPattern(*pattern)
+ ) -> None:
+ for ind, pattern_parts in enumerate(patterns):
+ pattern = SubstituteTextPattern(*pattern_parts, md)
name = 'smarty-%s-%d' % (serie, ind)
self.inlinePatterns.register(pattern, name, priority-ind)
@@ -253,7 +252,7 @@ def educateQuotes(self, md: Markdown) -> None:
)
self._addPatterns(md, patterns, 'quotes', 30)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
configs = self.getConfigs()
self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry()
if configs['smart_ellipses']:
diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py
index 6e2fa1742..3220c7f76 100644
--- a/markdown/extensions/tables.py
+++ b/markdown/extensions/tables.py
@@ -28,6 +28,7 @@
from typing import TYPE_CHECKING, Any, Sequence
if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
from .. import blockparser
PIPE_NONE = 0
@@ -228,7 +229,7 @@ def _split(self, row: str) -> list[str]:
class TableExtension(Extension):
""" Add tables to Markdown. """
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
self.config = {
'use_align_attribute': [False, 'True to use align attribute instead of style.'],
}
@@ -236,7 +237,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add an instance of `TableProcessor` to `BlockParser`. """
if '|' not in md.ESCAPED_CHARS:
md.ESCAPED_CHARS.append('|')
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
index a17d7241c..41838287e 100644
--- a/markdown/extensions/toc.py
+++ b/markdown/extensions/toc.py
@@ -27,10 +27,11 @@
import html
import unicodedata
import xml.etree.ElementTree as etree
-from typing import TYPE_CHECKING, Any, Iterator, MutableSet
+from typing import TYPE_CHECKING, Any, Iterator, MutableSet, TypedDict
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
+ from .. import treeprocessors
def slugify(value: str, separator: str, unicode: bool = False) -> str:
@@ -83,6 +84,7 @@ def _html_sub(m: re.Match[str]) -> str:
raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))]
except (IndexError, TypeError): # pragma: no cover
return m.group(0)
+ assert isinstance(raw, str)
# Strip out tags and/or entities - leaving text
res = re.sub(r'(<[^>]+>)', '', raw)
if strip_entities:
@@ -98,7 +100,17 @@ def unescape(text: str) -> str:
return c.unescape(text)
-def nest_toc_tokens(toc_list):
+if TYPE_CHECKING:
+ class FlatTocToken(TypedDict):
+ level: int
+ id: str
+ name: str
+
+ class TocToken(FlatTocToken):
+ children: list[TocToken]
+
+
+def nest_toc_tokens(toc_list: list[FlatTocToken]) -> list[TocToken]:
"""Given an unsorted list with errors and skips, return a nested one.
[{'level': 1}, {'level': 2}]
@@ -115,15 +127,15 @@ def nest_toc_tokens(toc_list):
ordered_list = []
if len(toc_list):
# Initialize everything by processing the first entry
- last = toc_list.pop(0)
+ last: TocToken = toc_list.pop(0) # type: ignore[assignment]
last['children'] = []
levels = [last['level']]
ordered_list.append(last)
- parents = []
+ parents: list[TocToken] = []
# Walk the rest nesting the entries properly
while toc_list:
- t = toc_list.pop(0)
+ t: TocToken = toc_list.pop(0) # type: ignore[assignment]
current_level = t['level']
t['children'] = []
@@ -166,6 +178,8 @@ def nest_toc_tokens(toc_list):
class TocTreeprocessor(Treeprocessor):
""" Step through document and build TOC. """
+ md: Markdown
+
def __init__(self, md: Markdown, config: dict[str, Any]):
super().__init__(md)
@@ -178,9 +192,11 @@ def __init__(self, md: Markdown, config: dict[str, Any]):
self.title_class: str = config["title_class"]
self.use_anchors: bool = parseBoolValue(config["anchorlink"])
self.anchorlink_class: str = config["anchorlink_class"]
- self.use_permalinks = parseBoolValue(config["permalink"], False)
- if self.use_permalinks is None:
+ use_permalinks = parseBoolValue(config["permalink"], False)
+ if use_permalinks is None:
self.use_permalinks = config["permalink"]
+ else:
+ self.use_permalinks = use_permalinks
self.permalink_class: str = config["permalink_class"]
self.permalink_title: str = config["permalink_title"]
self.permalink_leading: bool | None = parseBoolValue(config["permalink_leading"], False)
@@ -258,7 +274,7 @@ def add_permalink(self, c: etree.Element, elem_id: str) -> None:
else:
c.append(permalink)
- def build_toc_div(self, toc_list: list) -> etree.Element:
+ def build_toc_div(self, toc_list: list[TocToken]) -> etree.Element:
""" Return a string div given a toc list. """
div = etree.Element("div")
div.attrib["class"] = self.toc_class
@@ -270,7 +286,7 @@ def build_toc_div(self, toc_list: list) -> etree.Element:
header.attrib["class"] = self.title_class
header.text = self.title
- def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element:
+ def build_etree_ul(toc_list: list[TocToken], parent: etree.Element) -> etree.Element:
ul = etree.SubElement(parent, "ul")
for item in toc_list:
# List item link, to be inserted into the toc div
@@ -285,7 +301,9 @@ def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element:
build_etree_ul(toc_list, div)
if 'prettify' in self.md.treeprocessors:
- self.md.treeprocessors['prettify'].run(div)
+ prettify_processor: treeprocessors.PrettifyTreeprocessor
+ prettify_processor = self.md.treeprocessors['prettify'] # type: ignore[assignment]
+ prettify_processor.run(div)
return div
@@ -296,7 +314,7 @@ def run(self, doc: etree.Element) -> None:
if "id" in el.attrib:
used_ids.add(el.attrib["id"])
- toc_tokens = []
+ flat_toc_tokens: list[FlatTocToken] = []
for el in doc.iter():
if isinstance(el.tag, str) and self.header_rgx.match(el.tag):
self.set_level(el)
@@ -308,7 +326,7 @@ def run(self, doc: etree.Element) -> None:
el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids)
if int(el.tag[-1]) >= self.toc_top and int(el.tag[-1]) <= self.toc_bottom:
- toc_tokens.append({
+ flat_toc_tokens.append({
'level': int(el.tag[-1]),
'id': el.attrib["id"],
'name': unescape(stashedHTML2text(
@@ -326,7 +344,7 @@ def run(self, doc: etree.Element) -> None:
if self.use_permalinks not in [False, None]:
self.add_permalink(el, el.attrib["id"])
- toc_tokens = nest_toc_tokens(toc_tokens)
+ toc_tokens = nest_toc_tokens(flat_toc_tokens)
div = self.build_toc_div(toc_tokens)
if self.marker:
self.replace_marker(doc, div)
@@ -335,15 +353,15 @@ def run(self, doc: etree.Element) -> None:
toc = self.md.serializer(div)
for pp in self.md.postprocessors:
toc = pp.run(toc)
- self.md.toc_tokens = toc_tokens
- self.md.toc = toc
+ self.md.toc_tokens = toc_tokens # type: ignore[attr-defined]
+ self.md.toc = toc # type: ignore[attr-defined]
class TocExtension(Extension):
TreeProcessorClass = TocTreeprocessor
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
self.config = {
'marker': [
'[TOC]',
@@ -395,7 +413,7 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
""" Add TOC tree processor to Markdown. """
md.registerExtension(self)
self.md = md
@@ -404,8 +422,8 @@ def extendMarkdown(self, md):
md.treeprocessors.register(tocext, 'toc', 5)
def reset(self) -> None:
- self.md.toc = ''
- self.md.toc_tokens = []
+ self.md.toc = '' # type: ignore[attr-defined]
+ self.md.toc_tokens = [] # type: ignore[attr-defined]
def makeExtension(**kwargs): # pragma: no cover
diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py
index 3f3cbe2dd..ed8e7335b 100644
--- a/markdown/extensions/wikilinks.py
+++ b/markdown/extensions/wikilinks.py
@@ -25,7 +25,10 @@
from ..inlinepatterns import InlineProcessor
import xml.etree.ElementTree as etree
import re
-from typing import Any
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import Markdown
def build_url(label: str, base: str, end: str) -> str:
@@ -37,7 +40,7 @@ def build_url(label: str, base: str, end: str) -> str:
class WikiLinkExtension(Extension):
""" Add inline processor to Markdown. """
- def __init__(self, **kwargs):
+ def __init__(self, **kwargs) -> None:
self.config = {
'base_url': ['/', 'String to append to beginning or URL.'],
'end_url': ['/', 'String to append to end of URL.'],
@@ -47,7 +50,7 @@ def __init__(self, **kwargs):
""" Default configuration options. """
super().__init__(**kwargs)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md: Markdown) -> None:
self.md = md
# append to end of inline patterns
@@ -60,11 +63,14 @@ def extendMarkdown(self, md):
class WikiLinksInlineProcessor(InlineProcessor):
""" Build link from `wikilink`. """
+ md: Markdown
+
def __init__(self, pattern: str, config: dict[str, Any]):
super().__init__(pattern)
self.config = config
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]:
+ a: etree.Element | str
if m.group(1).strip():
base_url, end_url, html_class = self._getMeta()
label = m.group(1).strip()
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
index 5155ef69d..faef4daac 100644
--- a/markdown/htmlparser.py
+++ b/markdown/htmlparser.py
@@ -28,7 +28,7 @@
import re
import importlib.util
import sys
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
@@ -37,7 +37,9 @@
# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
# Users can still do `from html import parser` and get the default behavior.
spec = importlib.util.find_spec('html.parser')
-htmlparser = importlib.util.module_from_spec(spec)
+assert spec is not None
+htmlparser: Any = importlib.util.module_from_spec(spec)
+assert spec.loader is not None
spec.loader.exec_module(htmlparser)
sys.modules['htmlparser'] = htmlparser
@@ -80,6 +82,8 @@ class HTMLExtractor(htmlparser.HTMLParser):
is stored in `cleandoc` as a list of strings.
"""
+ md: Markdown
+
def __init__(self, md: Markdown, *args, **kwargs):
if 'convert_charrefs' not in kwargs:
kwargs['convert_charrefs'] = False
@@ -93,7 +97,7 @@ def __init__(self, md: Markdown, *args, **kwargs):
super().__init__(*args, **kwargs)
self.md = md
- def reset(self):
+ def reset(self) -> None:
"""Reset this instance. Loses all unprocessed data."""
self.inraw = False
self.intail = False
@@ -104,7 +108,7 @@ def reset(self):
super().reset()
- def close(self):
+ def close(self) -> None:
"""Handle any buffered data."""
super().close()
if len(self.rawdata):
@@ -160,7 +164,7 @@ def get_endtag_text(self, tag: str) -> str:
# Failed to extract from raw data. Assume well formed and lowercase.
return '</{}>'.format(tag)
- def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]):
+ def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None:
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags:
self.handle_startendtag(tag, attrs)
@@ -181,7 +185,7 @@ def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]):
# This is presumably a standalone tag in a code span (see #1036).
self.clear_cdata_mode()
- def handle_endtag(self, tag: str):
+ def handle_endtag(self, tag: str) -> None:
text = self.get_endtag_text(tag)
if self.inraw:
@@ -208,7 +212,7 @@ def handle_endtag(self, tag: str):
else:
self.cleandoc.append(text)
- def handle_data(self, data: str):
+ def handle_data(self, data: str) -> None:
if self.intail and '\n' in data:
self.intail = False
if self.inraw:
@@ -216,7 +220,7 @@ def handle_data(self, data: str):
else:
self.cleandoc.append(data)
- def handle_empty_tag(self, data: str, is_block: bool):
+ def handle_empty_tag(self, data: str, is_block: bool) -> None:
""" Handle empty tags (``). """
if self.inraw or self.intail:
# Append this to the existing raw block
@@ -239,25 +243,25 @@ def handle_empty_tag(self, data: str, is_block: bool):
else:
self.cleandoc.append(data)
- def handle_startendtag(self, tag: str, attrs):
+ def handle_startendtag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None:
self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag))
- def handle_charref(self, name: str):
+ def handle_charref(self, name: str) -> None:
self.handle_empty_tag('&#{};'.format(name), is_block=False)
- def handle_entityref(self, name: str):
+ def handle_entityref(self, name: str) -> None:
self.handle_empty_tag('&{};'.format(name), is_block=False)
- def handle_comment(self, data: str):
+ def handle_comment(self, data: str) -> None:
self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
- def handle_decl(self, data: str):
+ def handle_decl(self, data: str) -> None:
self.handle_empty_tag('<!{}>'.format(data), is_block=True)
- def handle_pi(self, data: str):
+ def handle_pi(self, data: str) -> None:
self.handle_empty_tag('<?{}?>'.format(data), is_block=True)
- def unknown_decl(self, data: str):
+ def unknown_decl(self, data: str) -> None:
end = ']]>' if data.startswith('CDATA[') else ']>'
self.handle_empty_tag('<![{}{}'.format(data, end), is_block=True)
@@ -268,6 +272,7 @@ def parse_bogus_comment(self, i: int) -> int:
def get_starttag_text(self) -> str:
"""Return full source of start tag: `<...>`."""
+ assert self.__starttag_text is not None
return self.__starttag_text
def parse_starttag(self, i: int) -> int: # pragma: no cover
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
index ef6f0fbc6..4becb2988 100644
--- a/markdown/inlinepatterns.py
+++ b/markdown/inlinepatterns.py
@@ -44,13 +44,11 @@
from typing import TYPE_CHECKING, Any, Collection, NamedTuple
import re
import xml.etree.ElementTree as etree
-try: # pragma: no cover
- from html import entities
-except ImportError: # pragma: no cover
- import htmlentitydefs as entities
+from html import entities
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
+ from . import treeprocessors
def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]:
@@ -72,7 +70,7 @@ def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlinePro
* finally we apply strong, emphasis, etc.
"""
- inlinePatterns = util.Registry()
+ inlinePatterns: util.Registry[InlineProcessor] = util.Registry()
inlinePatterns.register(BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190)
inlinePatterns.register(EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180)
inlinePatterns.register(ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170)
@@ -191,7 +189,7 @@ class EmStrongItem(NamedTuple):
# -----------------------------------------------------------------------------
-class Pattern: # pragma: no cover
+class _BasePattern:
"""
Base class that inline patterns subclass.
@@ -241,42 +239,54 @@ def getCompiledRegExp(self) -> re.Pattern:
""" Return a compiled regular expression. """
return self.compiled_re
- def handleMatch(self, m: re.Match[str]) -> etree.Element | str:
- """Return a ElementTree element from the given match.
-
- Subclasses should override this method.
-
- Arguments:
- m: A match object containing a match of the pattern.
-
- Returns: An ElementTree Element object.
-
- """
- pass # pragma: no cover
-
def type(self) -> str:
""" Return class name, to define pattern type """
return self.__class__.__name__
def unescape(self, text: str) -> str:
""" Return unescaped text given text with an inline placeholder. """
+ assert self.md is not None
try:
- stash = self.md.treeprocessors['inline'].stashed_nodes
+ inlineprocessor: treeprocessors.InlineProcessor
+ inlineprocessor = self.md.treeprocessors['inline'] # type: ignore[assignment]
+ stash = inlineprocessor.stashed_nodes
except KeyError: # pragma: no cover
return text
- def get_stash(m):
+ def get_stash(m: re.Match[str]) -> str:
id = m.group(1)
if id in stash:
- value = stash.get(id)
+ value = stash[id]
if isinstance(value, str):
return value
else:
# An `etree` Element - return text content only
return ''.join(value.itertext())
+ return ''
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
+class LegacyPattern(_BasePattern):
+ def handleMatch(self, m: re.Match[str]) -> etree.Element | str:
+ """Return a ElementTree element from the given match.
+
+ Subclasses should override this method.
+
+ Arguments:
+ m: A match object containing a match of the pattern.
+
+ Returns: An ElementTree Element object.
+
+ """
+ raise NotImplementedError() # pragma: no cover
+
+
+if TYPE_CHECKING: # pragma: no cover
+ Pattern = _BasePattern
+else:
+ Pattern = LegacyPattern
+
+
class InlineProcessor(Pattern):
"""
Base class that inline processors subclass.
@@ -322,7 +332,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str
end: The end of the region that has been matched or None.
"""
- pass # pragma: no cover
+ raise NotImplementedError() # pragma: no cover
class SimpleTextPattern(Pattern): # pragma: no cover
@@ -342,6 +352,8 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
class EscapeInlineProcessor(InlineProcessor):
""" Return an escaped character. """
+ md: Markdown
+
def handleMatch(self, m: re.Match[str], data: str) -> tuple[str | None, int, int]:
"""
If the character matched by `group(1)` of a pattern is in [`ESCAPED_CHARS`][markdown.Markdown.ESCAPED_CHARS]
@@ -499,6 +511,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int,
class HtmlInlineProcessor(InlineProcessor):
""" Store raw inline html and return a placeholder. """
+
+ md: Markdown
+
def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
""" Store the text of `group(1)` of a pattern and return a placeholder string. """
rawhtml = self.backslash_unescape(self.unescape(m.group(1)))
@@ -508,7 +523,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
def unescape(self, text: str) -> str:
""" Return unescaped text given text with an inline placeholder. """
try:
- stash = self.md.treeprocessors['inline'].stashed_nodes
+ inlineprocessor: treeprocessors.InlineProcessor
+ inlineprocessor = self.md.treeprocessors['inline'] # type: ignore[assignment]
+ stash = inlineprocessor.stashed_nodes
except KeyError: # pragma: no cover
return text
@@ -517,16 +534,20 @@ def get_stash(m: re.Match[str]) -> str:
value = stash.get(id)
if value is not None:
try:
+ assert isinstance(value, etree.Element)
return self.md.serializer(value)
except Exception:
return r'\%s' % value
+ return ''
return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
def backslash_unescape(self, text: str) -> str:
""" Return text with backslash escapes undone (backslashes are restored). """
try:
- RE = self.md.treeprocessors['unescape'].RE
+ unescape_processor: treeprocessors.UnescapeTreeprocessor
+ unescape_processor = self.md.treeprocessors['unescape'] # type: ignore[assignment]
+ RE = unescape_processor.RE
except KeyError: # pragma: no cover
return text
@@ -875,6 +896,8 @@ class ReferenceInlineProcessor(LinkInlineProcessor):
RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)
+ md: Markdown
+
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
"""
Return [`Element`][xml.etree.ElementTree.Element] returned by `makeTag` method or `(None, None, None)`.
@@ -887,6 +910,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None
id, end, handled = self.evalId(data, index, text)
if not handled:
return None, None, None
+ assert id is not None
# Clean up line breaks in id
id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
index 7f5ede90c..f05dc4487 100644
--- a/markdown/postprocessors.py
+++ b/markdown/postprocessors.py
@@ -39,7 +39,7 @@
def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]:
""" Build the default postprocessors for Markdown. """
- postprocessors = util.Registry()
+ postprocessors: util.Registry[Postprocessor] = util.Registry()
postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30)
postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20)
return postprocessors
@@ -63,7 +63,7 @@ def run(self, text: str) -> str:
(possibly modified) string.
"""
- pass # pragma: no cover
+ raise NotImplementedError() # pragma: no cover
class RawHtmlPostprocessor(Postprocessor):
@@ -71,11 +71,14 @@ class RawHtmlPostprocessor(Postprocessor):
BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')
+ md: Markdown
+
def run(self, text: str) -> str:
""" Iterate over html stash and restore html. """
replacements = OrderedDict()
for i in range(self.md.htmlStash.html_counter):
- html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
+ raw: str = self.md.htmlStash.rawHtmlBlocks[i] # type: ignore[assignment]
+ html = self.stash_to_string(raw)
if self.isblocklevel(html):
replacements["{}
".format(
self.md.htmlStash.get_placeholder(i))] = html
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
index 0f63cdd36..25cf86de3 100644
--- a/markdown/preprocessors.py
+++ b/markdown/preprocessors.py
@@ -36,7 +36,7 @@
def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Preprocessor]:
""" Build and return the default set of preprocessors used by Markdown. """
- preprocessors = util.Registry()
+ preprocessors: util.Registry[Preprocessor] = util.Registry()
preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30)
preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
return preprocessors
@@ -60,12 +60,14 @@ def run(self, lines: list[str]) -> list[str]:
the (possibly modified) list of lines.
"""
- pass # pragma: no cover
+ raise NotImplementedError() # pragma: no cover
class NormalizeWhitespace(Preprocessor):
""" Normalize whitespace for consistent parsing. """
+ md: Markdown
+
def run(self, lines: list[str]) -> list[str]:
source = '\n'.join(lines)
source = source.replace(util.STX, "").replace(util.ETX, "")
@@ -83,6 +85,8 @@ class HtmlBlockPreprocessor(Preprocessor):
[`Markdown`][markdown.Markdown] instance.
"""
+ md: Markdown
+
def run(self, lines: list[str]) -> list[str]:
source = '\n'.join(lines)
parser = HTMLExtractor(self.md)
diff --git a/markdown/py.typed b/markdown/py.typed
new file mode 100644
index 000000000..e69de29bb
diff --git a/markdown/serializers.py b/markdown/serializers.py
index 573b26483..29ce305b0 100644
--- a/markdown/serializers.py
+++ b/markdown/serializers.py
@@ -45,10 +45,10 @@
from __future__ import annotations
-from xml.etree.ElementTree import ProcessingInstruction
-from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY
+from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName
+from xml.etree.ElementTree import HTML_EMPTY # type: ignore[attr-defined]
import re
-from typing import Callable, Literal, NoReturn
+from typing import Callable, Iterable, Literal, NoReturn
__all__ = ['to_html_string', 'to_xhtml_string']
@@ -123,7 +123,7 @@ def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal
elif tag is ProcessingInstruction:
write("%s?>" % _escape_cdata(text))
elif tag is None:
- if text:
+ if text: # type: ignore[unreachable]
write(_escape_cdata(text))
for e in elem:
_serialize_html(write, e, format)
@@ -136,7 +136,7 @@ def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal
else:
raise ValueError('QName objects must define a tag.')
write("<" + tag)
- items = elem.items()
+ items: Iterable[tuple[str, str]] = elem.items()
if items:
items = sorted(items) # lexical order
for k, v in items:
diff --git a/markdown/test_tools.py b/markdown/test_tools.py
index 895e44ec5..5f2cfbd8f 100644
--- a/markdown/test_tools.py
+++ b/markdown/test_tools.py
@@ -29,7 +29,7 @@
from . import markdown, Markdown, util
try:
- import tidylib
+ import tidylib # type: ignore
except ImportError:
tidylib = None
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
index dc857204b..e9eb190cd 100644
--- a/markdown/treeprocessors.py
+++ b/markdown/treeprocessors.py
@@ -34,18 +34,19 @@
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
+ from typing import TypeGuard
def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
""" Build the default `treeprocessors` for Markdown. """
- treeprocessors = util.Registry()
+ treeprocessors: util.Registry[Treeprocessor] = util.Registry()
treeprocessors.register(InlineProcessor(md), 'inline', 20)
treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10)
treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0)
return treeprocessors
-def isString(s: object) -> bool:
+def isString(s: object) -> TypeGuard[str]:
""" Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """
if not isinstance(s, util.AtomicString):
return isinstance(s, str)
@@ -69,7 +70,7 @@ def run(self, root: etree.Element) -> etree.Element | None:
object, and the existing root `Element` will be replaced, or it can
modify the current tree and return `None`.
"""
- pass # pragma: no cover
+ raise NotImplementedError() # pragma: no cover
class InlineProcessor(Treeprocessor):
@@ -204,7 +205,7 @@ def linkText(text: str | None) -> None:
parent.text += text
else:
parent.text = text
- result = []
+ result: list[tuple[etree.Element, list[str]]] = []
strartIndex = 0
while data:
index = data.find(self.__placeholder_prefix, strartIndex)
@@ -212,13 +213,13 @@ def linkText(text: str | None) -> None:
id, phEndIndex = self.__findPlaceholder(data, index)
if id in self.stashed_nodes:
- node = self.stashed_nodes.get(id)
+ node = self.stashed_nodes[id]
if index > 0:
text = data[strartIndex:index]
linkText(text)
- if not isString(node): # it's Element
+ if not isinstance(node, str): # it's Element
for child in [node] + list(node):
if child.tail:
if child.tail.strip():
@@ -252,7 +253,7 @@ def linkText(text: str | None) -> None:
def __applyPattern(
self,
- pattern: inlinepatterns.Pattern,
+ pattern: inlinepatterns.InlineProcessor | inlinepatterns.LegacyPattern,
data: str,
patternIndex: int,
startIndex: int = 0
@@ -271,7 +272,12 @@ def __applyPattern(
String with placeholders instead of `ElementTree` elements.
"""
- new_style = isinstance(pattern, inlinepatterns.InlineProcessor)
+ if isinstance(pattern, inlinepatterns.InlineProcessor):
+ new_style = True
+ new_pattern = pattern
+ else:
+ new_style = False
+ legacy_pattern = pattern
for exclude in pattern.ANCESTOR_EXCLUDES:
if exclude.lower() in self.ancestors:
@@ -282,29 +288,27 @@ def __applyPattern(
# Since `handleMatch` may reject our first match,
# we iterate over the buffer looking for matches
# until we can't find any more.
- for match in pattern.getCompiledRegExp().finditer(data, startIndex):
- node, start, end = pattern.handleMatch(match, data)
- if start is None or end is None:
- startIndex += match.end(0)
- match = None
- continue
- break
+ for try_match in new_pattern.getCompiledRegExp().finditer(data, startIndex):
+ try_node, try_start, try_end = new_pattern.handleMatch(try_match, data)
+ if try_start is not None and try_end is not None:
+ match, node, start, end = try_match, try_node, try_start, try_end
+ break
else: # pragma: no cover
- match = pattern.getCompiledRegExp().match(data[startIndex:])
+ match = legacy_pattern.getCompiledRegExp().match(data[startIndex:])
leftData = data[:startIndex]
if not match:
return data, False, 0
if not new_style: # pragma: no cover
- node = pattern.handleMatch(match)
+ node = legacy_pattern.handleMatch(match)
start = match.start(0)
end = match.end(0)
if node is None:
return data, True, end
- if not isString(node):
+ if not isinstance(node, str):
if not isinstance(node.text, util.AtomicString):
# We need to process current node too
for child in [node] + list(node):
@@ -373,7 +377,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.
self.ancestors = parents
self.__build_ancestors(currElement, self.ancestors)
- insertQueue = []
+ insertQueue: list[tuple[etree.Element, list[tuple[etree.Element, list[str]]]]] = []
for child in currElement:
if child.text and not isinstance(
child.text, util.AtomicString
@@ -398,9 +402,9 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.
child.tail = dumby.tail
pos = list(currElement).index(child) + 1
tailResult.reverse()
- for newChild in tailResult:
- self.parent_map[newChild[0]] = currElement
- currElement.insert(pos, newChild[0])
+ for subChild in tailResult:
+ self.parent_map[subChild[0]] = currElement
+ currElement.insert(pos, subChild[0])
if len(child):
self.parent_map[child] = currElement
stack.append((child, self.ancestors[:]))
@@ -415,6 +419,8 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.
class PrettifyTreeprocessor(Treeprocessor):
""" Add line breaks to the html document. """
+ md: Markdown
+
def _prettifyETree(self, elem: etree.Element) -> None:
""" Recursively add line breaks to `ElementTree` children. """
diff --git a/markdown/util.py b/markdown/util.py
index b4642023e..0741b6ea6 100644
--- a/markdown/util.py
+++ b/markdown/util.py
@@ -135,6 +135,16 @@ def deprecated_func(*args, **kwargs):
return wrapper
+@overload
+def parseBoolValue(value: str) -> bool:
+ ... # pragma: no cover
+
+
+@overload
+def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
+ ... # pragma: no cover
+
+
def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
"""Parses a string representing a boolean value. If parsing was successful,
returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`,
@@ -152,6 +162,7 @@ def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none
return False
elif fail_on_errors:
raise ValueError('Cannot parse bool value: %r' % value)
+ return None
def code_escape(text: str) -> str:
@@ -171,9 +182,12 @@ def _get_stack_depth(size: int = 2) -> int:
frame = sys._getframe(size)
for size in count(size):
- frame = frame.f_back
- if not frame:
+ next_frame = frame.f_back
+ if next_frame is None:
return size
+ frame = next_frame
+
+ raise RuntimeError("Could not get stack depth")
def nearing_recursion_limit() -> bool:
@@ -218,7 +232,7 @@ class HtmlStash:
in the beginning and replace with place-holders.
"""
- def __init__(self):
+ def __init__(self) -> None:
""" Create an `HtmlStash`. """
self.html_counter = 0 # for counting inline html segments
self.rawHtmlBlocks: list[str | etree.Element] = []
@@ -309,7 +323,7 @@ class Registry(Generic[_T]):
an item using that item's assigned "name".
"""
- def __init__(self):
+ def __init__(self) -> None:
self._data: dict[str, _T] = {}
self._priority: list[_PriorityItem] = []
self._is_sorted = False
diff --git a/pyproject.toml b/pyproject.toml
index 8c9e9bcfe..45eb20506 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -95,5 +95,12 @@ legacy_em = 'markdown.extensions.legacy_em:LegacyEmExtension'
[tool.setuptools]
packages = ['markdown', 'markdown.extensions']
+[tool.setuptools.package-data]
+"markdown" = ["py.typed"]
+
[tool.setuptools.dynamic]
version = {attr = 'markdown.__meta__.__version__'}
+
+[tool.mypy]
+warn_unreachable = true
+show_error_codes = true
diff --git a/tox.ini b/tox.ini
index d071054ea..7e528d478 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, flake8, checkspelling, pep517check, checklinks
+envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, mypy, flake8, checkspelling, pep517check, checklinks
isolated_build = True
[testenv]
@@ -19,6 +19,15 @@ deps =
pytidylib
pygments=={env:PYGMENTS_VERSION}
+[testenv:mypy]
+deps =
+ mypy
+ types-PyYAML
+ types-Pygments
+allowlist_externals = mypy
+commands = mypy {toxinidir}/markdown
+skip_install = true
+
[testenv:flake8]
deps = flake8
allowlist_externals = flake8