diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8f45e22e6..f80c0c6eb 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -71,7 +71,7 @@ jobs: fail-fast: false max-parallel: 4 matrix: - tox-env: [flake8, pep517check, checkspelling] + tox-env: [mypy, flake8, pep517check, checkspelling] env: TOXENV: ${{ matrix.tox-env }} diff --git a/markdown/__main__.py b/markdown/__main__.py index c323aaac4..b907108c1 100644 --- a/markdown/__main__.py +++ b/markdown/__main__.py @@ -24,6 +24,11 @@ import codecs import warnings import markdown +import logging +from logging import DEBUG, WARNING, CRITICAL +from typing import Any, Callable, IO, Mapping + +yaml_load: Callable[[IO], Any] try: # We use `unsafe_load` because users may need to pass in actual Python # objects. As this is only available from the CLI, the user has much @@ -32,18 +37,16 @@ except ImportError: # pragma: no cover try: # Fall back to PyYAML <5.1 - from yaml import load as yaml_load + from yaml import load as yaml_load # type: ignore except ImportError: # Fall back to JSON from json import load as yaml_load -import logging -from logging import DEBUG, WARNING, CRITICAL logger = logging.getLogger('MARKDOWN') -def parse_options(args=None, values=None): +def parse_options(args=None, values=None) -> tuple[Mapping[str, Any], bool]: """ Define and parse `optparse` options for command-line usage. """ diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d2020b9b6..798977789 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -121,7 +121,7 @@ def test(self, parent: etree.Element, block: str) -> bool: parent: An `etree` element which will be the parent of the block. block: A block of text from the source which has been split at blank lines. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: """ Run processor. 
Must be overridden by subclasses. @@ -147,7 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: parent: An `etree` element which is the parent of the current block. blocks: A list of all remaining blocks of the document. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class ListIndentProcessor(BlockProcessor): @@ -167,7 +167,7 @@ class ListIndentProcessor(BlockProcessor): LIST_TYPES = ['ul', 'ol'] """ Types of lists this processor can operate on. """ - def __init__(self, *args): + def __init__(self, *args) -> None: super().__init__(*args) self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length) @@ -175,7 +175,7 @@ def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or - (len(parent) and parent[-1] is not None and + (len(parent) > 0 and parent[-1] is not None and (parent[-1].tag in self.LIST_TYPES))) def run(self, parent: etree.Element, blocks: list[str]) -> None: @@ -417,7 +417,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> None: def get_items(self, block: str) -> list[str]: """ Break a block into list items. 
""" - items = [] + items: list[str] = [] for line in block.split('\n'): m = self.CHILD_RE.match(line) if m: @@ -426,7 +426,9 @@ def get_items(self, block: str) -> list[str]: if not items and self.TAG == 'ol': # Detect the integer value of first list item INTEGER_RE = re.compile(r'(\d+)') - self.STARTSWITH = INTEGER_RE.match(m.group(1)).group() + int_match = INTEGER_RE.match(m.group(1)) + assert int_match is not None + self.STARTSWITH = int_match.group() # Append to the list items.append(m.group(3)) elif self.INDENT_RE.match(line): diff --git a/markdown/core.py b/markdown/core.py index 6c7a21be9..92b729119 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -85,7 +85,7 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): """ Creates a new Markdown instance. @@ -183,7 +183,7 @@ def registerExtensions( 'Successfully loaded extension "%s.%s".' % (ext.__class__.__module__, ext.__class__.__name__) ) - elif ext is not None: + elif ext is not None: # type: ignore[unreachable] raise TypeError( 'Extension "{}.{}" must be of type: "{}.{}"'.format( ext.__class__.__module__, ext.__class__.__name__, @@ -417,11 +417,11 @@ def convertFile( # Read the source if input: if isinstance(input, str): - input_file = codecs.open(input, mode="r", encoding=encoding) + with codecs.open(input, mode="r", encoding=encoding) as input_file: + text = input_file.read() else: - input_file = codecs.getreader(encoding)(input) - text = input_file.read() - input_file.close() + with codecs.getreader(encoding)(input) as input_file: + text = input_file.read() else: text = sys.stdin.read() @@ -440,13 +440,13 @@ def convertFile( output_file.close() else: writer = codecs.getwriter(encoding) - output_file = writer(output, errors="xmlcharrefreplace") - output_file.write(html) + output_writer = writer(output, errors="xmlcharrefreplace") + output_writer.write(html) # Don't close 
here. User may want to write more. else: # Encode manually and write bytes to stdout. - html = html.encode(encoding, "xmlcharrefreplace") - sys.stdout.buffer.write(html) + html_bytes = html.encode(encoding, "xmlcharrefreplace") + sys.stdout.buffer.write(html_bytes) return self @@ -482,7 +482,13 @@ def markdown(text: str, **kwargs: Any) -> str: return md.convert(text) -def markdownFromFile(**kwargs: Any): +def markdownFromFile( + *, + input: str | BinaryIO | None = None, + output: str | BinaryIO | None = None, + encoding: str | None = None, + **kwargs: Any +) -> None: """ Read Markdown text from a file and write output to a file or a stream. @@ -491,13 +497,11 @@ def markdownFromFile(**kwargs: Any): [`convert`][markdown.Markdown.convert]. Keyword arguments: - input (str | BinaryIO): A file name or readable object. - output (str | BinaryIO): A file name or writable object. - encoding (str): Encoding of input and output. + input: A file name or readable object. + output: A file name or writable object. + encoding: Encoding of input and output. **kwargs: Any arguments accepted by the `Markdown` class. """ md = Markdown(**kwargs) - md.convertFile(kwargs.get('input', None), - kwargs.get('output', None), - kwargs.get('encoding', None)) + md.convertFile(input, output, encoding) diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py index a5ec07b27..32ba55658 100644 --- a/markdown/extensions/__init__.py +++ b/markdown/extensions/__init__.py @@ -53,7 +53,7 @@ class Extension: if a default is not set for each option. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ Initiate Extension and set up configs. 
""" self.setConfigs(kwargs) diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index 738368afe..3d89fefa5 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -28,12 +28,16 @@ from ..util import AtomicString import re import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class AbbrExtension(Extension): """ Abbreviation Extension for Python-Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """ md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16) diff --git a/markdown/extensions/admonition.py b/markdown/extensions/admonition.py index d0e97002d..9346f3c28 100644 --- a/markdown/extensions/admonition.py +++ b/markdown/extensions/admonition.py @@ -33,13 +33,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown from markdown import blockparser class AdmonitionExtension(Extension): """ Admonition extension for Python-Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add Admonition to Markdown instance. """ md.registerExtension(self) @@ -59,7 +60,7 @@ def __init__(self, parser: blockparser.BlockParser): super().__init__(parser) self.current_sibling: etree.Element | None = None - self.content_indention = 0 + self.content_indent = 0 def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]: """Get sibling admonition. 
@@ -74,11 +75,11 @@ def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Elemen # We already acquired the block via test if self.current_sibling is not None: - sibling = self.current_sibling + prev_sibling = self.current_sibling block, the_rest = self.detab(block, self.content_indent) self.current_sibling = None self.content_indent = 0 - return sibling, block, the_rest + return prev_sibling, block, the_rest sibling = self.lastChild(parent) @@ -147,6 +148,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> None: p.text = title p.set('class', self.CLASSNAME_TITLE) else: + assert sibling is not None # Sibling is a list item; its content should be wrapped in a paragraph

if sibling.tag in ('li', 'dd') and sibling.text: text = sibling.text diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 7ce3f9925..a7276e747 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -32,6 +32,7 @@ if TYPE_CHECKING: # pragma: no cover from xml.etree.ElementTree import Element + from markdown import Markdown def _handle_double_quote(s, t): @@ -56,7 +57,7 @@ def _handle_word(s, t): return t, t -_scanner = re.Scanner([ +_scanner = re.Scanner([ # type: ignore[attr-defined] (r'[^ =]+=".*?"', _handle_double_quote), (r"[^ =]+='.*?'", _handle_single_quote), (r'[^ =]+=[^ =]+', _handle_key_value), @@ -86,6 +87,8 @@ class AttrListTreeprocessor(Treeprocessor): r'\uf900-\ufdcf\ufdf0-\ufffd' r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') + md: Markdown + def run(self, doc: Element) -> None: for elem in doc.iter(): if self.md.is_block_level(elem.tag): @@ -102,18 +105,18 @@ def run(self, doc: Element) -> None: if child.tag in ['ul', 'ol']: pos = i break - if pos is None and elem[-1].tail: + if pos is None and (tail := elem[-1].tail): # use tail of last child. no `ul` or `ol`. - m = RE.search(elem[-1].tail) + m = RE.search(tail) if m: self.assign_attrs(elem, m.group(1)) - elem[-1].tail = elem[-1].tail[:m.start()] - elif pos is not None and pos > 0 and elem[pos-1].tail: + elem[-1].tail = tail[:m.start()] + elif pos is not None and pos > 0 and (tail := elem[pos-1].tail): # use tail of last child before `ul` or `ol` - m = RE.search(elem[pos-1].tail) + m = RE.search(tail) if m: self.assign_attrs(elem, m.group(1)) - elem[pos-1].tail = elem[pos-1].tail[:m.start()] + elem[pos-1].tail = tail[:m.start()] elif elem.text: # use text. `ul` is first child. 
m = RE.search(elem.text) @@ -170,7 +173,7 @@ def sanitize_name(self, name: str) -> str: class AttrListExtension(Extension): """ Attribute List extension for Python-Markdown """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8) md.registerExtension(self) diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index 0114908f6..a33e7be19 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -27,6 +27,7 @@ from typing import TYPE_CHECKING, Callable, Any if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown import xml.etree.ElementTree as etree try: # pragma: no cover @@ -150,7 +151,7 @@ def hilite(self, shebang: bool = True) -> str: if pygments and self.use_pygments: try: - lexer = get_lexer_by_name(self.lang, **self.options) + lexer = get_lexer_by_name(self.lang or '', **self.options) except ValueError: try: if self.guess_lang: @@ -161,7 +162,7 @@ def hilite(self, shebang: bool = True) -> str: lexer = get_lexer_by_name('text', **self.options) if not self.lang: # Use the guessed lexer's language instead - self.lang = lexer.aliases[0] + self.lang = lexer.aliases[0] # type: ignore[attr-defined] lang_str = f'{self.lang_prefix}{self.lang}' if isinstance(self.pygments_formatter, str): try: @@ -254,6 +255,7 @@ class HiliteTreeprocessor(Treeprocessor): """ Highlight source code in code blocks. 
""" config: dict[str, Any] + md: Markdown def code_unescape(self, text: str) -> str: """Unescape code.""" @@ -270,8 +272,10 @@ def run(self, root: etree.Element) -> None: for block in blocks: if len(block) == 1 and block[0].tag == 'code': local_config = self.config.copy() + text = block[0].text + assert text is not None code = CodeHilite( - self.code_unescape(block[0].text), + self.code_unescape(text), tab_length=self.md.tab_length, style=local_config.pop('pygments_style', 'default'), **local_config @@ -288,7 +292,7 @@ def run(self, root: etree.Element) -> None: class CodeHiliteExtension(Extension): """ Add source code highlighting to markdown code blocks. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: # define default configs self.config = { 'linenums': [ @@ -331,7 +335,7 @@ def __init__(self, **kwargs): pass # Assume it's not a boolean value. Use as-is. self.config[key] = [value, ''] - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `HilitePostprocessor` to Markdown instance. 
""" hiliter = HiliteTreeprocessor(md) hiliter.config = self.getConfigs() diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py index 5324bf193..8fb5c3db8 100644 --- a/markdown/extensions/def_list.py +++ b/markdown/extensions/def_list.py @@ -25,6 +25,10 @@ from ..blockprocessors import BlockProcessor, ListIndentProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class DefListProcessor(BlockProcessor): @@ -40,6 +44,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: raw_block = blocks.pop(0) m = self.RE.search(raw_block) + assert m is not None terms = [term.strip() for term in raw_block[:m.start()].split('\n') if term.strip()] block = raw_block[m.end():] @@ -53,20 +58,21 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: else: d = m.group(2) sibling = self.lastChild(parent) - if not terms and sibling is None: - # This is not a definition item. Most likely a paragraph that - # starts with a colon at the beginning of a document or list. - blocks.insert(0, raw_block) - return False - if not terms and sibling.tag == 'p': - # The previous paragraph contains the terms - state = 'looselist' - terms = sibling.text.split('\n') - parent.remove(sibling) - # Acquire new sibling - sibling = self.lastChild(parent) - else: - state = 'list' + state = 'list' + if not terms: + if sibling is None: + # This is not a definition item. Most likely a paragraph that + # starts with a colon at the beginning of a document or list. 
+ blocks.insert(0, raw_block) + return False + if sibling.tag == 'p': + # The previous paragraph contains the terms + state = 'looselist' + assert sibling.text is not None + terms = sibling.text.split('\n') + parent.remove(sibling) + # Acquire new sibling + sibling = self.lastChild(parent) if sibling is not None and sibling.tag == 'dl': # This is another item on an existing list @@ -88,6 +94,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: if theRest: blocks.insert(0, theRest) + return None class DefListIndentProcessor(ListIndentProcessor): @@ -99,7 +106,7 @@ class DefListIndentProcessor(ListIndentProcessor): LIST_TYPES = ['dl', 'ol', 'ul'] """ Include `dl` in list types. """ - def create_item(self, parent: etree.Element, block: str) -> None: + def create_item(self, parent: etree.Element, block: str): """ Create a new `dd` or `li` (depending on parent) and parse the block with it as the parent. """ dd = etree.SubElement(parent, 'dd') @@ -109,7 +116,7 @@ class DefListExtension(Extension): """ Add definition lists to Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add an instance of `DefListProcessor` to `BlockParser`. """ md.parser.blockprocessors.register(DefListIndentProcessor(md.parser), 'defindent', 85) md.parser.blockprocessors.register(DefListProcessor(md.parser), 'deflist', 25) diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py index 74ebc192c..d33a09697 100644 --- a/markdown/extensions/extra.py +++ b/markdown/extensions/extra.py @@ -53,7 +53,7 @@ class ExtraExtension(Extension): """ Add various extensions to Markdown class.""" - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ `config` is a dumb holder which gets passed to the actual extension
""" self.config = kwargs diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index da1a9be1e..7cd33c85b 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -36,14 +36,14 @@ class FencedCodeExtension(Extension): - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"'] } """ Default configuration options. """ super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `FencedBlockPreprocessor` to the Markdown instance. """ md.registerExtension(self) @@ -53,6 +53,8 @@ def extendMarkdown(self, md): class FencedBlockPreprocessor(Preprocessor): """ Find and extract fenced code blocks. """ + md: Markdown + FENCED_BLOCK_RE = re.compile( dedent(r''' (?P^(?:~{3,}|`{3,}))[ ]* # opening fence @@ -97,12 +99,13 @@ def run(self, lines: list[str]) -> list[str]: while 1: m = self.FENCED_BLOCK_RE.search(text) if m: - lang, id, classes, config = None, '', [], {} + lang = None if m.group('attrs'): id, classes, config = self.handle_attrs(get_attrs(m.group('attrs'))) if len(classes): lang = classes.pop(0) else: + id, classes, config = '', [], {} if m.group('lang'): lang = m.group('lang') if m.group('hl_lines'): @@ -159,7 +162,7 @@ def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str] """ Return tuple: `(id, [list, of, classes], {configs})` """ id = '' classes = [] - configs = {} + configs: dict[str, Any] = {} for k, v in attrs: if k == 'id': id = v diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 30c081138..ac35cf484 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -29,6 +29,10 @@ import re import copy import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import 
Markdown FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX @@ -38,7 +42,7 @@ class FootnoteExtension(Extension): """ Footnote Extension. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ Setup configs. """ self.config = { @@ -73,7 +77,7 @@ def __init__(self, **kwargs): self.reset() - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add pieces to Markdown. """ md.registerExtension(self) self.parser = md.parser @@ -130,7 +134,7 @@ def findFootnotesPlaceholder( self, root: etree.Element ) -> tuple[etree.Element, etree.Element, bool] | None: """ Return ElementTree Element that contains Footnote placeholder. """ - def finder(element): + def finder(element: etree.Element) -> tuple[etree.Element, etree.Element, bool] | None: for child in element: if child.text: if child.text.find(self.getConfig("PLACE_MARKER")) > -1: @@ -205,6 +209,7 @@ def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: if len(li): node = li[-1] if node.tag == "p": + assert node.text is not None node.text = node.text + NBSP_PLACEHOLDER node.append(backlink) else: @@ -290,7 +295,7 @@ def detectTabbed(self, blocks: list[str]) -> list[str]: break return fn_blocks - def detab(self, block: str) -> str: + def detab(self, block: str) -> str: # type: ignore[override] """ Remove one level of indent from a block. Preserve lazily indented blocks by only removing indent from indented lines. 
diff --git a/markdown/extensions/legacy_attrs.py b/markdown/extensions/legacy_attrs.py index 6641e6ea6..3f1c7a6dc 100644 --- a/markdown/extensions/legacy_attrs.py +++ b/markdown/extensions/legacy_attrs.py @@ -37,6 +37,7 @@ if TYPE_CHECKING: # pragma: no cover import xml.etree.ElementTree as etree + from markdown import Markdown ATTR_RE = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} @@ -56,13 +57,14 @@ def run(self, doc: etree.Element) -> None: def handleAttributes(self, el: etree.Element, txt: str) -> str: """ Set attributes and return text without definitions. """ - def attributeCallback(match: re.Match[str]): + def attributeCallback(match: re.Match[str]) -> str: el.set(match.group(1), match.group(2).replace('\n', ' ')) + return '' return ATTR_RE.sub(attributeCallback, txt) class LegacyAttrExtension(Extension): - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `LegacyAttrs` to Markdown instance. """ md.treeprocessors.register(LegacyAttrs(md), 'legacyattrs', 15) diff --git a/markdown/extensions/legacy_em.py b/markdown/extensions/legacy_em.py index a6f67b7ef..6fbff4a06 100644 --- a/markdown/extensions/legacy_em.py +++ b/markdown/extensions/legacy_em.py @@ -16,6 +16,10 @@ from . import Extension from ..inlinepatterns import UnderscoreProcessor, EmStrongItem, EM_STRONG2_RE, STRONG_EM2_RE import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown # _emphasis_ EMPHASIS_RE = r'(_)([^_]+)\1' @@ -42,7 +46,7 @@ class LegacyUnderscoreProcessor(UnderscoreProcessor): class LegacyEmExtension(Extension): """ Add legacy_em extension to Markdown class.""" - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Modify inline patterns. 
""" md.inlinePatterns.register(LegacyUnderscoreProcessor(r'_'), 'em_strong2', 50) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 64b84a5f4..ebcea86c2 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -28,7 +28,7 @@ from .. import util from ..htmlparser import HTMLExtractor, blank_line_re import xml.etree.ElementTree as etree -from typing import TYPE_CHECKING, Literal, Mapping +from typing import TYPE_CHECKING, Literal, Mapping, Sequence if TYPE_CHECKING: # pragma: no cover from markdown import Markdown @@ -56,14 +56,14 @@ def __init__(self, md: Markdown, *args, **kwargs): self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags) self.span_and_blocks_tags = self.block_tags | self.span_tags - def reset(self): + def reset(self) -> None: """Reset this instance. Loses all unprocessed data.""" self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags self.treebuilder = etree.TreeBuilder() self.mdstate: list[Literal['block', 'span', 'off', None]] = [] super().reset() - def close(self): + def close(self) -> None: """Handle any buffered data.""" super().close() # Handle any unclosed tags. 
@@ -98,13 +98,13 @@ def get_state(self, tag, attrs: Mapping[str, str]) -> Literal['block', 'span', ' else: # pragma: no cover return None - def handle_starttag(self, tag, attrs): + def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: # Handle tags that should always be empty and do not specify a closing tag if tag in self.empty_tags and (self.at_line_start() or self.intail): - attrs = {key: value if value is not None else key for key, value in attrs} - if "markdown" in attrs: - attrs.pop('markdown') - element = etree.Element(tag, attrs) + attrs_dict = {key: value if value is not None else key for key, value in attrs} + if "markdown" in attrs_dict: + attrs_dict.pop('markdown') + element = etree.Element(tag, attrs_dict) data = etree.tostring(element, encoding='unicode', method='html') else: data = self.get_starttag_text() @@ -114,20 +114,20 @@ def handle_starttag(self, tag, attrs): if tag in self.block_level_tags and (self.at_line_start() or self.intail): # Valueless attribute (ex: ``) results in `[('checked', None)]`. # Convert to `{'checked': 'checked'}`. 
- attrs = {key: value if value is not None else key for key, value in attrs} - state = self.get_state(tag, attrs) - if self.inraw or (state in [None, 'off'] and not self.mdstack): + attrs_dict = {key: value if value is not None else key for key, value in attrs} + state = self.get_state(tag, attrs_dict) + if self.inraw or ((state is None or state == 'off') and not self.mdstack): # fall back to default behavior - attrs.pop('markdown', None) - super().handle_starttag(tag, attrs) + attrs_dict.pop('markdown', None) + super().handle_starttag(tag, attrs_dict) # type: ignore[arg-type] else: if 'p' in self.mdstack and tag in self.block_level_tags: # Close unclosed 'p' tag self.handle_endtag('p') self.mdstate.append(state) self.mdstack.append(tag) - attrs['markdown'] = state - self.treebuilder.start(tag, attrs) + attrs_dict['markdown'] = state # type: ignore[assignment] + self.treebuilder.start(tag, attrs_dict) else: # Span level tag if self.inraw: @@ -142,7 +142,7 @@ def handle_starttag(self, tag, attrs): # This is presumably a standalone tag in a code span (see #1036). 
self.clear_cdata_mode() - def handle_endtag(self, tag): + def handle_endtag(self, tag: str) -> None: if tag in self.block_level_tags: if self.inraw: super().handle_endtag(tag) @@ -165,7 +165,7 @@ def handle_endtag(self, tag): self.cleandoc.append('\n') self.cleandoc.append(self.md.htmlStash.store(element)) self.cleandoc.append('\n\n') - self.state = [] + self.state: list = [] # Check if element has a tail if not blank_line_re.match( self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]): @@ -189,12 +189,12 @@ def handle_endtag(self, tag): else: self.handle_data(text) - def handle_startendtag(self, tag, attrs): + def handle_startendtag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: if tag in self.empty_tags: - attrs = {key: value if value is not None else key for key, value in attrs} - if "markdown" in attrs: - attrs.pop('markdown') - element = etree.Element(tag, attrs) + attrs_dict = {key: value if value is not None else key for key, value in attrs} + if "markdown" in attrs_dict: + attrs_dict.pop('markdown') + element = etree.Element(tag, attrs_dict) data = etree.tostring(element, encoding='unicode', method='html') else: data = self.get_starttag_text() @@ -202,7 +202,7 @@ def handle_startendtag(self, tag, attrs): data = self.get_starttag_text() self.handle_empty_tag(data, is_block=self.md.is_block_level(tag)) - def handle_data(self, data): + def handle_data(self, data: str) -> None: if self.intail and '\n' in data: self.intail = False if self.inraw or not self.mdstack: @@ -210,7 +210,7 @@ def handle_data(self, data): else: self.treebuilder.data(data) - def handle_empty_tag(self, data, is_block): + def handle_empty_tag(self, data: str, is_block: bool) -> None: if self.inraw or not self.mdstack: super().handle_empty_tag(data, is_block) else: @@ -243,6 +243,8 @@ def parse_html_declaration(self, i: int) -> int: class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" + md: 
Markdown + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) parser = HTMLExtractorExtra(self.md) @@ -359,7 +361,7 @@ def stash_to_string(self, text: str | etree.Element) -> str: class MarkdownInHtmlExtension(Extension): """Add Markdown parsing in HTML to Markdown class.""" - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Register extension instances. """ # Replace raw HTML preprocessor diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index cb703399b..e43698c28 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -25,7 +25,10 @@ from ..preprocessors import Preprocessor import re import logging -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown log = logging.getLogger('MARKDOWN') @@ -39,19 +42,21 @@ class MetaExtension (Extension): """ Meta-Data extension for Python-Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `MetaPreprocessor` to Markdown instance. """ md.registerExtension(self) self.md = md md.preprocessors.register(MetaPreprocessor(md), 'meta', 27) def reset(self) -> None: - self.md.Meta = {} + self.md.Meta = {} # type: ignore[attr-defined] class MetaPreprocessor(Preprocessor): """ Get Meta-Data. """ + md: Markdown + def run(self, lines: list[str]) -> list[str]: """ Parse Meta-Data and store in Markdown.Meta. """ meta: dict[str, Any] = {} @@ -78,7 +83,7 @@ def run(self, lines: list[str]) -> list[str]: else: lines.insert(0, line) break # no meta data - done - self.md.Meta = meta + self.md.Meta = meta # type: ignore[attr-defined] return lines diff --git a/markdown/extensions/nl2br.py b/markdown/extensions/nl2br.py index 177df1ee4..eb715defc 100644 --- a/markdown/extensions/nl2br.py +++ b/markdown/extensions/nl2br.py @@ -25,13 +25,17 @@ from . 
import Extension from ..inlinepatterns import SubstituteTagInlineProcessor +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown BR_RE = r'\n' class Nl2BrExtension(Extension): - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add a `SubstituteTagInlineProcessor` to Markdown. """ br_tag = SubstituteTagInlineProcessor(BR_RE, 'br') md.inlinePatterns.register(br_tag, 'nl', 5) diff --git a/markdown/extensions/sane_lists.py b/markdown/extensions/sane_lists.py index be421f943..cf2b5ccc7 100644 --- a/markdown/extensions/sane_lists.py +++ b/markdown/extensions/sane_lists.py @@ -27,6 +27,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown from .. import blockparser @@ -59,7 +60,7 @@ def __init__(self, parser: blockparser.BlockParser): class SaneListExtension(Extension): """ Add sane lists to Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Override existing Processors. """ md.parser.blockprocessors.register(SaneOListProcessor(md.parser), 'olist', 40) md.parser.blockprocessors.register(SaneUListProcessor(md.parser), 'ulist', 30) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 0ce7772a7..14abb8422 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -179,7 +179,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: class SmartyExtension(Extension): """ Add Smarty to Markdown. 
""" - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'smart_quotes': [True, 'Educate quotes'], 'smart_angled_quotes': [False, 'Educate angled quotes'], @@ -198,10 +198,9 @@ def _addPatterns( patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]], serie: str, priority: int, - ): - for ind, pattern in enumerate(patterns): - pattern += (md,) - pattern = SubstituteTextPattern(*pattern) + ) -> None: + for ind, pattern_parts in enumerate(patterns): + pattern = SubstituteTextPattern(*pattern_parts, md) name = 'smarty-%s-%d' % (serie, ind) self.inlinePatterns.register(pattern, name, priority-ind) @@ -253,7 +252,7 @@ def educateQuotes(self, md: Markdown) -> None: ) self._addPatterns(md, patterns, 'quotes', 30) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: configs = self.getConfigs() self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() if configs['smart_ellipses']: diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py index 6e2fa1742..3220c7f76 100644 --- a/markdown/extensions/tables.py +++ b/markdown/extensions/tables.py @@ -28,6 +28,7 @@ from typing import TYPE_CHECKING, Any, Sequence if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown from .. import blockparser PIPE_NONE = 0 @@ -228,7 +229,7 @@ def _split(self, row: str) -> list[str]: class TableExtension(Extension): """ Add tables to Markdown. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'use_align_attribute': [False, 'True to use align attribute instead of style.'], } @@ -236,7 +237,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add an instance of `TableProcessor` to `BlockParser`. 
""" if '|' not in md.ESCAPED_CHARS: md.ESCAPED_CHARS.append('|') diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index a17d7241c..41838287e 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -27,10 +27,11 @@ import html import unicodedata import xml.etree.ElementTree as etree -from typing import TYPE_CHECKING, Any, Iterator, MutableSet +from typing import TYPE_CHECKING, Any, Iterator, MutableSet, TypedDict if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + from .. import treeprocessors def slugify(value: str, separator: str, unicode: bool = False) -> str: @@ -83,6 +84,7 @@ def _html_sub(m: re.Match[str]) -> str: raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] except (IndexError, TypeError): # pragma: no cover return m.group(0) + assert isinstance(raw, str) # Strip out tags and/or entities - leaving text res = re.sub(r'(<[^>]+>)', '', raw) if strip_entities: @@ -98,7 +100,17 @@ def unescape(text: str) -> str: return c.unescape(text) -def nest_toc_tokens(toc_list): +if TYPE_CHECKING: + class FlatTocToken(TypedDict): + level: int + id: str + name: str + + class TocToken(FlatTocToken): + children: list[TocToken] + + +def nest_toc_tokens(toc_list: list[FlatTocToken]) -> list[TocToken]: """Given an unsorted list with errors and skips, return a nested one. 
[{'level': 1}, {'level': 2}] @@ -115,15 +127,15 @@ def nest_toc_tokens(toc_list): ordered_list = [] if len(toc_list): # Initialize everything by processing the first entry - last = toc_list.pop(0) + last: TocToken = toc_list.pop(0) # type: ignore[assignment] last['children'] = [] levels = [last['level']] ordered_list.append(last) - parents = [] + parents: list[TocToken] = [] # Walk the rest nesting the entries properly while toc_list: - t = toc_list.pop(0) + t: TocToken = toc_list.pop(0) # type: ignore[assignment] current_level = t['level'] t['children'] = [] @@ -166,6 +178,8 @@ def nest_toc_tokens(toc_list): class TocTreeprocessor(Treeprocessor): """ Step through document and build TOC. """ + md: Markdown + def __init__(self, md: Markdown, config: dict[str, Any]): super().__init__(md) @@ -178,9 +192,11 @@ def __init__(self, md: Markdown, config: dict[str, Any]): self.title_class: str = config["title_class"] self.use_anchors: bool = parseBoolValue(config["anchorlink"]) self.anchorlink_class: str = config["anchorlink_class"] - self.use_permalinks = parseBoolValue(config["permalink"], False) - if self.use_permalinks is None: + use_permalinks = parseBoolValue(config["permalink"], False) + if use_permalinks is None: self.use_permalinks = config["permalink"] + else: + self.use_permalinks = use_permalinks self.permalink_class: str = config["permalink_class"] self.permalink_title: str = config["permalink_title"] self.permalink_leading: bool | None = parseBoolValue(config["permalink_leading"], False) @@ -258,7 +274,7 @@ def add_permalink(self, c: etree.Element, elem_id: str) -> None: else: c.append(permalink) - def build_toc_div(self, toc_list: list) -> etree.Element: + def build_toc_div(self, toc_list: list[TocToken]) -> etree.Element: """ Return a string div given a toc list. 
""" div = etree.Element("div") div.attrib["class"] = self.toc_class @@ -270,7 +286,7 @@ def build_toc_div(self, toc_list: list) -> etree.Element: header.attrib["class"] = self.title_class header.text = self.title - def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element: + def build_etree_ul(toc_list: list[TocToken], parent: etree.Element) -> etree.Element: ul = etree.SubElement(parent, "ul") for item in toc_list: # List item link, to be inserted into the toc div @@ -285,7 +301,9 @@ def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element: build_etree_ul(toc_list, div) if 'prettify' in self.md.treeprocessors: - self.md.treeprocessors['prettify'].run(div) + prettify_processor: treeprocessors.PrettifyTreeprocessor + prettify_processor = self.md.treeprocessors['prettify'] # type: ignore[assignment] + prettify_processor.run(div) return div @@ -296,7 +314,7 @@ def run(self, doc: etree.Element) -> None: if "id" in el.attrib: used_ids.add(el.attrib["id"]) - toc_tokens = [] + flat_toc_tokens: list[FlatTocToken] = [] for el in doc.iter(): if isinstance(el.tag, str) and self.header_rgx.match(el.tag): self.set_level(el) @@ -308,7 +326,7 @@ def run(self, doc: etree.Element) -> None: el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids) if int(el.tag[-1]) >= self.toc_top and int(el.tag[-1]) <= self.toc_bottom: - toc_tokens.append({ + flat_toc_tokens.append({ 'level': int(el.tag[-1]), 'id': el.attrib["id"], 'name': unescape(stashedHTML2text( @@ -326,7 +344,7 @@ def run(self, doc: etree.Element) -> None: if self.use_permalinks not in [False, None]: self.add_permalink(el, el.attrib["id"]) - toc_tokens = nest_toc_tokens(toc_tokens) + toc_tokens = nest_toc_tokens(flat_toc_tokens) div = self.build_toc_div(toc_tokens) if self.marker: self.replace_marker(doc, div) @@ -335,15 +353,15 @@ def run(self, doc: etree.Element) -> None: toc = self.md.serializer(div) for pp in self.md.postprocessors: toc = pp.run(toc) - self.md.toc_tokens = 
toc_tokens - self.md.toc = toc + self.md.toc_tokens = toc_tokens # type: ignore[attr-defined] + self.md.toc = toc # type: ignore[attr-defined] class TocExtension(Extension): TreeProcessorClass = TocTreeprocessor - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'marker': [ '[TOC]', @@ -395,7 +413,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add TOC tree processor to Markdown. """ md.registerExtension(self) self.md = md @@ -404,8 +422,8 @@ def extendMarkdown(self, md): md.treeprocessors.register(tocext, 'toc', 5) def reset(self) -> None: - self.md.toc = '' - self.md.toc_tokens = [] + self.md.toc = '' # type: ignore[attr-defined] + self.md.toc_tokens = [] # type: ignore[attr-defined] def makeExtension(**kwargs): # pragma: no cover diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 3f3cbe2dd..ed8e7335b 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -25,7 +25,10 @@ from ..inlinepatterns import InlineProcessor import xml.etree.ElementTree as etree import re -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown def build_url(label: str, base: str, end: str) -> str: @@ -37,7 +40,7 @@ def build_url(label: str, base: str, end: str) -> str: class WikiLinkExtension(Extension): """ Add inline processor to Markdown. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'base_url': ['/', 'String to append to beginning or URL.'], 'end_url': ['/', 'String to append to end of URL.'], @@ -47,7 +50,7 @@ def __init__(self, **kwargs): """ Default configuration options. 
""" super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: self.md = md # append to end of inline patterns @@ -60,11 +63,14 @@ def extendMarkdown(self, md): class WikiLinksInlineProcessor(InlineProcessor): """ Build link from `wikilink`. """ + md: Markdown + def __init__(self, pattern: str, config: dict[str, Any]): super().__init__(pattern) self.config = config def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]: + a: etree.Element | str if m.group(1).strip(): base_url, end_url, html_class = self._getMeta() label = m.group(1).strip() diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 5155ef69d..faef4daac 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -28,7 +28,7 @@ import re import importlib.util import sys -from typing import TYPE_CHECKING, Sequence +from typing import TYPE_CHECKING, Any, Sequence if TYPE_CHECKING: # pragma: no cover from markdown import Markdown @@ -37,7 +37,9 @@ # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. # Users can still do `from html import parser` and get the default behavior. spec = importlib.util.find_spec('html.parser') -htmlparser = importlib.util.module_from_spec(spec) +assert spec is not None +htmlparser: Any = importlib.util.module_from_spec(spec) +assert spec.loader is not None spec.loader.exec_module(htmlparser) sys.modules['htmlparser'] = htmlparser @@ -80,6 +82,8 @@ class HTMLExtractor(htmlparser.HTMLParser): is stored in `cleandoc` as a list of strings. """ + md: Markdown + def __init__(self, md: Markdown, *args, **kwargs): if 'convert_charrefs' not in kwargs: kwargs['convert_charrefs'] = False @@ -93,7 +97,7 @@ def __init__(self, md: Markdown, *args, **kwargs): super().__init__(*args, **kwargs) self.md = md - def reset(self): + def reset(self) -> None: """Reset this instance. 
Loses all unprocessed data.""" self.inraw = False self.intail = False @@ -104,7 +108,7 @@ def reset(self): super().reset() - def close(self): + def close(self) -> None: """Handle any buffered data.""" super().close() if len(self.rawdata): @@ -160,7 +164,7 @@ def get_endtag_text(self, tag: str) -> str: # Failed to extract from raw data. Assume well formed and lowercase. return ''.format(tag) - def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]): + def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: # Handle tags that should always be empty and do not specify a closing tag if tag in self.empty_tags: self.handle_startendtag(tag, attrs) @@ -181,7 +185,7 @@ def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]): # This is presumably a standalone tag in a code span (see #1036). self.clear_cdata_mode() - def handle_endtag(self, tag: str): + def handle_endtag(self, tag: str) -> None: text = self.get_endtag_text(tag) if self.inraw: @@ -208,7 +212,7 @@ def handle_endtag(self, tag: str): else: self.cleandoc.append(text) - def handle_data(self, data: str): + def handle_data(self, data: str) -> None: if self.intail and '\n' in data: self.intail = False if self.inraw: @@ -216,7 +220,7 @@ def handle_data(self, data: str): else: self.cleandoc.append(data) - def handle_empty_tag(self, data: str, is_block: bool): + def handle_empty_tag(self, data: str, is_block: bool) -> None: """ Handle empty tags (``). 
""" if self.inraw or self.intail: # Append this to the existing raw block @@ -239,25 +243,25 @@ def handle_empty_tag(self, data: str, is_block: bool): else: self.cleandoc.append(data) - def handle_startendtag(self, tag: str, attrs): + def handle_startendtag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag)) - def handle_charref(self, name: str): + def handle_charref(self, name: str) -> None: self.handle_empty_tag('&#{};'.format(name), is_block=False) - def handle_entityref(self, name: str): + def handle_entityref(self, name: str) -> None: self.handle_empty_tag('&{};'.format(name), is_block=False) - def handle_comment(self, data: str): + def handle_comment(self, data: str) -> None: self.handle_empty_tag(''.format(data), is_block=True) - def handle_decl(self, data: str): + def handle_decl(self, data: str) -> None: self.handle_empty_tag(''.format(data), is_block=True) - def handle_pi(self, data: str): + def handle_pi(self, data: str) -> None: self.handle_empty_tag(''.format(data), is_block=True) - def unknown_decl(self, data: str): + def unknown_decl(self, data: str) -> None: end = ']]>' if data.startswith('CDATA[') else ']>' self.handle_empty_tag(' int: def get_starttag_text(self) -> str: """Return full source of start tag: `<...>`.""" + assert self.__starttag_text is not None return self.__starttag_text def parse_starttag(self, i: int) -> int: # pragma: no cover diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index ef6f0fbc6..4becb2988 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -44,13 +44,11 @@ from typing import TYPE_CHECKING, Any, Collection, NamedTuple import re import xml.etree.ElementTree as etree -try: # pragma: no cover - from html import entities -except ImportError: # pragma: no cover - import htmlentitydefs as entities +from html import entities if TYPE_CHECKING: # pragma: no cover from markdown import Markdown 
+ from . import treeprocessors def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]: @@ -72,7 +70,7 @@ def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlinePro * finally we apply strong, emphasis, etc. """ - inlinePatterns = util.Registry() + inlinePatterns: util.Registry[InlineProcessor] = util.Registry() inlinePatterns.register(BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190) inlinePatterns.register(EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180) inlinePatterns.register(ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170) @@ -191,7 +189,7 @@ class EmStrongItem(NamedTuple): # ----------------------------------------------------------------------------- -class Pattern: # pragma: no cover +class _BasePattern: """ Base class that inline patterns subclass. @@ -241,42 +239,54 @@ def getCompiledRegExp(self) -> re.Pattern: """ Return a compiled regular expression. """ return self.compiled_re - def handleMatch(self, m: re.Match[str]) -> etree.Element | str: - """Return a ElementTree element from the given match. - - Subclasses should override this method. - - Arguments: - m: A match object containing a match of the pattern. - - Returns: An ElementTree Element object. - - """ - pass # pragma: no cover - def type(self) -> str: """ Return class name, to define pattern type """ return self.__class__.__name__ def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. 
""" + assert self.md is not None try: - stash = self.md.treeprocessors['inline'].stashed_nodes + inlineprocessor: treeprocessors.InlineProcessor + inlineprocessor = self.md.treeprocessors['inline'] # type: ignore[assignment] + stash = inlineprocessor.stashed_nodes except KeyError: # pragma: no cover return text - def get_stash(m): + def get_stash(m: re.Match[str]) -> str: id = m.group(1) if id in stash: - value = stash.get(id) + value = stash[id] if isinstance(value, str): return value else: # An `etree` Element - return text content only return ''.join(value.itertext()) + return '' return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) +class LegacyPattern(_BasePattern): + def handleMatch(self, m: re.Match[str]) -> etree.Element | str: + """Return a ElementTree element from the given match. + + Subclasses should override this method. + + Arguments: + m: A match object containing a match of the pattern. + + Returns: An ElementTree Element object. + + """ + raise NotImplementedError() # pragma: no cover + + +if TYPE_CHECKING: # pragma: no cover + Pattern = _BasePattern +else: + Pattern = LegacyPattern + + class InlineProcessor(Pattern): """ Base class that inline processors subclass. @@ -322,7 +332,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str end: The end of the region that has been matched or None. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class SimpleTextPattern(Pattern): # pragma: no cover @@ -342,6 +352,8 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: class EscapeInlineProcessor(InlineProcessor): """ Return an escaped character. 
""" + md: Markdown + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str | None, int, int]: """ If the character matched by `group(1)` of a pattern is in [`ESCAPED_CHARS`][markdown.Markdown.ESCAPED_CHARS] @@ -499,6 +511,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, class HtmlInlineProcessor(InlineProcessor): """ Store raw inline html and return a placeholder. """ + + md: Markdown + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: """ Store the text of `group(1)` of a pattern and return a placeholder string. """ rawhtml = self.backslash_unescape(self.unescape(m.group(1))) @@ -508,7 +523,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ try: - stash = self.md.treeprocessors['inline'].stashed_nodes + inlineprocessor: treeprocessors.InlineProcessor + inlineprocessor = self.md.treeprocessors['inline'] # type: ignore[assignment] + stash = inlineprocessor.stashed_nodes except KeyError: # pragma: no cover return text @@ -517,16 +534,20 @@ def get_stash(m: re.Match[str]) -> str: value = stash.get(id) if value is not None: try: + assert isinstance(value, etree.Element) return self.md.serializer(value) except Exception: return r'\%s' % value + return '' return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) def backslash_unescape(self, text: str) -> str: """ Return text with backslash escapes undone (backslashes are restored). 
""" try: - RE = self.md.treeprocessors['unescape'].RE + unescape_processor: treeprocessors.UnescapeTreeprocessor + unescape_processor = self.md.treeprocessors['unescape'] # type: ignore[assignment] + RE = unescape_processor.RE except KeyError: # pragma: no cover return text @@ -875,6 +896,8 @@ class ReferenceInlineProcessor(LinkInlineProcessor): RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE) + md: Markdown + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: """ Return [`Element`][xml.etree.ElementTree.Element] returned by `makeTag` method or `(None, None, None)`. @@ -887,6 +910,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None id, end, handled = self.evalId(data, index, text) if not handled: return None, None, None + assert id is not None # Clean up line breaks in id id = self.NEWLINE_CLEANUP_RE.sub(' ', id) diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 7f5ede90c..f05dc4487 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -39,7 +39,7 @@ def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]: """ Build the default postprocessors for Markdown. """ - postprocessors = util.Registry() + postprocessors: util.Registry[Postprocessor] = util.Registry() postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30) postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20) return postprocessors @@ -63,7 +63,7 @@ def run(self, text: str) -> str: (possibly modified) string. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class RawHtmlPostprocessor(Postprocessor): @@ -71,11 +71,14 @@ class RawHtmlPostprocessor(Postprocessor): BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)') + md: Markdown + def run(self, text: str) -> str: """ Iterate over html stash and restore html. 
""" replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) + raw: str = self.md.htmlStash.rawHtmlBlocks[i] # type: ignore[assignment] + html = self.stash_to_string(raw) if self.isblocklevel(html): replacements["

{}

".format( self.md.htmlStash.get_placeholder(i))] = html diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 0f63cdd36..25cf86de3 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -36,7 +36,7 @@ def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Preprocessor]: """ Build and return the default set of preprocessors used by Markdown. """ - preprocessors = util.Registry() + preprocessors: util.Registry[Preprocessor] = util.Registry() preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30) preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20) return preprocessors @@ -60,12 +60,14 @@ def run(self, lines: list[str]) -> list[str]: the (possibly modified) list of lines. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class NormalizeWhitespace(Preprocessor): """ Normalize whitespace for consistent parsing. """ + md: Markdown + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) source = source.replace(util.STX, "").replace(util.ETX, "") @@ -83,6 +85,8 @@ class HtmlBlockPreprocessor(Preprocessor): [`Markdown`][markdown.Markdown] instance. 
""" + md: Markdown + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) parser = HTMLExtractor(self.md) diff --git a/markdown/py.typed b/markdown/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/markdown/serializers.py b/markdown/serializers.py index 573b26483..29ce305b0 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -45,10 +45,10 @@ from __future__ import annotations -from xml.etree.ElementTree import ProcessingInstruction -from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY +from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName +from xml.etree.ElementTree import HTML_EMPTY # type: ignore[attr-defined] import re -from typing import Callable, Literal, NoReturn +from typing import Callable, Iterable, Literal, NoReturn __all__ = ['to_html_string', 'to_xhtml_string'] @@ -123,7 +123,7 @@ def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal elif tag is ProcessingInstruction: write("" % _escape_cdata(text)) elif tag is None: - if text: + if text: # type: ignore[unreachable] write(_escape_cdata(text)) for e in elem: _serialize_html(write, e, format) @@ -136,7 +136,7 @@ def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal else: raise ValueError('QName objects must define a tag.') write("<" + tag) - items = elem.items() + items: Iterable[tuple[str, str]] = elem.items() if items: items = sorted(items) # lexical order for k, v in items: diff --git a/markdown/test_tools.py b/markdown/test_tools.py index 895e44ec5..5f2cfbd8f 100644 --- a/markdown/test_tools.py +++ b/markdown/test_tools.py @@ -29,7 +29,7 @@ from . 
import markdown, Markdown, util try: - import tidylib + import tidylib # type: ignore except ImportError: tidylib = None diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index dc857204b..e9eb190cd 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -34,18 +34,19 @@ if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + from typing import TypeGuard def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]: """ Build the default `treeprocessors` for Markdown. """ - treeprocessors = util.Registry() + treeprocessors: util.Registry[Treeprocessor] = util.Registry() treeprocessors.register(InlineProcessor(md), 'inline', 20) treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10) treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0) return treeprocessors -def isString(s: object) -> bool: +def isString(s: object) -> TypeGuard[str]: """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """ if not isinstance(s, util.AtomicString): return isinstance(s, str) @@ -69,7 +70,7 @@ def run(self, root: etree.Element) -> etree.Element | None: object, and the existing root `Element` will be replaced, or it can modify the current tree and return `None`. 
""" - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class InlineProcessor(Treeprocessor): @@ -204,7 +205,7 @@ def linkText(text: str | None) -> None: parent.text += text else: parent.text = text - result = [] + result: list[tuple[etree.Element, list[str]]] = [] strartIndex = 0 while data: index = data.find(self.__placeholder_prefix, strartIndex) @@ -212,13 +213,13 @@ def linkText(text: str | None) -> None: id, phEndIndex = self.__findPlaceholder(data, index) if id in self.stashed_nodes: - node = self.stashed_nodes.get(id) + node = self.stashed_nodes[id] if index > 0: text = data[strartIndex:index] linkText(text) - if not isString(node): # it's Element + if not isinstance(node, str): # it's Element for child in [node] + list(node): if child.tail: if child.tail.strip(): @@ -252,7 +253,7 @@ def linkText(text: str | None) -> None: def __applyPattern( self, - pattern: inlinepatterns.Pattern, + pattern: inlinepatterns.InlineProcessor | inlinepatterns.LegacyPattern, data: str, patternIndex: int, startIndex: int = 0 @@ -271,7 +272,12 @@ def __applyPattern( String with placeholders instead of `ElementTree` elements. """ - new_style = isinstance(pattern, inlinepatterns.InlineProcessor) + if isinstance(pattern, inlinepatterns.InlineProcessor): + new_style = True + new_pattern = pattern + else: + new_style = False + legacy_pattern = pattern for exclude in pattern.ANCESTOR_EXCLUDES: if exclude.lower() in self.ancestors: @@ -282,29 +288,27 @@ def __applyPattern( # Since `handleMatch` may reject our first match, # we iterate over the buffer looking for matches # until we can't find any more. 
- for match in pattern.getCompiledRegExp().finditer(data, startIndex): - node, start, end = pattern.handleMatch(match, data) - if start is None or end is None: - startIndex += match.end(0) - match = None - continue - break + for try_match in new_pattern.getCompiledRegExp().finditer(data, startIndex): + try_node, try_start, try_end = new_pattern.handleMatch(try_match, data) + if try_start is not None and try_end is not None: + match, node, start, end = try_match, try_node, try_start, try_end + break else: # pragma: no cover - match = pattern.getCompiledRegExp().match(data[startIndex:]) + match = legacy_pattern.getCompiledRegExp().match(data[startIndex:]) leftData = data[:startIndex] if not match: return data, False, 0 if not new_style: # pragma: no cover - node = pattern.handleMatch(match) + node = legacy_pattern.handleMatch(match) start = match.start(0) end = match.end(0) if node is None: return data, True, end - if not isString(node): + if not isinstance(node, str): if not isinstance(node.text, util.AtomicString): # We need to process current node too for child in [node] + list(node): @@ -373,7 +377,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. self.ancestors = parents self.__build_ancestors(currElement, self.ancestors) - insertQueue = [] + insertQueue: list[tuple[etree.Element, list[tuple[etree.Element, list[str]]]]] = [] for child in currElement: if child.text and not isinstance( child.text, util.AtomicString @@ -398,9 +402,9 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. 
child.tail = dumby.tail pos = list(currElement).index(child) + 1 tailResult.reverse() - for newChild in tailResult: - self.parent_map[newChild[0]] = currElement - currElement.insert(pos, newChild[0]) + for subChild in tailResult: + self.parent_map[subChild[0]] = currElement + currElement.insert(pos, subChild[0]) if len(child): self.parent_map[child] = currElement stack.append((child, self.ancestors[:])) @@ -415,6 +419,8 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. class PrettifyTreeprocessor(Treeprocessor): """ Add line breaks to the html document. """ + md: Markdown + def _prettifyETree(self, elem: etree.Element) -> None: """ Recursively add line breaks to `ElementTree` children. """ diff --git a/markdown/util.py b/markdown/util.py index b4642023e..0741b6ea6 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -135,6 +135,16 @@ def deprecated_func(*args, **kwargs): return wrapper +@overload +def parseBoolValue(value: str) -> bool: + ... # pragma: no cover + + +@overload +def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: + ... # pragma: no cover + + def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: """Parses a string representing a boolean value. If parsing was successful, returns `True` or `False`. 
If `preserve_none=True`, returns `True`, `False`, @@ -152,6 +162,7 @@ def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none return False elif fail_on_errors: raise ValueError('Cannot parse bool value: %r' % value) + return None def code_escape(text: str) -> str: @@ -171,9 +182,12 @@ def _get_stack_depth(size: int = 2) -> int: frame = sys._getframe(size) for size in count(size): - frame = frame.f_back - if not frame: + next_frame = frame.f_back + if next_frame is None: return size + frame = next_frame + + raise RuntimeError("Could not get stack depth") def nearing_recursion_limit() -> bool: @@ -218,7 +232,7 @@ class HtmlStash: in the beginning and replace with place-holders. """ - def __init__(self): + def __init__(self) -> None: """ Create an `HtmlStash`. """ self.html_counter = 0 # for counting inline html segments self.rawHtmlBlocks: list[str | etree.Element] = [] @@ -309,7 +323,7 @@ class Registry(Generic[_T]): an item using that item's assigned "name". """ - def __init__(self): + def __init__(self) -> None: self._data: dict[str, _T] = {} self._priority: list[_PriorityItem] = [] self._is_sorted = False diff --git a/pyproject.toml b/pyproject.toml index 8c9e9bcfe..45eb20506 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,5 +95,12 @@ legacy_em = 'markdown.extensions.legacy_em:LegacyEmExtension' [tool.setuptools] packages = ['markdown', 'markdown.extensions'] +[tool.setuptools.package-data] +"markdown" = ["py.typed"] + [tool.setuptools.dynamic] version = {attr = 'markdown.__meta__.__version__'} + +[tool.mypy] +warn_unreachable = true +show_error_codes = true diff --git a/tox.ini b/tox.ini index d071054ea..7e528d478 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, flake8, checkspelling, pep517check, checklinks +envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, mypy, flake8, checkspelling, pep517check, checklinks isolated_build = True 
[testenv] @@ -19,6 +19,15 @@ deps = pytidylib pygments=={env:PYGMENTS_VERSION} +[testenv:mypy] +deps = + mypy + types-PyYAML + types-Pygments +allowlist_externals = mypy +commands = mypy {toxinidir}/markdown +skip_install = true + [testenv:flake8] deps = flake8 allowlist_externals = flake8