From b79d59d0605bc0c796625d048adb2a3d77669086 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Tue, 31 Oct 2023 22:11:51 +0100 Subject: [PATCH 1/5] Fix all mypy typechecking errors, enable the check Functions that don't have any type annotations aren't checked by mypy, they are skipped for now in this commit --- .github/workflows/tox.yml | 2 +- markdown/__main__.py | 9 +++-- markdown/blockprocessors.py | 10 +++-- markdown/core.py | 52 +++++++++++++------------- markdown/extensions/__init__.py | 4 +- markdown/extensions/attr_list.py | 17 +++++---- markdown/extensions/codehilite.py | 15 ++++---- markdown/extensions/smarty.py | 8 +++- markdown/htmlparser.py | 6 ++- markdown/inlinepatterns.py | 61 ++++++++++++++++++++----------- markdown/postprocessors.py | 6 ++- markdown/preprocessors.py | 6 ++- markdown/serializers.py | 4 +- markdown/test_tools.py | 2 +- markdown/treeprocessors.py | 56 +++++++++++++++------------- markdown/util.py | 19 ++++++++-- pyproject.toml | 4 ++ tests/test_apis.py | 8 ++-- tox.ini | 11 +++++- 19 files changed, 183 insertions(+), 117 deletions(-) diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 9d258030..de91d997 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -71,7 +71,7 @@ jobs: fail-fast: false max-parallel: 4 matrix: - tox-env: [flake8, pep517check, checkspelling] + tox-env: [mypy, flake8, pep517check, checkspelling] env: TOXENV: ${{ matrix.tox-env }} diff --git a/markdown/__main__.py b/markdown/__main__.py index c323aaac..3ffb9ae4 100644 --- a/markdown/__main__.py +++ b/markdown/__main__.py @@ -24,6 +24,11 @@ import codecs import warnings import markdown +import logging +from logging import DEBUG, WARNING, CRITICAL +from typing import Any, Callable, IO + +yaml_load: Callable[[IO], Any] try: # We use `unsafe_load` because users may need to pass in actual Python # objects. 
As this is only available from the CLI, the user has much @@ -32,13 +37,11 @@ except ImportError: # pragma: no cover try: # Fall back to PyYAML <5.1 - from yaml import load as yaml_load + from yaml import load as yaml_load # type: ignore except ImportError: # Fall back to JSON from json import load as yaml_load -import logging -from logging import DEBUG, WARNING, CRITICAL logger = logging.getLogger('MARKDOWN') diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d8084680..d32e7eca 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -121,7 +121,7 @@ def test(self, parent: etree.Element, block: str) -> bool: parent: An `etree` element which will be the parent of the block. block: A block of text from the source which has been split at blank lines. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: """ Run processor. Must be overridden by subclasses. @@ -147,7 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: parent: An `etree` element which is the parent of the current block. blocks: A list of all remaining blocks of the document. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class ListIndentProcessor(BlockProcessor): @@ -417,7 +417,7 @@ def run(self, parent, blocks): def get_items(self, block: str) -> list[str]: """ Break a block into list items. 
""" - items = [] + items: list[str] = [] for line in block.split('\n'): m = self.CHILD_RE.match(line) if m: @@ -426,7 +426,9 @@ def get_items(self, block: str) -> list[str]: if not items and self.TAG == 'ol': # Detect the integer value of first list item INTEGER_RE = re.compile(r'(\d+)') - self.STARTSWITH = INTEGER_RE.match(m.group(1)).group() + int_match = INTEGER_RE.match(m.group(1)) + assert int_match is not None + self.STARTSWITH = int_match.group() # Append to the list items.append(m.group(3)) elif self.INDENT_RE.match(line): diff --git a/markdown/core.py b/markdown/core.py index 6b556b45..42e1faba 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -23,7 +23,7 @@ import sys import logging import importlib -from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO +from typing import TYPE_CHECKING, Any, BinaryIO, Callable, ClassVar, Mapping, Sequence from . import util from .preprocessors import build_preprocessors from .blockprocessors import build_block_parser @@ -85,7 +85,7 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): """ Creates a new Markdown instance. @@ -183,7 +183,7 @@ def registerExtensions( 'Successfully loaded extension "%s.%s".' 
% (ext.__class__.__module__, ext.__class__.__name__) ) - elif ext is not None: + elif ext is not None: # type: ignore[unreachable] raise TypeError( 'Extension "{}.{}" must be of type: "{}.{}"'.format( ext.__class__.__module__, ext.__class__.__name__, @@ -387,8 +387,8 @@ def convert(self, source: str) -> str: def convertFile( self, - input: str | TextIO | None = None, - output: str | TextIO | None = None, + input: str | BinaryIO | None = None, + output: str | BinaryIO | None = None, encoding: str | None = None, ) -> Markdown: """ @@ -417,15 +417,13 @@ def convertFile( # Read the source if input: if isinstance(input, str): - input_file = codecs.open(input, mode="r", encoding=encoding) + with codecs.open(input, mode="r", encoding=encoding) as input_file: + text = input_file.read() else: - input_file = codecs.getreader(encoding)(input) - text = input_file.read() - input_file.close() + with codecs.getreader(encoding)(input) as input_file: + text = input_file.read() else: text = sys.stdin.read() - if not isinstance(text, str): # pragma: no cover - text = text.decode(encoding) text = text.lstrip('\ufeff') # remove the byte-order mark @@ -442,18 +440,14 @@ def convertFile( output_file.close() else: writer = codecs.getwriter(encoding) - output_file = writer(output, errors="xmlcharrefreplace") - output_file.write(html) + output_writer = writer(output, errors="xmlcharrefreplace") + output_writer.write(html) # Don't close here. User may want to write more. else: # Encode manually and write bytes to stdout. - html = html.encode(encoding, "xmlcharrefreplace") - try: - # Write bytes directly to buffer (Python 3). - sys.stdout.buffer.write(html) - except AttributeError: # pragma: no cover - # Probably Python 2, which works with bytes by default. - sys.stdout.write(html) + html_bytes = html.encode(encoding, "xmlcharrefreplace") + # Write bytes directly to buffer (Python 3). 
+ sys.stdout.buffer.write(html_bytes) return self @@ -489,7 +483,13 @@ def markdown(text: str, **kwargs: Any) -> str: return md.convert(text) -def markdownFromFile(**kwargs: Any): +def markdownFromFile( + *, + input: str | BinaryIO | None = None, + output: str | BinaryIO | None = None, + encoding: str | None = None, + **kwargs: Any +): """ Read Markdown text from a file and write output to a file or a stream. @@ -498,13 +498,11 @@ def markdownFromFile(**kwargs: Any): [`convert`][markdown.Markdown.convert]. Keyword arguments: - input (str | TextIO): A file name or readable object. - output (str | TextIO): A file name or writable object. - encoding (str): Encoding of input and output. + input: A file name or readable object. + output: A file name or writable object. + encoding: Encoding of input and output. **kwargs: Any arguments accepted by the `Markdown` class. """ md = Markdown(**kwargs) - md.convertFile(kwargs.get('input', None), - kwargs.get('output', None), - kwargs.get('encoding', None)) + md.convertFile(input, output, encoding) diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py index 070c4cce..18c88a8a 100644 --- a/markdown/extensions/__init__.py +++ b/markdown/extensions/__init__.py @@ -27,7 +27,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Iterable, Mapping from ..util import parseBoolValue if TYPE_CHECKING: # pragma: no cover @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any) -> None: value = parseBoolValue(value, preserve_none=True) self.config[key][0] = value - def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]): + def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]): """ Loop through a collection of configuration options, passing each to [`setConfig`][markdown.extensions.Extension.setConfig]. 
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index 0c317d1b..d15d24df 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -32,6 +32,7 @@ if TYPE_CHECKING: # pragma: no cover from xml.etree.ElementTree import Element + from markdown import Markdown def _handle_double_quote(s, t): @@ -56,7 +57,7 @@ def _handle_word(s, t): return t, t -_scanner = re.Scanner([ +_scanner = re.Scanner([ # type: ignore[attr-defined] (r'[^ =]+=".*?"', _handle_double_quote), (r"[^ =]+='.*?'", _handle_single_quote), (r'[^ =]+=[^ =]+', _handle_key_value), @@ -86,6 +87,8 @@ class AttrListTreeprocessor(Treeprocessor): r'\uf900-\ufdcf\ufdf0-\ufffd' r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+') + md: Markdown + def run(self, doc: Element): for elem in doc.iter(): if self.md.is_block_level(elem.tag): @@ -102,18 +105,18 @@ def run(self, doc: Element): if child.tag in ['ul', 'ol']: pos = i break - if pos is None and elem[-1].tail: + if pos is None and (tail := elem[-1].tail): # use tail of last child. no `ul` or `ol`. - m = RE.search(elem[-1].tail) + m = RE.search(tail) if m: self.assign_attrs(elem, m.group(1)) - elem[-1].tail = elem[-1].tail[:m.start()] - elif pos is not None and pos > 0 and elem[pos-1].tail: + elem[-1].tail = tail[:m.start()] + elif pos is not None and pos > 0 and (tail := elem[pos-1].tail): # use tail of last child before `ul` or `ol` - m = RE.search(elem[pos-1].tail) + m = RE.search(tail) if m: self.assign_attrs(elem, m.group(1)) - elem[pos-1].tail = elem[pos-1].tail[:m.start()] + elem[pos-1].tail = tail[:m.start()] elif elem.text: # use text. `ul` is first child. m = RE.search(elem.text) diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index f8d25b0f..06acaec6 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -24,6 +24,7 @@ from . 
import Extension from ..treeprocessors import Treeprocessor from ..util import parseBoolValue +from typing import Callable try: # pragma: no cover from pygments import highlight @@ -110,11 +111,11 @@ class CodeHilite: def __init__(self, src: str, **options): self.src = src - self.lang = options.pop('lang', None) - self.guess_lang = options.pop('guess_lang', True) - self.use_pygments = options.pop('use_pygments', True) - self.lang_prefix = options.pop('lang_prefix', 'language-') - self.pygments_formatter = options.pop('pygments_formatter', 'html') + self.lang: str | None = options.pop('lang', None) + self.guess_lang: bool = options.pop('guess_lang', True) + self.use_pygments: bool = options.pop('use_pygments', True) + self.lang_prefix: str = options.pop('lang_prefix', 'language-') + self.pygments_formatter: str | Callable = options.pop('pygments_formatter', 'html') if 'linenos' not in options: options['linenos'] = options.pop('linenums', None) @@ -146,7 +147,7 @@ def hilite(self, shebang=True) -> str: if pygments and self.use_pygments: try: - lexer = get_lexer_by_name(self.lang, **self.options) + lexer = get_lexer_by_name(self.lang or '', **self.options) except ValueError: try: if self.guess_lang: @@ -157,7 +158,7 @@ def hilite(self, shebang=True) -> str: lexer = get_lexer_by_name('text', **self.options) if not self.lang: # Use the guessed lexer's language instead - self.lang = lexer.aliases[0] + self.lang = lexer.aliases[0] # type: ignore[attr-defined] lang_str = f'{self.lang_prefix}{self.lang}' if isinstance(self.pygments_formatter, str): try: diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index 3274bf86..e480df4f 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -90,7 +90,11 @@ from ..inlinepatterns import HtmlInlineProcessor, HTML_RE from ..treeprocessors import InlineProcessor from ..util import Registry +from markdown import Markdown +from typing import TYPE_CHECKING +if TYPE_CHECKING: # pragma: no 
cover + from .. import inlinepatterns # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" @@ -241,9 +245,9 @@ def educateQuotes(self, md) -> None: ) self._addPatterns(md, patterns, 'quotes', 30) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown): configs = self.getConfigs() - self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry() + self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() if configs['smart_ellipses']: self.educateEllipses(md) if configs['smart_quotes']: diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index 29e23009..f68d064c 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -28,12 +28,15 @@ import re import importlib.util import sys +from typing import Any # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. # Users can still do `from html import parser` and get the default behavior. spec = importlib.util.find_spec('html.parser') -htmlparser = importlib.util.module_from_spec(spec) +assert spec is not None +htmlparser: Any = importlib.util.module_from_spec(spec) +assert spec.loader is not None spec.loader.exec_module(htmlparser) sys.modules['htmlparser'] = htmlparser @@ -281,6 +284,7 @@ def parse_html_declaration(self, i: int) -> int: def get_starttag_text(self) -> str: """Return full source of start tag: `<...>`.""" + assert self.__starttag_text is not None return self.__starttag_text def parse_starttag(self, i: int) -> int: # pragma: no cover diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 296ab834..23b39079 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -44,13 +44,11 @@ from typing import TYPE_CHECKING, Any, Collection, NamedTuple import re import xml.etree.ElementTree as etree -try: # pragma: no cover - from html import entities -except ImportError: # pragma: no cover - import htmlentitydefs as entities +from html import entities if 
TYPE_CHECKING: # pragma: no cover from markdown import Markdown + from . import treeprocessors def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlineProcessor]: @@ -72,7 +70,7 @@ def build_inlinepatterns(md: Markdown, **kwargs: Any) -> util.Registry[InlinePro * finally we apply strong, emphasis, etc. """ - inlinePatterns = util.Registry() + inlinePatterns: util.Registry[InlineProcessor] = util.Registry() inlinePatterns.register(BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190) inlinePatterns.register(EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180) inlinePatterns.register(ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170) @@ -191,7 +189,7 @@ class EmStrongItem(NamedTuple): # ----------------------------------------------------------------------------- -class Pattern: # pragma: no cover +class _BasePattern: """ Base class that inline patterns subclass. @@ -217,6 +215,9 @@ class initialization, the `^(.*)` and `(.*)!` are added automatically and the re would cause the content to be a descendant of one of the listed tag names. """ + compiled_re: re.Pattern[str] + md: Markdown | None + def __init__(self, pattern: str, md: Markdown | None = None): """ Create an instant of an inline pattern. @@ -238,27 +239,17 @@ def getCompiledRegExp(self) -> re.Pattern: """ Return a compiled regular expression. """ return self.compiled_re - def handleMatch(self, m: re.Match[str]) -> etree.Element | str: - """Return a ElementTree element from the given match. - - Subclasses should override this method. - - Arguments: - m: A match object containing a match of the pattern. - - Returns: An ElementTree Element object. - - """ - pass # pragma: no cover - def type(self) -> str: """ Return class name, to define pattern type """ return self.__class__.__name__ def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. 
""" + assert self.md is not None try: - stash = self.md.treeprocessors['inline'].stashed_nodes + inlineprocessor: treeprocessors.InlineProcessor + inlineprocessor = self.md.treeprocessors['inline'] # type: ignore[assignment] + stash = inlineprocessor.stashed_nodes except KeyError: # pragma: no cover return text @@ -274,6 +265,27 @@ def get_stash(m): return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) +class LegacyPattern(_BasePattern): + def handleMatch(self, m: re.Match[str]) -> etree.Element | str: + """Return a ElementTree element from the given match. + + Subclasses should override this method. + + Arguments: + m: A match object containing a match of the pattern. + + Returns: An ElementTree Element object. + + """ + raise NotImplementedError() # pragma: no cover + + +if TYPE_CHECKING: # pragma: no cover + Pattern = _BasePattern +else: + Pattern = LegacyPattern + + class InlineProcessor(Pattern): """ Base class that inline processors subclass. @@ -319,7 +331,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str end: The end of the region that has been matched or None. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class SimpleTextPattern(Pattern): # pragma: no cover @@ -339,6 +351,8 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: class EscapeInlineProcessor(InlineProcessor): """ Return an escaped character. """ + md: Markdown + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str | None, int, int]: """ If the character matched by `group(1)` of a pattern is in [`ESCAPED_CHARS`][markdown.Markdown.ESCAPED_CHARS] @@ -496,6 +510,9 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, class HtmlInlineProcessor(InlineProcessor): """ Store raw inline html and return a placeholder. 
""" + + md: Markdown + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: """ Store the text of `group(1)` of a pattern and return a placeholder string. """ rawhtml = self.backslash_unescape(self.unescape(m.group(1))) @@ -875,6 +892,8 @@ class ReferenceInlineProcessor(LinkInlineProcessor): RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE) + md: Markdown + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: """ Return [`Element`][xml.etree.ElementTree.Element] returned by `makeTag` method or `(None, None, None)`. diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index 3da5ee1a..e558a23b 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -39,7 +39,7 @@ def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]: """ Build the default postprocessors for Markdown. """ - postprocessors = util.Registry() + postprocessors: util.Registry[Postprocessor] = util.Registry() postprocessors.register(RawHtmlPostprocessor(md), 'raw_html', 30) postprocessors.register(AndSubstitutePostprocessor(), 'amp_substitute', 20) return postprocessors @@ -63,7 +63,7 @@ def run(self, text: str) -> str: (possibly modified) string. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class RawHtmlPostprocessor(Postprocessor): @@ -71,6 +71,8 @@ class RawHtmlPostprocessor(Postprocessor): BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)') + md: Markdown + def run(self, text: str): """ Iterate over html stash and restore html. """ replacements = OrderedDict() diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index 0f63cdd3..a49ae2c2 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -36,7 +36,7 @@ def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Preprocessor]: """ Build and return the default set of preprocessors used by Markdown. 
""" - preprocessors = util.Registry() + preprocessors: util.Registry[Preprocessor] = util.Registry() preprocessors.register(NormalizeWhitespace(md), 'normalize_whitespace', 30) preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20) return preprocessors @@ -60,12 +60,14 @@ def run(self, lines: list[str]) -> list[str]: the (possibly modified) list of lines. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class NormalizeWhitespace(Preprocessor): """ Normalize whitespace for consistent parsing. """ + md: Markdown + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) source = source.replace(util.STX, "").replace(util.ETX, "") diff --git a/markdown/serializers.py b/markdown/serializers.py index 5a8818e2..468538e1 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -45,8 +45,8 @@ from __future__ import annotations -from xml.etree.ElementTree import ProcessingInstruction -from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY +from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName +from xml.etree.ElementTree import HTML_EMPTY # type: ignore[attr-defined] import re __all__ = ['to_html_string', 'to_xhtml_string'] diff --git a/markdown/test_tools.py b/markdown/test_tools.py index 895e44ec..5f2cfbd8 100644 --- a/markdown/test_tools.py +++ b/markdown/test_tools.py @@ -29,7 +29,7 @@ from . import markdown, Markdown, util try: - import tidylib + import tidylib # type: ignore except ImportError: tidylib = None diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 59a3eb3c..0e8d4d5d 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -34,18 +34,19 @@ if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + from typing import TypeGuard def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]: """ Build the default `treeprocessors` for Markdown. 
""" - treeprocessors = util.Registry() + treeprocessors: util.Registry[Treeprocessor] = util.Registry() treeprocessors.register(InlineProcessor(md), 'inline', 20) treeprocessors.register(PrettifyTreeprocessor(md), 'prettify', 10) treeprocessors.register(UnescapeTreeprocessor(md), 'unescape', 0) return treeprocessors -def isString(s: Any) -> bool: +def isString(s: object) -> TypeGuard[str]: """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """ if not isinstance(s, util.AtomicString): return isinstance(s, str) @@ -69,7 +70,7 @@ def run(self, root: etree.Element) -> etree.Element | None: object, and the existing root `Element` will be replaced, or it can modify the current tree and return `None`. """ - pass # pragma: no cover + raise NotImplementedError() # pragma: no cover class InlineProcessor(Treeprocessor): @@ -77,7 +78,7 @@ class InlineProcessor(Treeprocessor): A `Treeprocessor` that traverses a tree, applying inline patterns. """ - def __init__(self, md): + def __init__(self, md: Markdown): self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX self.__placeholder_suffix = util.ETX self.__placeholder_length = 4 + len(self.__placeholder_prefix) \ @@ -85,7 +86,7 @@ def __init__(self, md): self.__placeholder_re = util.INLINE_PLACEHOLDER_RE self.md = md self.inlinePatterns = md.inlinePatterns - self.ancestors = [] + self.ancestors: list[str] = [] def __makePlaceholder(self, type) -> tuple[str, str]: """ Generate a placeholder """ @@ -171,10 +172,10 @@ def __processElementText(self, node: etree.Element, subnode: etree.Element, isTe def __processPlaceholders( self, - data: str, + data: str | None, parent: etree.Element, isText: bool = True - ) -> list[tuple[etree.Element, Any]]: + ) -> list[tuple[etree.Element, list[str]]]: """ Process string with placeholders and generate `ElementTree` tree. @@ -187,7 +188,7 @@ def __processPlaceholders( List with `ElementTree` elements with applied inline patterns. 
""" - def linkText(text): + def linkText(text: str | None): if text: if result: if result[-1][0].tail: @@ -212,7 +213,7 @@ def linkText(text): id, phEndIndex = self.__findPlaceholder(data, index) if id in self.stashed_nodes: - node = self.stashed_nodes.get(id) + node = self.stashed_nodes[id] if index > 0: text = data[strartIndex:index] @@ -252,7 +253,7 @@ def linkText(text): def __applyPattern( self, - pattern: inlinepatterns.Pattern, + pattern: inlinepatterns.InlineProcessor | inlinepatterns.LegacyPattern, data: str, patternIndex: int, startIndex: int = 0 @@ -271,7 +272,12 @@ def __applyPattern( String with placeholders instead of `ElementTree` elements. """ - new_style = isinstance(pattern, inlinepatterns.InlineProcessor) + if isinstance(pattern, inlinepatterns.InlineProcessor): + new_style = True + new_pattern = pattern + else: + new_style = False + legacy_pattern = pattern for exclude in pattern.ANCESTOR_EXCLUDES: if exclude.lower() in self.ancestors: @@ -282,29 +288,27 @@ def __applyPattern( # Since `handleMatch` may reject our first match, # we iterate over the buffer looking for matches # until we can't find any more. 
- for match in pattern.getCompiledRegExp().finditer(data, startIndex): - node, start, end = pattern.handleMatch(match, data) - if start is None or end is None: - startIndex += match.end(0) - match = None - continue - break + for try_match in new_pattern.getCompiledRegExp().finditer(data, startIndex): + try_node, try_start, try_end = new_pattern.handleMatch(try_match, data) + if try_start is not None and try_end is not None: + match, node, start, end = try_match, try_node, try_start, try_end + break else: # pragma: no cover - match = pattern.getCompiledRegExp().match(data[startIndex:]) + match = legacy_pattern.getCompiledRegExp().match(data[startIndex:]) leftData = data[:startIndex] if not match: return data, False, 0 if not new_style: # pragma: no cover - node = pattern.handleMatch(match) + node = legacy_pattern.handleMatch(match) start = match.start(0) end = match.end(0) if node is None: return data, True, end - if not isString(node): + if not isinstance(node, str): if not isinstance(node.text, util.AtomicString): # We need to process current node too for child in [node] + list(node): @@ -330,7 +334,7 @@ def __applyPattern( match.group(1), placeholder, match.groups()[-1]), True, 0 - def __build_ancestors(self, parent, parents): + def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None: """Build the ancestor list.""" ancestors = [] while parent is not None: @@ -373,7 +377,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. self.ancestors = parents self.__build_ancestors(currElement, self.ancestors) - insertQueue = [] + insertQueue: list[tuple[etree.Element, list[tuple[etree.Element, list[str]]]]] = [] for child in currElement: if child.text and not isinstance( child.text, util.AtomicString @@ -398,9 +402,9 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. 
child.tail = dumby.tail pos = list(currElement).index(child) + 1 tailResult.reverse() - for newChild in tailResult: - self.parent_map[newChild[0]] = currElement - currElement.insert(pos, newChild[0]) + for subChild in tailResult: + self.parent_map[subChild[0]] = currElement + currElement.insert(pos, subChild[0]) if len(child): self.parent_map[child] = currElement stack.append((child, self.ancestors[:])) diff --git a/markdown/util.py b/markdown/util.py index 827befd8..7e3405fc 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -134,6 +134,16 @@ def deprecated_func(*args, **kwargs): return wrapper +@overload +def parseBoolValue(value: str) -> bool: + ... # pragma: no cover + + +@overload +def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: + ... # pragma: no cover + + def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None: """Parses a string representing a boolean value. If parsing was successful, returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`, @@ -151,6 +161,7 @@ def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none return False elif fail_on_errors: raise ValueError('Cannot parse bool value: %r' % value) + return None def code_escape(text: str) -> str: @@ -300,9 +311,9 @@ class Registry(Generic[_T]): an item using that item's assigned "name". """ - def __init__(self): + def __init__(self) -> None: self._data: dict[str, _T] = {} - self._priority = [] + self._priority: list[_PriorityItem] = [] self._is_sorted = False def __contains__(self, item: str | _T) -> bool: @@ -318,11 +329,11 @@ def __iter__(self) -> Iterator[_T]: @overload def __getitem__(self, key: str | int) -> _T: # pragma: no cover - ... + ... # pragma: no cover @overload def __getitem__(self, key: slice) -> Registry[_T]: # pragma: no cover - ... + ... 
# pragma: no cover def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]: self._sort() diff --git a/pyproject.toml b/pyproject.toml index 8c9e9bcf..c9fa250f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,3 +97,7 @@ packages = ['markdown', 'markdown.extensions'] [tool.setuptools.dynamic] version = {attr = 'markdown.__meta__.__version__'} + +[tool.mypy] +warn_unreachable = true +show_error_codes = true diff --git a/tests/test_apis.py b/tests/test_apis.py index 1305c547..d613a822 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -33,7 +33,7 @@ from logging import DEBUG, WARNING, CRITICAL import yaml import tempfile -from io import BytesIO +from io import BytesIO, StringIO, TextIOWrapper import xml.etree.ElementTree as etree from xml.etree.ElementTree import ProcessingInstruction @@ -80,8 +80,8 @@ class TestConvertFile(unittest.TestCase): def setUp(self): self.saved = sys.stdin, sys.stdout - sys.stdin = BytesIO(bytes('foo', encoding='utf-8')) - sys.stdout = BytesIO() + sys.stdin = StringIO('foo') + sys.stdout = TextIOWrapper(BytesIO()) def tearDown(self): sys.stdin, sys.stdout = self.saved @@ -111,7 +111,7 @@ def testFileObjects(self): def testStdinStdout(self): markdown.markdownFromFile() sys.stdout.seek(0) - self.assertEqual(sys.stdout.read().decode('utf-8'), '

<p>foo</p>

') + self.assertEqual(sys.stdout.read(), '

<p>foo</p>

') class TestBlockParser(unittest.TestCase): diff --git a/tox.ini b/tox.ini index d071054e..7e528d47 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, flake8, checkspelling, pep517check, checklinks +envlist = py{38, 39, 310, 311, 312}, pypy{38, 39, 310}, pygments, mypy, flake8, checkspelling, pep517check, checklinks isolated_build = True [testenv] @@ -19,6 +19,15 @@ deps = pytidylib pygments=={env:PYGMENTS_VERSION} +[testenv:mypy] +deps = + mypy + types-PyYAML + types-Pygments +allowlist_externals = mypy +commands = mypy {toxinidir}/markdown +skip_install = true + [testenv:flake8] deps = flake8 allowlist_externals = flake8 From e743883a76c9fa479ddc9bdfce1ccbc906426f37 Mon Sep 17 00:00:00 2001 From: Oleh Prypin Date: Wed, 1 Nov 2023 00:46:34 +0100 Subject: [PATCH 2/5] Add a lot of type annotations and fixes --- markdown/__main__.py | 4 +- markdown/blockprocessors.py | 44 +++++++------- markdown/core.py | 9 ++- markdown/extensions/__init__.py | 4 +- markdown/extensions/abbr.py | 16 +++-- markdown/extensions/admonition.py | 26 +++++--- markdown/extensions/attr_list.py | 4 +- markdown/extensions/codehilite.py | 25 +++++--- markdown/extensions/def_list.py | 43 +++++++------ markdown/extensions/extra.py | 2 +- markdown/extensions/fenced_code.py | 25 +++++--- markdown/extensions/footnotes.py | 63 ++++++++++--------- markdown/extensions/legacy_attrs.py | 14 +++-- markdown/extensions/legacy_em.py | 6 +- markdown/extensions/md_in_html.py | 78 +++++++++++++----------- markdown/extensions/meta.py | 12 +++- markdown/extensions/nl2br.py | 6 +- markdown/extensions/sane_lists.py | 11 +++- markdown/extensions/smarty.py | 35 ++++++----- markdown/extensions/tables.py | 30 +++++---- markdown/extensions/toc.py | 94 ++++++++++++++++++----------- markdown/extensions/wikilinks.py | 19 ++++-- markdown/htmlparser.py | 41 +++++++------ markdown/inlinepatterns.py | 67 ++++++++++---------- markdown/postprocessors.py | 13 
++-- markdown/preprocessors.py | 2 + markdown/serializers.py | 19 +++--- markdown/treeprocessors.py | 22 ++++--- markdown/util.py | 32 +++++++--- 29 files changed, 456 insertions(+), 310 deletions(-) diff --git a/markdown/__main__.py b/markdown/__main__.py index 3ffb9ae4..b907108c 100644 --- a/markdown/__main__.py +++ b/markdown/__main__.py @@ -26,7 +26,7 @@ import markdown import logging from logging import DEBUG, WARNING, CRITICAL -from typing import Any, Callable, IO +from typing import Any, Callable, IO, Mapping yaml_load: Callable[[IO], Any] try: @@ -46,7 +46,7 @@ logger = logging.getLogger('MARKDOWN') -def parse_options(args=None, values=None): +def parse_options(args=None, values=None) -> tuple[Mapping[str, Any], bool]: """ Define and parse `optparse` options for command-line usage. """ diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py index d32e7eca..79897778 100644 --- a/markdown/blockprocessors.py +++ b/markdown/blockprocessors.py @@ -167,18 +167,18 @@ class ListIndentProcessor(BlockProcessor): LIST_TYPES = ['ul', 'ol'] """ Types of lists this processor can operate on. 
""" - def __init__(self, *args): + def __init__(self, *args) -> None: super().__init__(*args) self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) and \ not self.parser.state.isstate('detabbed') and \ (parent.tag in self.ITEM_TYPES or - (len(parent) and parent[-1] is not None and + (len(parent) > 0 and parent[-1] is not None and (parent[-1].tag in self.LIST_TYPES))) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) level, sibling = self.get_level(parent, block) block = self.looseDetab(block, level) @@ -251,10 +251,10 @@ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Eleme class CodeBlockProcessor(BlockProcessor): """ Process code blocks. """ - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return block.startswith(' '*self.tab_length) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: sibling = self.lastChild(parent) block = blocks.pop(0) theRest = '' @@ -286,10 +286,10 @@ class BlockQuoteProcessor(BlockProcessor): RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)') - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.search(block)) and not util.nearing_recursion_limit() - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) m = self.RE.search(block) if m: @@ -353,10 +353,10 @@ def __init__(self, parser: BlockParser): self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' % (self.tab_length, self.tab_length * 2 - 1)) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.match(block)) - def run(self, parent, blocks): + def run(self, parent: 
etree.Element, blocks: list[str]) -> None: # Check for multiple items in one block. items = self.get_items(blocks.pop(0)) sibling = self.lastChild(parent) @@ -462,10 +462,10 @@ class HashHeaderProcessor(BlockProcessor): # Detect a header at start of any line in block RE = re.compile(r'(?:^|\n)(?P#{1,6})(?P
(?:\\.|[^\\])*?)#*(?:\n|$)') - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.search(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) m = self.RE.search(block) if m: @@ -493,10 +493,10 @@ class SetextHeaderProcessor(BlockProcessor): # Detect Setext-style header. Must be first 2 lines of block. RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.match(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: lines = blocks.pop(0).split('\n') # Determine level. `=` is 1 and `-` is 2. if lines[1].startswith('='): @@ -519,7 +519,7 @@ class HRProcessor(BlockProcessor): # Detect hr on any line of a block. SEARCH_RE = re.compile(RE, re.MULTILINE) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: m = self.SEARCH_RE.search(block) if m: # Save match object on class instance so we can use it later. @@ -527,7 +527,7 @@ def test(self, parent, block): return True return False - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) match = self.match # Check for lines in block before `hr`. @@ -547,10 +547,10 @@ def run(self, parent, blocks): class EmptyBlockProcessor(BlockProcessor): """ Process blocks that are empty or start with an empty line. 
""" - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return not block or block.startswith('\n') - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) filler = '\n\n' if block: @@ -577,10 +577,10 @@ class ReferenceProcessor(BlockProcessor): r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE ) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: block = blocks.pop(0) m = self.RE.search(block) if m: @@ -603,10 +603,10 @@ def run(self, parent, blocks): class ParagraphProcessor(BlockProcessor): """ Process Paragraph blocks. """ - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) if block.strip(): # Not a blank block. Add to parent, otherwise throw it away. diff --git a/markdown/core.py b/markdown/core.py index 42e1faba..0e21928f 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -36,6 +36,7 @@ if TYPE_CHECKING: # pragma: no cover from xml.etree.ElementTree import Element + from markdown.extensions.toc import TocToken __all__ = ['Markdown', 'markdown', 'markdownFromFile'] @@ -85,6 +86,10 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ + toc_tokens: list[TocToken] + toc: str + Meta: dict[str, Any] + def __init__(self, **kwargs: Any): """ Creates a new Markdown instance. 
@@ -159,7 +164,7 @@ def build_parser(self) -> Markdown: def registerExtensions( self, extensions: Sequence[Extension | str], - configs: Mapping[str, Mapping[str, Any]] + configs: Mapping[str, dict[str, Any]] ) -> Markdown: """ Load a list of extensions into an instance of the `Markdown` class. @@ -489,7 +494,7 @@ def markdownFromFile( output: str | BinaryIO | None = None, encoding: str | None = None, **kwargs: Any -): +) -> None: """ Read Markdown text from a file and write output to a file or a stream. diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py index 18c88a8a..32ba5565 100644 --- a/markdown/extensions/__init__.py +++ b/markdown/extensions/__init__.py @@ -53,7 +53,7 @@ class Extension: if a default is not set for each option. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ Initiate Extension and set up configs. """ self.setConfigs(kwargs) @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any) -> None: value = parseBoolValue(value, preserve_none=True) self.config[key][0] = value - def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]): + def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]) -> None: """ Loop through a collection of configuration options, passing each to [`setConfig`][markdown.extensions.Extension.setConfig]. diff --git a/markdown/extensions/abbr.py b/markdown/extensions/abbr.py index c060f475..3d89fefa 100644 --- a/markdown/extensions/abbr.py +++ b/markdown/extensions/abbr.py @@ -28,12 +28,16 @@ from ..util import AtomicString import re import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class AbbrExtension(Extension): """ Abbreviation Extension for Python-Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Insert `AbbrPreprocessor` before `ReferencePreprocessor`. 
""" md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16) @@ -43,10 +47,10 @@ class AbbrPreprocessor(BlockProcessor): RE = re.compile(r'^[*]\[(?P[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P.*)$', re.MULTILINE) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: """ Find and remove all Abbreviation references from the text. Each reference is set as a new `AbbrPattern` in the markdown instance. @@ -71,7 +75,7 @@ def run(self, parent, blocks): blocks.insert(0, block) return False - def _generate_pattern(self, text): + def _generate_pattern(self, text: str) -> str: """ Given a string, returns an regex pattern to match that string. @@ -90,11 +94,11 @@ def _generate_pattern(self, text): class AbbrInlineProcessor(InlineProcessor): """ Abbreviation inline pattern. """ - def __init__(self, pattern, title): + def __init__(self, pattern: str, title: str): super().__init__(pattern) self.title = title - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]: abbr = etree.Element('abbr') abbr.text = AtomicString(m.group('abbr')) abbr.set('title', self.title) diff --git a/markdown/extensions/admonition.py b/markdown/extensions/admonition.py index f05d0896..9346f3c2 100644 --- a/markdown/extensions/admonition.py +++ b/markdown/extensions/admonition.py @@ -30,12 +30,17 @@ from ..blockprocessors import BlockProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown + from markdown import blockparser class AdmonitionExtension(Extension): """ Admonition extension for Python-Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add Admonition to Markdown instance. 
""" md.registerExtension(self) @@ -49,15 +54,15 @@ class AdmonitionProcessor(BlockProcessor): RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)') RE_SPACES = re.compile(' +') - def __init__(self, parser): + def __init__(self, parser: blockparser.BlockParser): """Initialization.""" super().__init__(parser) - self.current_sibling = None - self.content_indention = 0 + self.current_sibling: etree.Element | None = None + self.content_indent = 0 - def parse_content(self, parent, block): + def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]: """Get sibling admonition. Retrieve the appropriate sibling element. This can get tricky when @@ -70,11 +75,11 @@ def parse_content(self, parent, block): # We already acquired the block via test if self.current_sibling is not None: - sibling = self.current_sibling + prev_sibling = self.current_sibling block, the_rest = self.detab(block, self.content_indent) self.current_sibling = None self.content_indent = 0 - return sibling, block, the_rest + return prev_sibling, block, the_rest sibling = self.lastChild(parent) @@ -115,14 +120,14 @@ def parse_content(self, parent, block): return sibling, block, the_rest - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: if self.RE.search(block): return True else: return self.parse_content(parent, block)[0] is not None - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: block = blocks.pop(0) m = self.RE.search(block) @@ -143,6 +148,7 @@ def run(self, parent, blocks): p.text = title p.set('class', self.CLASSNAME_TITLE) else: + assert sibling is not None # Sibling is a list item, but we need to wrap it's content should be wrapped in <p> if sibling.tag in ('li', 'dd') and sibling.text: text = sibling.text @@ -160,7 +166,7 @@ def run(self, parent, blocks): # list for future processing. 
blocks.insert(0, theRest) - def get_class_and_title(self, match): + def get_class_and_title(self, match: re.Match[str]) -> tuple[str, str | None]: klass, title = match.group(1).lower(), match.group(2) klass = self.RE_SPACES.sub(' ', klass) if title is None: diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py index d15d24df..a7276e74 100644 --- a/markdown/extensions/attr_list.py +++ b/markdown/extensions/attr_list.py @@ -89,7 +89,7 @@ class AttrListTreeprocessor(Treeprocessor): md: Markdown - def run(self, doc: Element): + def run(self, doc: Element) -> None: for elem in doc.iter(): if self.md.is_block_level(elem.tag): # Block level: check for `attrs` on last line of text @@ -173,7 +173,7 @@ def sanitize_name(self, name: str) -> str: class AttrListExtension(Extension): """ Attribute List extension for Python-Markdown """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8) md.registerExtension(self) diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py index 06acaec6..a33e7be1 100644 --- a/markdown/extensions/codehilite.py +++ b/markdown/extensions/codehilite.py @@ -24,7 +24,11 @@ from . import Extension from ..treeprocessors import Treeprocessor from ..util import parseBoolValue -from typing import Callable +from typing import TYPE_CHECKING, Callable, Any + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown + import xml.etree.ElementTree as etree try: # pragma: no cover from pygments import highlight @@ -129,7 +133,7 @@ def __init__(self, src: str, **options): self.options = options - def hilite(self, shebang=True) -> str: + def hilite(self, shebang: bool = True) -> str: """ Pass code to the [Pygments](https://pygments.org/) highlighter with optional line numbers. 
The output should then be styled with CSS to @@ -188,7 +192,7 @@ def hilite(self, shebang=True) -> str: txt ) - def _parseHeader(self): + def _parseHeader(self) -> None: """ Determines language of a code block from shebang line and whether the said line should be removed or left in place. If the shebang line @@ -250,7 +254,10 @@ def _parseHeader(self): class HiliteTreeprocessor(Treeprocessor): """ Highlight source code in code blocks. """ - def code_unescape(self, text): + config: dict[str, Any] + md: Markdown + + def code_unescape(self, text: str) -> str: """Unescape code.""" text = text.replace("<", "<") text = text.replace(">", ">") @@ -259,14 +266,16 @@ def code_unescape(self, text): text = text.replace("&", "&") return text - def run(self, root): + def run(self, root: etree.Element) -> None: """ Find code blocks and store in `htmlStash`. """ blocks = root.iter('pre') for block in blocks: if len(block) == 1 and block[0].tag == 'code': local_config = self.config.copy() + text = block[0].text + assert text is not None code = CodeHilite( - self.code_unescape(block[0].text), + self.code_unescape(text), tab_length=self.md.tab_length, style=local_config.pop('pygments_style', 'default'), **local_config @@ -283,7 +292,7 @@ def run(self, root): class CodeHiliteExtension(Extension): """ Add source code highlighting to markdown code blocks. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: # define default configs self.config = { 'linenums': [ @@ -326,7 +335,7 @@ def __init__(self, **kwargs): pass # Assume it's not a boolean value. Use as-is. self.config[key] = [value, ''] - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `HilitePostprocessor` to Markdown instance. 
""" hiliter = HiliteTreeprocessor(md) hiliter.config = self.getConfigs() diff --git a/markdown/extensions/def_list.py b/markdown/extensions/def_list.py index 54273b60..8fb5c3db 100644 --- a/markdown/extensions/def_list.py +++ b/markdown/extensions/def_list.py @@ -25,6 +25,10 @@ from ..blockprocessors import BlockProcessor, ListIndentProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class DefListProcessor(BlockProcessor): @@ -33,13 +37,14 @@ class DefListProcessor(BlockProcessor): RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)') NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]') - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return bool(self.RE.search(block)) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool | None: raw_block = blocks.pop(0) m = self.RE.search(raw_block) + assert m is not None terms = [term.strip() for term in raw_block[:m.start()].split('\n') if term.strip()] block = raw_block[m.end():] @@ -53,20 +58,21 @@ def run(self, parent, blocks): else: d = m.group(2) sibling = self.lastChild(parent) - if not terms and sibling is None: - # This is not a definition item. Most likely a paragraph that - # starts with a colon at the beginning of a document or list. - blocks.insert(0, raw_block) - return False - if not terms and sibling.tag == 'p': - # The previous paragraph contains the terms - state = 'looselist' - terms = sibling.text.split('\n') - parent.remove(sibling) - # Acquire new sibling - sibling = self.lastChild(parent) - else: - state = 'list' + state = 'list' + if not terms: + if sibling is None: + # This is not a definition item. Most likely a paragraph that + # starts with a colon at the beginning of a document or list. 
+ blocks.insert(0, raw_block) + return False + if sibling.tag == 'p': + # The previous paragraph contains the terms + state = 'looselist' + assert sibling.text is not None + terms = sibling.text.split('\n') + parent.remove(sibling) + # Acquire new sibling + sibling = self.lastChild(parent) if sibling is not None and sibling.tag == 'dl': # This is another item on an existing list @@ -88,6 +94,7 @@ def run(self, parent, blocks): if theRest: blocks.insert(0, theRest) + return None class DefListIndentProcessor(ListIndentProcessor): @@ -99,7 +106,7 @@ class DefListIndentProcessor(ListIndentProcessor): LIST_TYPES = ['dl', 'ol', 'ul'] """ Include `dl` is list types. """ - def create_item(self, parent, block): + def create_item(self, parent: etree.Element, block: str): """ Create a new `dd` or `li` (depending on parent) and parse the block with it as the parent. """ dd = etree.SubElement(parent, 'dd') @@ -109,7 +116,7 @@ def create_item(self, parent, block): class DefListExtension(Extension): """ Add definition lists to Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add an instance of `DefListProcessor` to `BlockParser`. """ md.parser.blockprocessors.register(DefListIndentProcessor(md.parser), 'defindent', 85) md.parser.blockprocessors.register(DefListProcessor(md.parser), 'deflist', 25) diff --git a/markdown/extensions/extra.py b/markdown/extensions/extra.py index 74ebc192..d33a0969 100644 --- a/markdown/extensions/extra.py +++ b/markdown/extensions/extra.py @@ -53,7 +53,7 @@ class ExtraExtension(Extension): """ Add various extensions to Markdown class.""" - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ `config` is a dumb holder which gets passed to the actual extension later. 
""" self.config = kwargs diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py index 241bb6d4..7cd33c85 100644 --- a/markdown/extensions/fenced_code.py +++ b/markdown/extensions/fenced_code.py @@ -29,17 +29,21 @@ from ..util import parseBoolValue from ..serializers import _escape_attrib_html import re +from typing import TYPE_CHECKING, Any, Iterable + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class FencedCodeExtension(Extension): - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"'] } """ Default configuration options. """ super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `FencedBlockPreprocessor` to the Markdown instance. """ md.registerExtension(self) @@ -49,6 +53,8 @@ def extendMarkdown(self, md): class FencedBlockPreprocessor(Preprocessor): """ Find and extract fenced code blocks. """ + md: Markdown + FENCED_BLOCK_RE = re.compile( dedent(r''' (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence @@ -62,11 +68,11 @@ class FencedBlockPreprocessor(Preprocessor): re.MULTILINE | re.DOTALL | re.VERBOSE ) - def __init__(self, md, config): + def __init__(self, md: Markdown, config: dict[str, Any]): super().__init__(md) self.config = config self.checked_for_deps = False - self.codehilite_conf = {} + self.codehilite_conf: dict[str, Any] = {} self.use_attr_list = False # List of options to convert to boolean values self.bool_options = [ @@ -76,7 +82,7 @@ def __init__(self, md, config): 'use_pygments' ] - def run(self, lines): + def run(self, lines: list[str]) -> list[str]: """ Match and store Fenced Code Blocks in the `HtmlStash`. 
""" # Check for dependent extensions @@ -93,12 +99,13 @@ def run(self, lines): while 1: m = self.FENCED_BLOCK_RE.search(text) if m: - lang, id, classes, config = None, '', [], {} + lang = None if m.group('attrs'): id, classes, config = self.handle_attrs(get_attrs(m.group('attrs'))) if len(classes): lang = classes.pop(0) else: + id, classes, config = '', [], {} if m.group('lang'): lang = m.group('lang') if m.group('hl_lines'): @@ -151,11 +158,11 @@ def run(self, lines): break return text.split("\n") - def handle_attrs(self, attrs): + def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]: """ Return tuple: `(id, [list, of, classes], {configs})` """ id = '' classes = [] - configs = {} + configs: dict[str, Any] = {} for k, v in attrs: if k == 'id': id = v @@ -169,7 +176,7 @@ def handle_attrs(self, attrs): configs[k] = v return id, classes, configs - def _escape(self, txt): + def _escape(self, txt: str) -> str: """ basic html escaping """ txt = txt.replace('&', '&') txt = txt.replace('<', '<') diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py index 2424dbc8..ac35cf48 100644 --- a/markdown/extensions/footnotes.py +++ b/markdown/extensions/footnotes.py @@ -29,6 +29,10 @@ import re import copy import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX @@ -38,7 +42,7 @@ class FootnoteExtension(Extension): """ Footnote Extension. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: """ Setup configs. """ self.config = { @@ -68,12 +72,12 @@ def __init__(self, **kwargs): # In multiple invocations, emit links that don't get tangled. 
self.unique_prefix = 0 - self.found_refs = {} - self.used_refs = set() + self.found_refs: dict[str, int] = {} + self.used_refs: set[str] = set() self.reset() - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add pieces to Markdown. """ md.registerExtension(self) self.parser = md.parser @@ -105,7 +109,7 @@ def reset(self) -> None: self.found_refs = {} self.used_refs = set() - def unique_ref(self, reference, found: bool = False): + def unique_ref(self, reference: str, found: bool = False) -> str: """ Get a unique reference if there are duplicates. """ if not found: return reference @@ -126,9 +130,11 @@ def unique_ref(self, reference, found: bool = False): self.found_refs[original_ref] = 1 return reference - def findFootnotesPlaceholder(self, root): + def findFootnotesPlaceholder( + self, root: etree.Element + ) -> tuple[etree.Element, etree.Element, bool] | None: """ Return ElementTree Element that contains Footnote placeholder. """ - def finder(element): + def finder(element: etree.Element) -> tuple[etree.Element, etree.Element, bool] | None: for child in element: if child.text: if child.text.find(self.getConfig("PLACE_MARKER")) > -1: @@ -144,29 +150,29 @@ def finder(element): res = finder(root) return res - def setFootnote(self, id, text) -> None: + def setFootnote(self, id: str, text: str) -> None: """ Store a footnote for later retrieval. """ self.footnotes[id] = text - def get_separator(self): + def get_separator(self) -> str: """ Get the footnote separator. """ return self.getConfig("SEPARATOR") - def makeFootnoteId(self, id): + def makeFootnoteId(self, id: str) -> str: """ Return footnote link id. """ if self.getConfig("UNIQUE_IDS"): return 'fn%s%d-%s' % (self.get_separator(), self.unique_prefix, id) else: return 'fn{}{}'.format(self.get_separator(), id) - def makeFootnoteRefId(self, id, found: bool = False): + def makeFootnoteRefId(self, id: str, found: bool = False) -> str: """ Return footnote back-link id. 
""" if self.getConfig("UNIQUE_IDS"): return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found) else: return self.unique_ref('fnref{}{}'.format(self.get_separator(), id), found) - def makeFootnotesDiv(self, root): + def makeFootnotesDiv(self, root: etree.Element) -> etree.Element | None: """ Return `div` of footnotes as `etree` Element. """ if not list(self.footnotes.keys()): @@ -203,6 +209,7 @@ def makeFootnotesDiv(self, root): if len(li): node = li[-1] if node.tag == "p": + assert node.text is not None node.text = node.text + NBSP_PLACEHOLDER node.append(backlink) else: @@ -216,14 +223,14 @@ class FootnoteBlockProcessor(BlockProcessor): RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE) - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): super().__init__(footnotes.parser) self.footnotes = footnotes - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: return True - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: """ Find, set, and remove footnote definitions. """ block = blocks.pop(0) m = self.RE.search(block) @@ -259,7 +266,7 @@ def run(self, parent, blocks): blocks.insert(0, block) return False - def detectTabbed(self, blocks) -> list[str]: + def detectTabbed(self, blocks: list[str]) -> list[str]: """ Find indented text and remove indent before further processing. Returns: @@ -288,7 +295,7 @@ def detectTabbed(self, blocks) -> list[str]: break return fn_blocks - def detab(self, block): + def detab(self, block: str) -> str: # type: ignore[override] """ Remove one level of indent from a block. Preserve lazily indented blocks by only removing indent from indented lines. @@ -303,11 +310,11 @@ def detab(self, block): class FootnoteInlineProcessor(InlineProcessor): """ `InlineProcessor` for footnote markers in a document's body text. 
""" - def __init__(self, pattern, footnotes): + def __init__(self, pattern: str, footnotes: FootnoteExtension): super().__init__(pattern) self.footnotes = footnotes - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]: id = m.group(1) if id in self.footnotes.footnotes.keys(): sup = etree.Element("sup") @@ -326,10 +333,10 @@ def handleMatch(self, m, data): class FootnotePostTreeprocessor(Treeprocessor): """ Amend footnote div with duplicates. """ - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): self.footnotes = footnotes - def add_duplicates(self, li, duplicates) -> None: + def add_duplicates(self, li: etree.Element, duplicates: int) -> None: """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """ for link in li.iter('a'): # Find the link that needs to be duplicated. @@ -349,13 +356,13 @@ def add_duplicates(self, li, duplicates) -> None: el.append(link) break - def get_num_duplicates(self, li): + def get_num_duplicates(self, li: etree.Element) -> int: """ Get the number of duplicate refs of the footnote. """ fn, rest = li.attrib.get('id', '').split(self.footnotes.get_separator(), 1) link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest) return self.footnotes.found_refs.get(link_id, 0) - def handle_duplicates(self, parent) -> None: + def handle_duplicates(self, parent: etree.Element) -> None: """ Find duplicate footnotes and format and add the duplicates. """ for li in list(parent): # Check number of duplicates footnotes and insert @@ -364,7 +371,7 @@ def handle_duplicates(self, parent) -> None: if count > 1: self.add_duplicates(li, count) - def run(self, root): + def run(self, root: etree.Element) -> None: """ Crawl the footnote div and add missing duplicate footnotes. 
""" self.offset = 0 for div in root.iter('div'): @@ -379,10 +386,10 @@ def run(self, root): class FootnoteTreeprocessor(Treeprocessor): """ Build and append footnote div to end of document. """ - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): self.footnotes = footnotes - def run(self, root): + def run(self, root: etree.Element) -> None: footnotesDiv = self.footnotes.makeFootnotesDiv(root) if footnotesDiv is not None: result = self.footnotes.findFootnotesPlaceholder(root) @@ -401,10 +408,10 @@ def run(self, root): class FootnotePostprocessor(Postprocessor): """ Replace placeholders with html entities. """ - def __init__(self, footnotes): + def __init__(self, footnotes: FootnoteExtension): self.footnotes = footnotes - def run(self, text): + def run(self, text: str) -> str: text = text.replace( FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT") ) diff --git a/markdown/extensions/legacy_attrs.py b/markdown/extensions/legacy_attrs.py index 56ad2e89..3f1c7a6d 100644 --- a/markdown/extensions/legacy_attrs.py +++ b/markdown/extensions/legacy_attrs.py @@ -33,13 +33,18 @@ import re from markdown.treeprocessors import Treeprocessor, isString from markdown.extensions import Extension +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + import xml.etree.ElementTree as etree + from markdown import Markdown ATTR_RE = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} class LegacyAttrs(Treeprocessor): - def run(self, doc): + def run(self, doc: etree.Element) -> None: """Find and set values of attributes ({@key=value}). """ for el in doc.iter(): alt = el.get('alt', None) @@ -50,15 +55,16 @@ def run(self, doc): if el.tail and isString(el.tail): el.tail = self.handleAttributes(el, el.tail) - def handleAttributes(self, el, txt): + def handleAttributes(self, el: etree.Element, txt: str) -> str: """ Set attributes and return text without definitions. 
""" - def attributeCallback(match): + def attributeCallback(match: re.Match[str]) -> str: el.set(match.group(1), match.group(2).replace('\n', ' ')) + return '' return ATTR_RE.sub(attributeCallback, txt) class LegacyAttrExtension(Extension): - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `LegacyAttrs` to Markdown instance. """ md.treeprocessors.register(LegacyAttrs(md), 'legacyattrs', 15) diff --git a/markdown/extensions/legacy_em.py b/markdown/extensions/legacy_em.py index a6f67b7e..6fbff4a0 100644 --- a/markdown/extensions/legacy_em.py +++ b/markdown/extensions/legacy_em.py @@ -16,6 +16,10 @@ from . import Extension from ..inlinepatterns import UnderscoreProcessor, EmStrongItem, EM_STRONG2_RE, STRONG_EM2_RE import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown # _emphasis_ EMPHASIS_RE = r'(_)([^_]+)\1' @@ -42,7 +46,7 @@ class LegacyUnderscoreProcessor(UnderscoreProcessor): class LegacyEmExtension(Extension): """ Add legacy_em extension to Markdown class.""" - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Modify inline patterns. """ md.inlinePatterns.register(LegacyUnderscoreProcessor(r'_'), 'em_strong2', 50) diff --git a/markdown/extensions/md_in_html.py b/markdown/extensions/md_in_html.py index 982d6039..3c46d3e1 100644 --- a/markdown/extensions/md_in_html.py +++ b/markdown/extensions/md_in_html.py @@ -28,6 +28,10 @@ from .. import util from ..htmlparser import HTMLExtractor, blank_line_re import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING, Literal, Mapping, Sequence + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown class HTMLExtractorExtra(HTMLExtractor): @@ -36,7 +40,7 @@ class HTMLExtractorExtra(HTMLExtractor): Markdown. """ - def __init__(self, md, *args, **kwargs): + def __init__(self, md: Markdown, *args, **kwargs): # All block-level tags. 
self.block_level_tags = set(md.block_level_elements.copy()) # Block-level tags in which the content only gets span level parsing @@ -52,14 +56,14 @@ def __init__(self, md, *args, **kwargs): self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags) self.span_and_blocks_tags = self.block_tags | self.span_tags - def reset(self): + def reset(self) -> None: """Reset this instance. Loses all unprocessed data.""" - self.mdstack = [] # When markdown=1, stack contains a list of tags + self.mdstack: list[str] = [] # When markdown=1, stack contains a list of tags self.treebuilder = etree.TreeBuilder() - self.mdstate = [] # one of 'block', 'span', 'off', or None + self.mdstate: list[Literal['block', 'span', 'off', None]] = [] super().reset() - def close(self): + def close(self) -> None: """Handle any buffered data.""" super().close() # Handle any unclosed tags. @@ -67,13 +71,13 @@ def close(self): # Close the outermost parent. `handle_endtag` will close all unclosed children. self.handle_endtag(self.mdstack[0]) - def get_element(self): + def get_element(self) -> etree.Element: """ Return element from `treebuilder` and reset `treebuilder` for later use. """ element = self.treebuilder.close() self.treebuilder = etree.TreeBuilder() return element - def get_state(self, tag, attrs): + def get_state(self, tag, attrs: Mapping[str, str]) -> Literal['block', 'span', 'off', None]: """ Return state from tag and `markdown` attribute. One of 'block', 'span', or 'off'. 
""" md_attr = attrs.get('markdown', '0') if md_attr == 'markdown': @@ -94,13 +98,13 @@ def get_state(self, tag, attrs): else: # pragma: no cover return None - def handle_starttag(self, tag, attrs): + def handle_starttag(self, tag, attrs: Sequence[tuple[str, str]]) -> None: # Handle tags that should always be empty and do not specify a closing tag if tag in self.empty_tags and (self.at_line_start() or self.intail): - attrs = {key: value if value is not None else key for key, value in attrs} - if "markdown" in attrs: - attrs.pop('markdown') - element = etree.Element(tag, attrs) + attrs_dict = {key: value if value is not None else key for key, value in attrs} + if "markdown" in attrs_dict: + attrs_dict.pop('markdown') + element = etree.Element(tag, attrs_dict) data = etree.tostring(element, encoding='unicode', method='html') else: data = self.get_starttag_text() @@ -110,20 +114,20 @@ def handle_starttag(self, tag, attrs): if tag in self.block_level_tags and (self.at_line_start() or self.intail): # Valueless attribute (ex: `<tag checked>`) results in `[('checked', None)]`. # Convert to `{'checked': 'checked'}`. 
- attrs = {key: value if value is not None else key for key, value in attrs} - state = self.get_state(tag, attrs) - if self.inraw or (state in [None, 'off'] and not self.mdstack): + attrs_dict = {key: value if value is not None else key for key, value in attrs} + state = self.get_state(tag, attrs_dict) + if self.inraw or ((state is None or state == 'off') and not self.mdstack): # fall back to default behavior - attrs.pop('markdown', None) - super().handle_starttag(tag, attrs) + attrs_dict.pop('markdown', None) + super().handle_starttag(tag, attrs_dict) # type: ignore[arg-type] else: if 'p' in self.mdstack and tag in self.block_level_tags: # Close unclosed 'p' tag self.handle_endtag('p') self.mdstate.append(state) self.mdstack.append(tag) - attrs['markdown'] = state - self.treebuilder.start(tag, attrs) + attrs_dict['markdown'] = state # type: ignore[assignment] + self.treebuilder.start(tag, attrs_dict) else: # Span level tag if self.inraw: @@ -138,7 +142,7 @@ def handle_starttag(self, tag, attrs): # This is presumably a standalone tag in a code span (see #1036). 
self.clear_cdata_mode() - def handle_endtag(self, tag): + def handle_endtag(self, tag: str) -> None: if tag in self.block_level_tags: if self.inraw: super().handle_endtag(tag) @@ -161,7 +165,7 @@ def handle_endtag(self, tag): self.cleandoc.append('\n') self.cleandoc.append(self.md.htmlStash.store(element)) self.cleandoc.append('\n\n') - self.state = [] + self.state: list = [] # Check if element has a tail if not blank_line_re.match( self.rawdata[self.line_offset + self.offset + len(self.get_endtag_text(tag)):]): @@ -185,12 +189,12 @@ def handle_endtag(self, tag): else: self.handle_data(text) - def handle_startendtag(self, tag, attrs): + def handle_startendtag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: if tag in self.empty_tags: - attrs = {key: value if value is not None else key for key, value in attrs} - if "markdown" in attrs: - attrs.pop('markdown') - element = etree.Element(tag, attrs) + attrs_dict = {key: value if value is not None else key for key, value in attrs} + if "markdown" in attrs_dict: + attrs_dict.pop('markdown') + element = etree.Element(tag, attrs_dict) data = etree.tostring(element, encoding='unicode', method='html') else: data = self.get_starttag_text() @@ -198,7 +202,7 @@ def handle_startendtag(self, tag, attrs): data = self.get_starttag_text() self.handle_empty_tag(data, is_block=self.md.is_block_level(tag)) - def handle_data(self, data): + def handle_data(self, data: str) -> None: if self.intail and '\n' in data: self.intail = False if self.inraw or not self.mdstack: @@ -206,7 +210,7 @@ def handle_data(self, data): else: self.treebuilder.data(data) - def handle_empty_tag(self, data, is_block): + def handle_empty_tag(self, data: str, is_block: bool) -> None: if self.inraw or not self.mdstack: super().handle_empty_tag(data, is_block) else: @@ -215,7 +219,7 @@ def handle_empty_tag(self, data, is_block): else: self.handle_data(self.md.htmlStash.store(data)) - def parse_pi(self, i): + def parse_pi(self, i: int) -> int: if 
self.at_line_start() or self.intail or self.mdstack: # The same override exists in `HTMLExtractor` without the check # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead. @@ -225,7 +229,7 @@ def parse_pi(self, i): self.handle_data('<?') return i + 2 - def parse_html_declaration(self, i): + def parse_html_declaration(self, i: int) -> int: if self.at_line_start() or self.intail or self.mdstack: # The same override exists in `HTMLExtractor` without the check # for `mdstack`. Therefore, use parent of `HTMLExtractor` instead. @@ -239,7 +243,9 @@ def parse_html_declaration(self, i): class HtmlBlockPreprocessor(Preprocessor): """Remove html blocks from the text and store them for later retrieval.""" - def run(self, lines): + md: Markdown + + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) parser = HTMLExtractorExtra(self.md) parser.feed(source) @@ -250,11 +256,11 @@ def run(self, lines): class MarkdownInHtmlProcessor(BlockProcessor): """Process Markdown Inside HTML Blocks which have been stored in the `HtmlStash`.""" - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: # Always return True. `run` will return `False` it not a valid match. return True - def parse_element_content(self, element): + def parse_element_content(self, element: etree.Element) -> None: """ Recursively parse the text content of an `etree` Element as Markdown. @@ -324,7 +330,7 @@ def parse_element_content(self, element): if child.tail: child.tail = util.AtomicString(child.tail) - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> bool: m = util.HTML_PLACEHOLDER_RE.match(blocks[0]) if m: index = int(m.group(1)) @@ -344,7 +350,7 @@ def run(self, parent, blocks): class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor): - def stash_to_string(self, text): + def stash_to_string(self, text: str | etree.Element) -> str: """ Override default to handle any `etree` elements still in the stash. 
""" if isinstance(text, etree.Element): return self.md.serializer(text) @@ -355,7 +361,7 @@ def stash_to_string(self, text): class MarkdownInHtmlExtension(Extension): """Add Markdown parsing in HTML to Markdown class.""" - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Register extension instances. """ # Replace raw HTML preprocessor diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index 82179273..32b6c76c 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -25,6 +25,10 @@ from ..preprocessors import Preprocessor import re import logging +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown log = logging.getLogger('MARKDOWN') @@ -38,7 +42,7 @@ class MetaExtension (Extension): """ Meta-Data extension for Python-Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add `MetaPreprocessor` to Markdown instance. """ md.registerExtension(self) self.md = md @@ -51,9 +55,11 @@ def reset(self) -> None: class MetaPreprocessor(Preprocessor): """ Get Meta-Data. """ - def run(self, lines): + md: Markdown + + def run(self, lines: list[str]) -> list[str]: """ Parse Meta-Data and store in Markdown.Meta. """ - meta = {} + meta: dict[str, Any] = {} key = None if lines and BEGIN_RE.match(lines[0]): lines.pop(0) diff --git a/markdown/extensions/nl2br.py b/markdown/extensions/nl2br.py index 177df1ee..eb715def 100644 --- a/markdown/extensions/nl2br.py +++ b/markdown/extensions/nl2br.py @@ -25,13 +25,17 @@ from . import Extension from ..inlinepatterns import SubstituteTagInlineProcessor +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown BR_RE = r'\n' class Nl2BrExtension(Extension): - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add a `SubstituteTagInlineProcessor` to Markdown. 
""" br_tag = SubstituteTagInlineProcessor(BR_RE, 'br') md.inlinePatterns.register(br_tag, 'nl', 5) diff --git a/markdown/extensions/sane_lists.py b/markdown/extensions/sane_lists.py index 305bd992..cf2b5ccc 100644 --- a/markdown/extensions/sane_lists.py +++ b/markdown/extensions/sane_lists.py @@ -24,6 +24,11 @@ from . import Extension from ..blockprocessors import OListProcessor, UListProcessor import re +from typing import TYPE_CHECKING + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown + from .. import blockparser class SaneOListProcessor(OListProcessor): @@ -34,7 +39,7 @@ class SaneOListProcessor(OListProcessor): LAZY_OL = False """ Disable lazy list behavior. """ - def __init__(self, parser): + def __init__(self, parser: blockparser.BlockParser): super().__init__(parser) self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.))[ ]+(.*)' % (self.tab_length - 1)) @@ -46,7 +51,7 @@ class SaneUListProcessor(UListProcessor): SIBLING_TAGS = ['ul'] """ Exclude `ol` from list of siblings. """ - def __init__(self, parser): + def __init__(self, parser: blockparser.BlockParser): super().__init__(parser) self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' % (self.tab_length - 1)) @@ -55,7 +60,7 @@ def __init__(self, parser): class SaneListExtension(Extension): """ Add sane lists to Markdown. """ - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Override existing Processors. """ md.parser.blockprocessors.register(SaneOListProcessor(md.parser), 'olist', 40) md.parser.blockprocessors.register(SaneUListProcessor(md.parser), 'ulist', 30) diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py index e480df4f..d1198afd 100644 --- a/markdown/extensions/smarty.py +++ b/markdown/extensions/smarty.py @@ -86,15 +86,17 @@ from __future__ import annotations +import re from . 
import Extension from ..inlinepatterns import HtmlInlineProcessor, HTML_RE from ..treeprocessors import InlineProcessor from ..util import Registry from markdown import Markdown -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Sequence if TYPE_CHECKING: # pragma: no cover from .. import inlinepatterns + import xml.etree.ElementTree as etree # Constants for quote education. punctClass = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" @@ -159,13 +161,13 @@ class SubstituteTextPattern(HtmlInlineProcessor): - def __init__(self, pattern, replace, md): + def __init__(self, pattern: str, replace: Sequence[int | str | etree.Element], md: Markdown): """ Replaces matches with some text. """ HtmlInlineProcessor.__init__(self, pattern) self.replace = replace self.md = md - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: result = '' for part in self.replace: if isinstance(part, int): @@ -177,7 +179,7 @@ def handleMatch(self, m, data): class SmartyExtension(Extension): """ Add Smarty to Markdown. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'smart_quotes': [True, 'Educate quotes'], 'smart_angled_quotes': [False, 'Educate angled quotes'], @@ -187,17 +189,22 @@ def __init__(self, **kwargs): } """ Default configuration options. 
""" super().__init__(**kwargs) - self.substitutions = dict(substitutions) + self.substitutions: dict[str, str] = dict(substitutions) self.substitutions.update(self.getConfig('substitutions', default={})) - def _addPatterns(self, md, patterns, serie, priority): - for ind, pattern in enumerate(patterns): - pattern += (md,) - pattern = SubstituteTextPattern(*pattern) + def _addPatterns( + self, + md: Markdown, + patterns: Sequence[tuple[str, Sequence[int | str | etree.Element]]], + serie: str, + priority: int, + ) -> None: + for ind, pattern_parts in enumerate(patterns): + pattern = SubstituteTextPattern(*pattern_parts, md) name = 'smarty-%s-%d' % (serie, ind) self.inlinePatterns.register(pattern, name, priority-ind) - def educateDashes(self, md) -> None: + def educateDashes(self, md: Markdown) -> None: emDashesPattern = SubstituteTextPattern( r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md ) @@ -207,13 +214,13 @@ def educateDashes(self, md) -> None: self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50) self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45) - def educateEllipses(self, md) -> None: + def educateEllipses(self, md: Markdown) -> None: ellipsesPattern = SubstituteTextPattern( r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md ) self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10) - def educateAngledQuotes(self, md) -> None: + def educateAngledQuotes(self, md: Markdown) -> None: leftAngledQuotePattern = SubstituteTextPattern( r'\<\<', (self.substitutions['left-angle-quote'],), md ) @@ -223,7 +230,7 @@ def educateAngledQuotes(self, md) -> None: self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40) self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35) - def educateQuotes(self, md) -> None: + def educateQuotes(self, md: Markdown) -> None: lsquo = self.substitutions['left-single-quote'] rsquo = self.substitutions['right-single-quote'] 
ldquo = self.substitutions['left-double-quote'] @@ -245,7 +252,7 @@ def educateQuotes(self, md) -> None: ) self._addPatterns(md, patterns, 'quotes', 30) - def extendMarkdown(self, md: Markdown): + def extendMarkdown(self, md: Markdown) -> None: configs = self.getConfigs() self.inlinePatterns: Registry[inlinepatterns.InlineProcessor] = Registry() if configs['smart_ellipses']: diff --git a/markdown/extensions/tables.py b/markdown/extensions/tables.py index a9e5f13d..b1bc971c 100644 --- a/markdown/extensions/tables.py +++ b/markdown/extensions/tables.py @@ -25,6 +25,12 @@ from ..blockprocessors import BlockProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING, Any, Sequence + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown + from .. import blockprocessors + PIPE_NONE = 0 PIPE_LEFT = 1 PIPE_RIGHT = 2 @@ -36,14 +42,14 @@ class TableProcessor(BlockProcessor): RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))') RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$') - def __init__(self, parser, config): - self.border = False - self.separator = '' + def __init__(self, parser: blockprocessors.BlockParser, config: dict[str, Any]): + self.border: bool | int = False + self.separator: Sequence[str] = '' self.config = config super().__init__(parser) - def test(self, parent, block): + def test(self, parent: etree.Element, block: str) -> bool: """ Ensure first two rows (column header and separator row) are valid table rows. @@ -79,14 +85,14 @@ def test(self, parent, block): return is_table - def run(self, parent, blocks): + def run(self, parent: etree.Element, blocks: list[str]) -> None: """ Parse a table block and build table. 
""" block = blocks.pop(0).split('\n') header = block[0].strip(' ') rows = [] if len(block) < 3 else block[2:] # Get alignment of columns - align = [] + align: list[str | None] = [] for c in self.separator: c = c.strip(' ') if c.startswith(':') and c.endswith(':'): @@ -110,7 +116,7 @@ def run(self, parent, blocks): for row in rows: self._build_row(row.strip(' '), tbody, align) - def _build_empty_row(self, parent, align): + def _build_empty_row(self, parent: etree.Element, align: Sequence[str | None]) -> None: """Build an empty row.""" tr = etree.SubElement(parent, 'tr') count = len(align) @@ -118,7 +124,7 @@ def _build_empty_row(self, parent, align): etree.SubElement(tr, 'td') count -= 1 - def _build_row(self, row, parent, align): + def _build_row(self, row: str, parent: etree.Element, align: Sequence[str | None]) -> None: """ Given a row of text, build table cells. """ tr = etree.SubElement(parent, 'tr') tag = 'td' @@ -139,7 +145,7 @@ def _build_row(self, row, parent, align): else: c.set('style', f'text-align: {a};') - def _split_row(self, row): + def _split_row(self, row: str) -> list[str]: """ split a row of text into list of cells. """ if self.border: if row.startswith('|'): @@ -147,7 +153,7 @@ def _split_row(self, row): row = self.RE_END_BORDER.sub('', row) return self._split(row) - def _split(self, row): + def _split(self, row: str) -> list[str]: """ split a row of text with some code into a list of cells. """ elements = [] pipes = [] @@ -223,7 +229,7 @@ def _split(self, row): class TableExtension(Extension): """ Add tables to Markdown. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'use_align_attribute': [False, 'True to use align attribute instead of style.'], } @@ -231,7 +237,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add an instance of `TableProcessor` to `BlockParser`. 
""" if '|' not in md.ESCAPED_CHARS: md.ESCAPED_CHARS.append('|') diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index 64c20c80..b34d6606 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -27,9 +27,14 @@ import html import unicodedata import xml.etree.ElementTree as etree +from typing import TYPE_CHECKING, Any, Iterator, MutableSet, TypedDict +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown + from .. import treeprocessors -def slugify(value, separator, unicode=False): + +def slugify(value: str, separator: str, unicode: bool = False) -> str: """ Slugify a string, to make it URL friendly. """ if not unicode: # Replace Extended Latin characters with ASCII, i.e. `žlutý` => `zluty` @@ -39,7 +44,7 @@ def slugify(value, separator, unicode=False): return re.sub(r'[{}\s]+'.format(separator), separator, value) -def slugify_unicode(value, separator): +def slugify_unicode(value: str, separator: str) -> str: """ Slugify a string, to make it URL friendly while preserving Unicode characters. """ return slugify(value, separator, unicode=True) @@ -47,7 +52,7 @@ def slugify_unicode(value, separator): IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$') -def unique(id, ids): +def unique(id: str, ids: MutableSet[str]) -> str: """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """ while id in ids or not id: m = IDCOUNT_RE.match(id) @@ -59,7 +64,7 @@ def unique(id, ids): return id -def get_name(el): +def get_name(el: etree.Element) -> str: """Get title name.""" text = [] @@ -71,14 +76,15 @@ def get_name(el): return ''.join(text).strip() -def stashedHTML2text(text, md, strip_entities: bool = True): +def stashedHTML2text(text: str, md: Markdown, strip_entities: bool = True) -> str: """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """ - def _html_sub(m): + def _html_sub(m: re.Match[str]) -> str: """ Substitute raw html with plain text. 
""" try: raw = md.htmlStash.rawHtmlBlocks[int(m.group(1))] except (IndexError, TypeError): # pragma: no cover return m.group(0) + assert isinstance(raw, str) # Strip out tags and/or entities - leaving text res = re.sub(r'(<[^>]+>)', '', raw) if strip_entities: @@ -88,13 +94,23 @@ def _html_sub(m): return HTML_PLACEHOLDER_RE.sub(_html_sub, text) -def unescape(text): +def unescape(text: str) -> str: """ Unescape escaped text. """ c = UnescapeTreeprocessor() return c.unescape(text) -def nest_toc_tokens(toc_list): +if TYPE_CHECKING: + class FlatTocToken(TypedDict): + level: int + id: str + name: str + + class TocToken(FlatTocToken): + children: list[TocToken] + + +def nest_toc_tokens(toc_list: list[FlatTocToken]) -> list[TocToken]: """Given an unsorted list with errors and skips, return a nested one. [{'level': 1}, {'level': 2}] @@ -111,15 +127,15 @@ def nest_toc_tokens(toc_list): ordered_list = [] if len(toc_list): # Initialize everything by processing the first entry - last = toc_list.pop(0) + last: TocToken = toc_list.pop(0) # type: ignore[assignment] last['children'] = [] levels = [last['level']] ordered_list.append(last) - parents = [] + parents: list[TocToken] = [] # Walk the rest nesting the entries properly while toc_list: - t = toc_list.pop(0) + t: TocToken = toc_list.pop(0) # type: ignore[assignment] current_level = t['level'] t['children'] = [] @@ -162,24 +178,28 @@ def nest_toc_tokens(toc_list): class TocTreeprocessor(Treeprocessor): """ Step through document and build TOC. 
""" - def __init__(self, md, config): + md: Markdown + + def __init__(self, md: Markdown, config: dict[str, Any]): super().__init__(md) - self.marker = config["marker"] - self.title = config["title"] + self.marker: str = config["marker"] + self.title: str = config["title"] self.base_level = int(config["baselevel"]) - 1 self.slugify = config["slugify"] self.sep = config["separator"] self.toc_class = config["toc_class"] - self.title_class = config["title_class"] - self.use_anchors = parseBoolValue(config["anchorlink"]) - self.anchorlink_class = config["anchorlink_class"] - self.use_permalinks = parseBoolValue(config["permalink"], False) - if self.use_permalinks is None: + self.title_class: str = config["title_class"] + self.use_anchors: bool = parseBoolValue(config["anchorlink"]) + self.anchorlink_class: str = config["anchorlink_class"] + use_permalinks = parseBoolValue(config["permalink"], False) + if use_permalinks is None: self.use_permalinks = config["permalink"] - self.permalink_class = config["permalink_class"] - self.permalink_title = config["permalink_title"] - self.permalink_leading = parseBoolValue(config["permalink_leading"], False) + else: + self.use_permalinks = use_permalinks + self.permalink_class: str = config["permalink_class"] + self.permalink_title: str = config["permalink_title"] + self.permalink_leading: bool | None = parseBoolValue(config["permalink_leading"], False) self.header_rgx = re.compile("[Hh][123456]") if isinstance(config["toc_depth"], str) and '-' in config["toc_depth"]: self.toc_top, self.toc_bottom = [int(x) for x in config["toc_depth"].split('-')] @@ -187,7 +207,7 @@ def __init__(self, md, config): self.toc_top = 1 self.toc_bottom = int(config["toc_depth"]) - def iterparent(self, node): + def iterparent(self, node: etree.Element) -> Iterator[tuple[etree.Element, etree.Element]]: """ Iterator wrapper to get allowed parent and child all at once. 
""" # We do not allow the marker inside a header as that @@ -198,7 +218,7 @@ def iterparent(self, node): yield node, child yield from self.iterparent(child) - def replace_marker(self, root, elem) -> None: + def replace_marker(self, root: etree.Element, elem: etree.Element) -> None: """ Replace marker with elem. """ for (p, c) in self.iterparent(root): text = ''.join(c.itertext()).strip() @@ -219,14 +239,14 @@ def replace_marker(self, root, elem) -> None: p[i] = elem break - def set_level(self, elem) -> None: + def set_level(self, elem: etree.Element) -> None: """ Adjust header level according to base level. """ level = int(elem.tag[-1]) + self.base_level if level > 6: level = 6 elem.tag = 'h%d' % level - def add_anchor(self, c, elem_id) -> None: + def add_anchor(self, c: etree.Element, elem_id: str) -> None: anchor = etree.Element("a") anchor.text = c.text anchor.attrib["href"] = "#" + elem_id @@ -238,7 +258,7 @@ def add_anchor(self, c, elem_id) -> None: c.remove(c[0]) c.append(anchor) - def add_permalink(self, c, elem_id) -> None: + def add_permalink(self, c: etree.Element, elem_id: str) -> None: permalink = etree.Element("a") permalink.text = ("%spara;" % AMP_SUBSTITUTE if self.use_permalinks is True @@ -254,7 +274,7 @@ def add_permalink(self, c, elem_id) -> None: else: c.append(permalink) - def build_toc_div(self, toc_list): + def build_toc_div(self, toc_list: list[TocToken]) -> etree.Element: """ Return a string div given a toc list. 
""" div = etree.Element("div") div.attrib["class"] = self.toc_class @@ -266,7 +286,7 @@ def build_toc_div(self, toc_list): header.attrib["class"] = self.title_class header.text = self.title - def build_etree_ul(toc_list, parent): + def build_etree_ul(toc_list: list[TocToken], parent: etree.Element) -> etree.Element: ul = etree.SubElement(parent, "ul") for item in toc_list: # List item link, to be inserted into the toc div @@ -281,18 +301,20 @@ def build_etree_ul(toc_list, parent): build_etree_ul(toc_list, div) if 'prettify' in self.md.treeprocessors: - self.md.treeprocessors['prettify'].run(div) + prettify_processor: treeprocessors.PrettifyTreeprocessor + prettify_processor = self.md.treeprocessors['prettify'] # type: ignore[assignment] + prettify_processor.run(div) return div - def run(self, doc): + def run(self, doc: etree.Element) -> None: # Get a list of id attributes used_ids = set() for el in doc.iter(): if "id" in el.attrib: used_ids.add(el.attrib["id"]) - toc_tokens = [] + flat_toc_tokens: list[FlatTocToken] = [] for el in doc.iter(): if isinstance(el.tag, str) and self.header_rgx.match(el.tag): self.set_level(el) @@ -304,7 +326,7 @@ def run(self, doc): el.attrib["id"] = unique(self.slugify(innertext, self.sep), used_ids) if int(el.tag[-1]) >= self.toc_top and int(el.tag[-1]) <= self.toc_bottom: - toc_tokens.append({ + flat_toc_tokens.append({ 'level': int(el.tag[-1]), 'id': el.attrib["id"], 'name': unescape(stashedHTML2text( @@ -322,7 +344,7 @@ def run(self, doc): if self.use_permalinks not in [False, None]: self.add_permalink(el, el.attrib["id"]) - toc_tokens = nest_toc_tokens(toc_tokens) + toc_tokens = nest_toc_tokens(flat_toc_tokens) div = self.build_toc_div(toc_tokens) if self.marker: self.replace_marker(doc, div) @@ -339,7 +361,7 @@ class TocExtension(Extension): TreeProcessorClass = TocTreeprocessor - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'marker': [ '[TOC]', @@ -391,7 +413,7 @@ def __init__(self, 
**kwargs): super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: """ Add TOC tree processor to Markdown. """ md.registerExtension(self) self.md = md diff --git a/markdown/extensions/wikilinks.py b/markdown/extensions/wikilinks.py index 9d5acfa3..ed8e7335 100644 --- a/markdown/extensions/wikilinks.py +++ b/markdown/extensions/wikilinks.py @@ -25,9 +25,13 @@ from ..inlinepatterns import InlineProcessor import xml.etree.ElementTree as etree import re +from typing import TYPE_CHECKING, Any +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown -def build_url(label, base, end): + +def build_url(label: str, base: str, end: str) -> str: """ Build a URL from the label, a base, and an end. """ clean_label = re.sub(r'([ ]+_)|(_[ ]+)|([ ]+)', '_', label) return '{}{}{}'.format(base, clean_label, end) @@ -36,7 +40,7 @@ def build_url(label, base, end): class WikiLinkExtension(Extension): """ Add inline processor to Markdown. """ - def __init__(self, **kwargs): + def __init__(self, **kwargs) -> None: self.config = { 'base_url': ['/', 'String to append to beginning or URL.'], 'end_url': ['/', 'String to append to end of URL.'], @@ -46,7 +50,7 @@ def __init__(self, **kwargs): """ Default configuration options. """ super().__init__(**kwargs) - def extendMarkdown(self, md): + def extendMarkdown(self, md: Markdown) -> None: self.md = md # append to end of inline patterns @@ -59,11 +63,14 @@ def extendMarkdown(self, md): class WikiLinksInlineProcessor(InlineProcessor): """ Build link from `wikilink`. 
""" - def __init__(self, pattern, config): + md: Markdown + + def __init__(self, pattern: str, config: dict[str, Any]): super().__init__(pattern) self.config = config - def handleMatch(self, m, data): + def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]: + a: etree.Element | str if m.group(1).strip(): base_url, end_url, html_class = self._getMeta() label = m.group(1).strip() @@ -77,7 +84,7 @@ def handleMatch(self, m, data): a = '' return a, m.start(0), m.end(0) - def _getMeta(self): + def _getMeta(self) -> tuple[str, str, str]: """ Return meta data or `config` data. """ base_url = self.config['base_url'] end_url = self.config['end_url'] diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py index f68d064c..faef4daa 100644 --- a/markdown/htmlparser.py +++ b/markdown/htmlparser.py @@ -28,7 +28,10 @@ import re import importlib.util import sys -from typing import Any +from typing import TYPE_CHECKING, Any, Sequence + +if TYPE_CHECKING: # pragma: no cover + from markdown import Markdown # Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it. @@ -79,7 +82,9 @@ class HTMLExtractor(htmlparser.HTMLParser): is stored in `cleandoc` as a list of strings. """ - def __init__(self, md, *args, **kwargs): + md: Markdown + + def __init__(self, md: Markdown, *args, **kwargs): if 'convert_charrefs' not in kwargs: kwargs['convert_charrefs'] = False @@ -92,18 +97,18 @@ def __init__(self, md, *args, **kwargs): super().__init__(*args, **kwargs) self.md = md - def reset(self): + def reset(self) -> None: """Reset this instance. 
Loses all unprocessed data.""" self.inraw = False self.intail = False - self.stack = [] # When `inraw==True`, stack contains a list of tags - self._cache = [] - self.cleandoc = [] + self.stack: list[str] = [] # When `inraw==True`, stack contains a list of tags + self._cache: list[str] = [] + self.cleandoc: list[str] = [] self.lineno_start_cache = [0] super().reset() - def close(self): + def close(self) -> None: """Handle any buffered data.""" super().close() if len(self.rawdata): @@ -159,7 +164,7 @@ def get_endtag_text(self, tag: str) -> str: # Failed to extract from raw data. Assume well formed and lowercase. return '</{}>'.format(tag) - def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]): + def handle_starttag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: # Handle tags that should always be empty and do not specify a closing tag if tag in self.empty_tags: self.handle_startendtag(tag, attrs) @@ -180,7 +185,7 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]): # This is presumably a standalone tag in a code span (see #1036). self.clear_cdata_mode() - def handle_endtag(self, tag: str): + def handle_endtag(self, tag: str) -> None: text = self.get_endtag_text(tag) if self.inraw: @@ -207,7 +212,7 @@ def handle_endtag(self, tag: str): else: self.cleandoc.append(text) - def handle_data(self, data: str): + def handle_data(self, data: str) -> None: if self.intail and '\n' in data: self.intail = False if self.inraw: @@ -215,7 +220,7 @@ def handle_data(self, data: str): else: self.cleandoc.append(data) - def handle_empty_tag(self, data: str, is_block: bool): + def handle_empty_tag(self, data: str, is_block: bool) -> None: """ Handle empty tags (`<data>`). 
""" if self.inraw or self.intail: # Append this to the existing raw block @@ -238,25 +243,25 @@ def handle_empty_tag(self, data: str, is_block: bool): else: self.cleandoc.append(data) - def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]): + def handle_startendtag(self, tag: str, attrs: Sequence[tuple[str, str]]) -> None: self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag)) - def handle_charref(self, name: str): + def handle_charref(self, name: str) -> None: self.handle_empty_tag('&#{};'.format(name), is_block=False) - def handle_entityref(self, name: str): + def handle_entityref(self, name: str) -> None: self.handle_empty_tag('&{};'.format(name), is_block=False) - def handle_comment(self, data: str): + def handle_comment(self, data: str) -> None: self.handle_empty_tag('<!--{}-->'.format(data), is_block=True) - def handle_decl(self, data: str): + def handle_decl(self, data: str) -> None: self.handle_empty_tag('<!{}>'.format(data), is_block=True) - def handle_pi(self, data: str): + def handle_pi(self, data: str) -> None: self.handle_empty_tag('<?{}?>'.format(data), is_block=True) - def unknown_decl(self, data: str): + def unknown_decl(self, data: str) -> None: end = ']]>' if data.startswith('CDATA[') else ']>' self.handle_empty_tag('<![{}{}'.format(data, end), is_block=True) diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py index 23b39079..4becb298 100644 --- a/markdown/inlinepatterns.py +++ b/markdown/inlinepatterns.py @@ -253,15 +253,16 @@ def unescape(self, text: str) -> str: except KeyError: # pragma: no cover return text - def get_stash(m): + def get_stash(m: re.Match[str]) -> str: id = m.group(1) if id in stash: - value = stash.get(id) + value = stash[id] if isinstance(value, str): return value else: # An `etree` Element - return text content only return ''.join(value.itertext()) + return '' return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) @@ -443,7 +444,7 @@ def handleMatch(self, m: 
re.Match[str], data: str) -> tuple[etree.Element, int, class BacktickInlineProcessor(InlineProcessor): """ Return a `<code>` element containing the escaped matching text. """ - def __init__(self, pattern): + def __init__(self, pattern: str): InlineProcessor.__init__(self, pattern) self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX) self.tag = 'code' @@ -519,32 +520,38 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]: place_holder = self.md.htmlStash.store(rawhtml) return place_holder, m.start(0), m.end(0) - def unescape(self, text): + def unescape(self, text: str) -> str: """ Return unescaped text given text with an inline placeholder. """ try: - stash = self.md.treeprocessors['inline'].stashed_nodes + inlineprocessor: treeprocessors.InlineProcessor + inlineprocessor = self.md.treeprocessors['inline'] # type: ignore[assignment] + stash = inlineprocessor.stashed_nodes except KeyError: # pragma: no cover return text - def get_stash(m): + def get_stash(m: re.Match[str]) -> str: id = m.group(1) value = stash.get(id) if value is not None: try: + assert isinstance(value, etree.Element) return self.md.serializer(value) except Exception: return r'\%s' % value + return '' return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text) - def backslash_unescape(self, text): + def backslash_unescape(self, text: str) -> str: """ Return text with backslash escapes undone (backslashes are restored). """ try: - RE = self.md.treeprocessors['unescape'].RE + unescape_processor: treeprocessors.UnescapeTreeprocessor + unescape_processor = self.md.treeprocessors['unescape'] # type: ignore[assignment] + RE = unescape_processor.RE except KeyError: # pragma: no cover return text - def _unescape(m): + def _unescape(m: re.Match[str]) -> str: return chr(int(m.group(1))) return RE.sub(_unescape, text) @@ -562,14 +569,14 @@ class AsteriskProcessor(InlineProcessor): ] """ The various strong and emphasis patterns handled by this processor. 
""" - def build_single(self, m, tag, idx): + def build_single(self, m: re.Match[str], tag: str, idx: int) -> etree.Element: """Return single tag.""" el1 = etree.Element(tag) text = m.group(2) self.parse_sub_patterns(text, el1, None, idx) return el1 - def build_double(self, m, tags, idx): + def build_double(self, m: re.Match[str], tags: str, idx: int) -> etree.Element: """Return double tag.""" tag1, tag2 = tags.split(",") @@ -583,7 +590,7 @@ def build_double(self, m, tags, idx): self.parse_sub_patterns(text, el1, el2, idx) return el1 - def build_double2(self, m, tags, idx): + def build_double2(self, m: re.Match[str], tags: str, idx: int) -> etree.Element: """Return double tags (variant 2): `<strong>text <em>text</em></strong>`.""" tag1, tag2 = tags.split(",") @@ -596,22 +603,19 @@ def build_double2(self, m, tags, idx): self.parse_sub_patterns(text, el2, None, idx) return el1 - def parse_sub_patterns(self, data, parent, last, idx) -> None: + def parse_sub_patterns( + self, data: str, parent: etree.Element, last: etree.Element | None, idx: int + ) -> None: """ Parses sub patterns. - `data` (`str`): - text to evaluate. - - `parent` (`etree.Element`): - Parent to attach text and sub elements to. + `data`: text to evaluate. - `last` (`etree.Element`): - Last appended child to parent. Can also be None if parent has no children. + `parent`: Parent to attach text and sub elements to. - `idx` (`int`): - Current pattern index that was used to evaluate the parent. + `last`: Last appended child to parent. Can also be None if parent has no children. + `idx`: Current pattern index that was used to evaluate the parent. 
""" offset = 0 @@ -660,7 +664,7 @@ def parse_sub_patterns(self, data, parent, last, idx) -> None: else: parent.text = text - def build_element(self, m, builder, tags, index): + def build_element(self, m: re.Match[str], builder: str, tags: str, index: int) -> etree.Element: """Element builder.""" if builder == 'double2': @@ -726,11 +730,11 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None return el, m.start(0), index - def getLink(self, data, index): + def getLink(self, data: str, index: int) -> tuple[str, str | None, int, bool]: """Parse data between `()` of `[Text]()` allowing recursive `()`. """ href = '' - title = None + title: str | None = None handled = False m = self.RE_LINK.match(data, pos=index) @@ -750,7 +754,7 @@ def getLink(self, data, index): last_bracket = -1 # Primary (first found) quote tracking. - quote = None + quote: str | None = None start_quote = -1 exit_quote = -1 ignore_matches = False @@ -842,7 +846,7 @@ def getLink(self, data, index): return href, title, index, handled - def getText(self, data, index): + def getText(self, data: str, index: int) -> tuple[str, int, bool]: """Parse the content between `[]` of the start of an image or link resolving nested square brackets. @@ -906,6 +910,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None id, end, handled = self.evalId(data, index, text) if not handled: return None, None, None + assert id is not None # Clean up line breaks in id id = self.NEWLINE_CLEANUP_RE.sub(' ', id) @@ -916,7 +921,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None return self.makeTag(href, title, text), m.start(0), end - def evalId(self, data, index, text): + def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, bool]: """ Evaluate the id portion of `[ref][id]`. 
@@ -946,7 +951,7 @@ def makeTag(self, href: str, title: str, text: str) -> etree.Element: class ShortReferenceInlineProcessor(ReferenceInlineProcessor): """Short form of reference: `[google]`. """ - def evalId(self, data, index, text): + def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]: """Evaluate the id of `[ref]`. """ return text.lower(), index, True @@ -966,7 +971,7 @@ def makeTag(self, href: str, title: str, text: str) -> etree.Element: class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor): """ Short form of image reference: `![ref]`. """ - def evalId(self, data, index, text): + def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]: """Evaluate the id of `[ref]`. """ return text.lower(), index, True @@ -993,7 +998,7 @@ def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, if email.startswith("mailto:"): email = email[len("mailto:"):] - def codepoint2name(code): + def codepoint2name(code: int) -> str: """Return entity definition by code, or the code if not defined.""" entity = entities.codepoint2name.get(code) if entity: diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py index e558a23b..f05dc448 100644 --- a/markdown/postprocessors.py +++ b/markdown/postprocessors.py @@ -73,17 +73,18 @@ class RawHtmlPostprocessor(Postprocessor): md: Markdown - def run(self, text: str): + def run(self, text: str) -> str: """ Iterate over html stash and restore html. 
""" replacements = OrderedDict() for i in range(self.md.htmlStash.html_counter): - html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i]) + raw: str = self.md.htmlStash.rawHtmlBlocks[i] # type: ignore[assignment] + html = self.stash_to_string(raw) if self.isblocklevel(html): replacements["<p>{}</p>".format( self.md.htmlStash.get_placeholder(i))] = html replacements[self.md.htmlStash.get_placeholder(i)] = html - def substitute_match(m): + def substitute_match(m: re.Match[str]) -> str: key = m.group(0) if key not in replacements: @@ -124,7 +125,7 @@ def stash_to_string(self, text: str) -> str: class AndSubstitutePostprocessor(Postprocessor): """ Restore valid entities """ - def run(self, text): + def run(self, text: str) -> str: text = text.replace(util.AMP_SUBSTITUTE, "&") return text @@ -138,8 +139,8 @@ class UnescapePostprocessor(Postprocessor): RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX)) - def unescape(self, m): + def unescape(self, m: re.Match[str]) -> str: return chr(int(m.group(1))) - def run(self, text): + def run(self, text: str) -> str: return self.RE.sub(self.unescape, text) diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py index a49ae2c2..25cf86de 100644 --- a/markdown/preprocessors.py +++ b/markdown/preprocessors.py @@ -85,6 +85,8 @@ class HtmlBlockPreprocessor(Preprocessor): [`Markdown`][markdown.Markdown] instance. 
""" + md: Markdown + def run(self, lines: list[str]) -> list[str]: source = '\n'.join(lines) parser = HTMLExtractor(self.md) diff --git a/markdown/serializers.py b/markdown/serializers.py index 468538e1..29ce305b 100644 --- a/markdown/serializers.py +++ b/markdown/serializers.py @@ -48,19 +48,20 @@ from xml.etree.ElementTree import ProcessingInstruction, Comment, ElementTree, Element, QName from xml.etree.ElementTree import HTML_EMPTY # type: ignore[attr-defined] import re +from typing import Callable, Iterable, Literal, NoReturn __all__ = ['to_html_string', 'to_xhtml_string'] RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I) -def _raise_serialization_error(text): # pragma: no cover +def _raise_serialization_error(text: str) -> NoReturn: # pragma: no cover raise TypeError( "cannot serialize {!r} (type {})".format(text, type(text).__name__) ) -def _escape_cdata(text): +def _escape_cdata(text) -> str: # escape character data try: # it's worth avoiding do-nothing calls for strings that are @@ -78,7 +79,7 @@ def _escape_cdata(text): _raise_serialization_error(text) -def _escape_attrib(text): +def _escape_attrib(text: str) -> str: # escape attribute value try: if "&" in text: @@ -97,7 +98,7 @@ def _escape_attrib(text): _raise_serialization_error(text) -def _escape_attrib_html(text): +def _escape_attrib_html(text: str) -> str: # escape attribute value try: if "&" in text: @@ -114,7 +115,7 @@ def _escape_attrib_html(text): _raise_serialization_error(text) -def _serialize_html(write, elem, format): +def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None: tag = elem.tag text = elem.text if tag is Comment: @@ -122,7 +123,7 @@ def _serialize_html(write, elem, format): elif tag is ProcessingInstruction: write("<?%s?>" % _escape_cdata(text)) elif tag is None: - if text: + if text: # type: ignore[unreachable] write(_escape_cdata(text)) for e in elem: _serialize_html(write, e, format) @@ -135,7 +136,7 @@ 
def _serialize_html(write, elem, format): else: raise ValueError('QName objects must define a tag.') write("<" + tag) - items = elem.items() + items: Iterable[tuple[str, str]] = elem.items() if items: items = sorted(items) # lexical order for k, v in items: @@ -171,9 +172,9 @@ def _serialize_html(write, elem, format): write(_escape_cdata(elem.tail)) -def _write_html(root, format="html"): +def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str: assert root is not None - data = [] + data: list[str] = [] write = data.append _serialize_html(write, root, format) return "".join(data) diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py index 0e8d4d5d..e9eb190c 100644 --- a/markdown/treeprocessors.py +++ b/markdown/treeprocessors.py @@ -88,7 +88,7 @@ def __init__(self, md: Markdown): self.inlinePatterns = md.inlinePatterns self.ancestors: list[str] = [] - def __makePlaceholder(self, type) -> tuple[str, str]: + def __makePlaceholder(self, type: str) -> tuple[str, str]: """ Generate a placeholder """ id = "%04d" % len(self.stashed_nodes) hash = util.INLINE_PLACEHOLDER % id @@ -112,7 +112,7 @@ def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]: else: return None, index + 1 - def __stashNode(self, node, type) -> str: + def __stashNode(self, node: etree.Element | str, type: str) -> str: """ Add node to stash. """ placeholder, id = self.__makePlaceholder(type) self.stashed_nodes[id] = node @@ -141,7 +141,7 @@ def __handleInline(self, data: str, patternIndex: int = 0) -> str: patternIndex += 1 return data - def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True): + def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None: """ Process placeholders in `Element.text` or `Element.tail` of Elements popped from `self.stashed_nodes`. 
@@ -188,7 +188,7 @@ def __processPlaceholders( List with `ElementTree` elements with applied inline patterns. """ - def linkText(text: str | None): + def linkText(text: str | None) -> None: if text: if result: if result[-1][0].tail: @@ -205,7 +205,7 @@ def linkText(text: str | None): parent.text += text else: parent.text = text - result = [] + result: list[tuple[etree.Element, list[str]]] = [] strartIndex = 0 while data: index = data.find(self.__placeholder_prefix, strartIndex) @@ -219,7 +219,7 @@ def linkText(text: str | None): text = data[strartIndex:index] linkText(text) - if not isString(node): # it's Element + if not isinstance(node, str): # it's Element for child in [node] + list(node): if child.tail: if child.tail.strip(): @@ -362,7 +362,7 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. An element tree object with applied inline patterns. """ - self.stashed_nodes: dict[str, etree.Element] = {} + self.stashed_nodes: dict[str, etree.Element | str] = {} # Ensure a valid parent list, but copy passed in lists # to ensure we don't have the user accidentally change it on us. @@ -419,7 +419,9 @@ def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree. class PrettifyTreeprocessor(Treeprocessor): """ Add line breaks to the html document. """ - def _prettifyETree(self, elem): + md: Markdown + + def _prettifyETree(self, elem: etree.Element) -> None: """ Recursively add line breaks to `ElementTree` children. """ i = "\n" @@ -460,13 +462,13 @@ class UnescapeTreeprocessor(Treeprocessor): RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX)) - def _unescape(self, m): + def _unescape(self, m: re.Match[str]) -> str: return chr(int(m.group(1))) def unescape(self, text: str) -> str: return self.RE.sub(self._unescape, text) - def run(self, root): + def run(self, root: etree.Element) -> None: """ Loop over all elements and unescape all text. 
""" for elem in root.iter(): # Unescape text content diff --git a/markdown/util.py b/markdown/util.py index 7e3405fc..09cd495a 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -29,10 +29,11 @@ import warnings from functools import wraps, lru_cache from itertools import count -from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, overload +from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload if TYPE_CHECKING: # pragma: no cover from markdown import Markdown + import xml.etree.ElementTree as etree _T = TypeVar('_T') @@ -175,15 +176,18 @@ def code_escape(text: str) -> str: return text -def _get_stack_depth(size=2): +def _get_stack_depth(size: int = 2) -> int: """Get current stack depth, performantly. """ frame = sys._getframe(size) for size in count(size): - frame = frame.f_back - if not frame: + next_frame = frame.f_back + if next_frame is None: return size + frame = next_frame + + raise RuntimeError("Could not get stack depth") def nearing_recursion_limit() -> bool: @@ -214,20 +218,28 @@ def __init__(self, md: Markdown | None = None): self.md = md +if TYPE_CHECKING: + class TagData(TypedDict): + tag: str + attrs: dict[str, str] + left_index: int + right_index: int + + class HtmlStash: """ This class is used for stashing HTML objects that we extract in the beginning and replace with place-holders. """ - def __init__(self): + def __init__(self) -> None: """ Create an `HtmlStash`. """ self.html_counter = 0 # for counting inline html segments - self.rawHtmlBlocks = [] + self.rawHtmlBlocks: list[str | etree.Element] = [] self.tag_counter = 0 - self.tag_data = [] # list of dictionaries in the order tags appear + self.tag_data: list[TagData] = [] # list of dictionaries in the order tags appear - def store(self, html: str) -> str: + def store(self, html: str | etree.Element) -> str: """ Saves an HTML segment for later reinsertion. 
Returns a placeholder string that needs to be inserted into the @@ -253,7 +265,7 @@ def reset(self) -> None: def get_placeholder(self, key: int) -> str: return HTML_PLACEHOLDER % key - def store_tag(self, tag: str, attrs: list, left_index: int, right_index: int) -> str: + def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str: """Store tag data and return a placeholder.""" self.tag_data.append({'tag': tag, 'attrs': attrs, 'left_index': left_index, @@ -399,7 +411,7 @@ def deregister(self, name: str, strict: bool = True) -> None: if strict: raise - def _sort(self): + def _sort(self) -> None: """ Sort the registry by priority from highest to lowest. From 5db9b6fd280d06c4bc17e17ac230183ac653c1df Mon Sep 17 00:00:00 2001 From: Oleh Prypin <oleh@pryp.in> Date: Wed, 1 Nov 2023 00:47:24 +0100 Subject: [PATCH 3/5] Add py.typed file --- markdown/py.typed | 0 pyproject.toml | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 markdown/py.typed diff --git a/markdown/py.typed b/markdown/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/pyproject.toml b/pyproject.toml index c9fa250f..45eb2050 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,9 @@ legacy_em = 'markdown.extensions.legacy_em:LegacyEmExtension' [tool.setuptools] packages = ['markdown', 'markdown.extensions'] +[tool.setuptools.package-data] +"markdown" = ["py.typed"] + [tool.setuptools.dynamic] version = {attr = 'markdown.__meta__.__version__'} From c9121d7e3d5ec7947db8a35415cb93dba82f792b Mon Sep 17 00:00:00 2001 From: Oleh Prypin <oleh@pryp.in> Date: Thu, 2 Nov 2023 17:39:01 +0100 Subject: [PATCH 4/5] Revert extension-only attributes --- markdown/core.py | 5 ----- markdown/extensions/meta.py | 4 ++-- markdown/extensions/toc.py | 8 ++++---- 3 files changed, 6 insertions(+), 11 deletions(-) diff --git a/markdown/core.py b/markdown/core.py index 9056ec84..92b72911 100644 --- a/markdown/core.py +++ b/markdown/core.py @@ -36,7 +36,6 @@ if 
TYPE_CHECKING: # pragma: no cover from xml.etree.ElementTree import Element - from markdown.extensions.toc import TocToken __all__ = ['Markdown', 'markdown', 'markdownFromFile'] @@ -86,10 +85,6 @@ class Markdown: callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`. """ - toc_tokens: list[TocToken] - toc: str - Meta: dict[str, Any] - def __init__(self, **kwargs: Any): """ Creates a new Markdown instance. diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py index 32b6c76c..e43698c2 100644 --- a/markdown/extensions/meta.py +++ b/markdown/extensions/meta.py @@ -49,7 +49,7 @@ def extendMarkdown(self, md: Markdown) -> None: md.preprocessors.register(MetaPreprocessor(md), 'meta', 27) def reset(self) -> None: - self.md.Meta = {} + self.md.Meta = {} # type: ignore[attr-defined] class MetaPreprocessor(Preprocessor): @@ -83,7 +83,7 @@ def run(self, lines: list[str]) -> list[str]: else: lines.insert(0, line) break # no meta data - done - self.md.Meta = meta + self.md.Meta = meta # type: ignore[attr-defined] return lines diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py index b34d6606..41838287 100644 --- a/markdown/extensions/toc.py +++ b/markdown/extensions/toc.py @@ -353,8 +353,8 @@ def run(self, doc: etree.Element) -> None: toc = self.md.serializer(div) for pp in self.md.postprocessors: toc = pp.run(toc) - self.md.toc_tokens = toc_tokens - self.md.toc = toc + self.md.toc_tokens = toc_tokens # type: ignore[attr-defined] + self.md.toc = toc # type: ignore[attr-defined] class TocExtension(Extension): @@ -422,8 +422,8 @@ def extendMarkdown(self, md: Markdown) -> None: md.treeprocessors.register(tocext, 'toc', 5) def reset(self) -> None: - self.md.toc = '' - self.md.toc_tokens = [] + self.md.toc = '' # type: ignore[attr-defined] + self.md.toc_tokens = [] # type: ignore[attr-defined] def makeExtension(**kwargs): # pragma: no cover From 70676a35ee3d8a45d9755221c87e0ef6d588e429 Mon Sep 17 00:00:00 2001 From: 
Oleh Prypin <oleh@pryp.in> Date: Thu, 2 Nov 2023 17:48:14 +0100 Subject: [PATCH 5/5] Fix --- markdown/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markdown/util.py b/markdown/util.py index 75c2c10d..0741b6ea 100644 --- a/markdown/util.py +++ b/markdown/util.py @@ -341,11 +341,11 @@ def __iter__(self) -> Iterator[_T]: @overload def __getitem__(self, key: str | int) -> _T: # pragma: no cover - ... # pragma: no cover + ... @overload def __getitem__(self, key: slice) -> Registry[_T]: # pragma: no cover - ... # pragma: no cover + ... def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]: self._sort()