Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix all mypy typechecking errors, add a lot of type annotations #1399

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ jobs:
fail-fast: false
max-parallel: 4
matrix:
tox-env: [flake8, pep517check, checkspelling]
tox-env: [mypy, flake8, pep517check, checkspelling]

env:
TOXENV: ${{ matrix.tox-env }}
Expand Down
11 changes: 7 additions & 4 deletions markdown/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
import codecs
import warnings
import markdown
import logging
from logging import DEBUG, WARNING, CRITICAL
from typing import Any, Callable, IO, Mapping

yaml_load: Callable[[IO], Any]
try:
# We use `unsafe_load` because users may need to pass in actual Python
# objects. As this is only available from the CLI, the user has much
Expand All @@ -32,18 +37,16 @@
except ImportError: # pragma: no cover
try:
# Fall back to PyYAML <5.1
from yaml import load as yaml_load
from yaml import load as yaml_load # type: ignore
except ImportError:
# Fall back to JSON
from json import load as yaml_load

import logging
from logging import DEBUG, WARNING, CRITICAL

logger = logging.getLogger('MARKDOWN')


def parse_options(args=None, values=None):
def parse_options(args=None, values=None) -> tuple[Mapping[str, Any], bool]:
"""
Define and parse `optparse` options for command-line usage.
"""
Expand Down
14 changes: 8 additions & 6 deletions markdown/blockprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
parent: An `etree` element which will be the parent of the block.
block: A block of text from the source which has been split at blank lines.
"""
pass # pragma: no cover
raise NotImplementedError() # pragma: no cover

def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
""" Run processor. Must be overridden by subclasses.
Expand All @@ -147,7 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
parent: An `etree` element which is the parent of the current block.
blocks: A list of all remaining blocks of the document.
"""
pass # pragma: no cover
raise NotImplementedError() # pragma: no cover


class ListIndentProcessor(BlockProcessor):
Expand All @@ -167,15 +167,15 @@ class ListIndentProcessor(BlockProcessor):
LIST_TYPES = ['ul', 'ol']
""" Types of lists this processor can operate on. """

def __init__(self, *args):
def __init__(self, *args) -> None:
super().__init__(*args)
self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

def test(self, parent: etree.Element, block: str) -> bool:
return block.startswith(' '*self.tab_length) and \
not self.parser.state.isstate('detabbed') and \
(parent.tag in self.ITEM_TYPES or
(len(parent) and parent[-1] is not None and
(len(parent) > 0 and parent[-1] is not None and
(parent[-1].tag in self.LIST_TYPES)))

def run(self, parent: etree.Element, blocks: list[str]) -> None:
Expand Down Expand Up @@ -417,7 +417,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> None:

def get_items(self, block: str) -> list[str]:
""" Break a block into list items. """
items = []
items: list[str] = []
for line in block.split('\n'):
m = self.CHILD_RE.match(line)
if m:
Expand All @@ -426,7 +426,9 @@ def get_items(self, block: str) -> list[str]:
if not items and self.TAG == 'ol':
# Detect the integer value of first list item
INTEGER_RE = re.compile(r'(\d+)')
self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
int_match = INTEGER_RE.match(m.group(1))
assert int_match is not None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what you are trying to accomplish here. Does this make it possible for Markdown content to cause an error? If so, that would be unacceptable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With lines 429-431 reverted:

$ mypy markdown
markdown/blockprocessors.py:429: error: Item "None" of "Match[str] | None" has no attribute "group"  [union-attr]
Found 1 error in 1 file (checked 33 source files)

  • The code before:

    self.STARTSWITH = RE.match(string).group()

    where RE.match(string) can be None, and None.group() is an error

  • The code after:

    int_match = RE.match(string)
    assert int_match is not None
    self.STARTSWITH = int_match.group()

The old code doesn't care to check for the None case where it would violently error with AttributeError. mypy doesn't let it slide and exposes a potential bug.

The new code directly checks for it and errors with a clearer message.

There is no change regarding which situations an error does or does not happen.

TBD: It is worth checking whether this bug case can actually happen (in current code already). In any case we should be happy that mypy flagged this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, now we know about a bug which we didn't know about before. That is good. But this is not the way to address it. Under no circumstances should source text cause Markdown to raise an error. In fact, that is one of the primary goals of the project as documented on the home page. Therefore, this is not the proper way to fix the bug. The error needs to be silenced and some reasonable text needs to be included in the output (depending on the conditions under which the issue arises).

self.STARTSWITH = int_match.group()
# Append to the list
items.append(m.group(3))
elif self.INDENT_RE.match(line):
Expand Down
38 changes: 21 additions & 17 deletions markdown/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class Markdown:
callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
"""

def __init__(self, **kwargs):
def __init__(self, **kwargs: Any):
"""
Creates a new Markdown instance.

Expand Down Expand Up @@ -183,7 +183,7 @@ def registerExtensions(
'Successfully loaded extension "%s.%s".'
% (ext.__class__.__module__, ext.__class__.__name__)
)
elif ext is not None:
elif ext is not None: # type: ignore[unreachable]
raise TypeError(
'Extension "{}.{}" must be of type: "{}.{}"'.format(
ext.__class__.__module__, ext.__class__.__name__,
Expand Down Expand Up @@ -417,11 +417,11 @@ def convertFile(
# Read the source
if input:
if isinstance(input, str):
input_file = codecs.open(input, mode="r", encoding=encoding)
with codecs.open(input, mode="r", encoding=encoding) as input_file:
text = input_file.read()
else:
input_file = codecs.getreader(encoding)(input)
text = input_file.read()
input_file.close()
with codecs.getreader(encoding)(input) as input_file:
text = input_file.read()
else:
text = sys.stdin.read()

Expand All @@ -440,13 +440,13 @@ def convertFile(
output_file.close()
else:
writer = codecs.getwriter(encoding)
output_file = writer(output, errors="xmlcharrefreplace")
output_file.write(html)
output_writer = writer(output, errors="xmlcharrefreplace")
output_writer.write(html)
Comment on lines -443 to +444
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this change was made at all. Note that the Contributing Guide states:

Legacy code which does not follow the guidelines should only be updated if and when other changes (bug fix, feature addition, etc.) are being made to that section of code. While new features should be given names that follow modern Python naming conventions, existing names should be preserved to avoid backward incompatible changes.

I realize that in this instance the variable name is not exposed outside of this method so there is no concern over a backward incompatible change, but the general principle remains. Please, let's refrain from making unnecessary changes just for personal preference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% of code body changes in this pull request are in response to error messages produced by mypy. Don't need to tell me this regarding unrelated changes because I'm always the first one to say this as well.

This particular change is because mypy doesn't like that these two differently-typed values share the same variable name.


With lines 425-429 reverted:

$ mypy markdown                                             
markdown/core.py:427: error: Incompatible types in assignment (expression has type "StreamReader", variable has type "StreamReaderWriter")  [assignment]
Found 1 error in 1 file (checked 33 source files)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I embrace Python not being strongly typed. If that means making changes like this, then no thank you.

# Don't close here. User may want to write more.
else:
# Encode manually and write bytes to stdout.
html = html.encode(encoding, "xmlcharrefreplace")
sys.stdout.buffer.write(html)
html_bytes = html.encode(encoding, "xmlcharrefreplace")
sys.stdout.buffer.write(html_bytes)

return self

Expand Down Expand Up @@ -482,7 +482,13 @@ def markdown(text: str, **kwargs: Any) -> str:
return md.convert(text)


def markdownFromFile(**kwargs: Any):
def markdownFromFile(
*,
input: str | BinaryIO | None = None,
output: str | BinaryIO | None = None,
encoding: str | None = None,
**kwargs: Any
) -> None:
Comment on lines -485 to +491
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't recall why the specific keyword parameters were removed some years ago, but why did you feel the need to add them back in? I'm not opposed to it if there is a good reason related to this scope of this PR, but I would like to here the reason.

However, I am more concerned about why you added position arguments (*)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function's signature doesn't change, I'm just formalizing the de-facto signature. The * is to start named-only parameters, not positional-only parameters. They were also named-only before.

"""
Read Markdown text from a file and write output to a file or a stream.

Expand All @@ -491,13 +497,11 @@ def markdownFromFile(**kwargs: Any):
[`convert`][markdown.Markdown.convert].

Keyword arguments:
input (str | BinaryIO): A file name or readable object.
output (str | BinaryIO): A file name or writable object.
encoding (str): Encoding of input and output.
input: A file name or readable object.
output: A file name or writable object.
encoding: Encoding of input and output.
**kwargs: Any arguments accepted by the `Markdown` class.

"""
md = Markdown(**kwargs)
md.convertFile(kwargs.get('input', None),
kwargs.get('output', None),
kwargs.get('encoding', None))
md.convertFile(input, output, encoding)
2 changes: 1 addition & 1 deletion markdown/extensions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class Extension:
if a default is not set for each option.
"""

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
""" Initiate Extension and set up configs. """
self.setConfigs(kwargs)

Expand Down
6 changes: 5 additions & 1 deletion markdown/extensions/abbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@
from ..util import AtomicString
import re
import xml.etree.ElementTree as etree
from typing import TYPE_CHECKING

if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown


class AbbrExtension(Extension):
""" Abbreviation Extension for Python-Markdown. """

def extendMarkdown(self, md):
def extendMarkdown(self, md: Markdown) -> None:
""" Insert `AbbrPreprocessor` before `ReferencePreprocessor`. """
md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)

Expand Down
10 changes: 6 additions & 4 deletions markdown/extensions/admonition.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
from markdown import blockparser


class AdmonitionExtension(Extension):
""" Admonition extension for Python-Markdown. """

def extendMarkdown(self, md):
def extendMarkdown(self, md: Markdown) -> None:
""" Add Admonition to Markdown instance. """
md.registerExtension(self)

Expand All @@ -59,7 +60,7 @@ def __init__(self, parser: blockparser.BlockParser):
super().__init__(parser)

self.current_sibling: etree.Element | None = None
self.content_indention = 0
self.content_indent = 0

def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Element | None, str, str]:
"""Get sibling admonition.
Expand All @@ -74,11 +75,11 @@ def parse_content(self, parent: etree.Element, block: str) -> tuple[etree.Elemen

# We already acquired the block via test
if self.current_sibling is not None:
sibling = self.current_sibling
prev_sibling = self.current_sibling
block, the_rest = self.detab(block, self.content_indent)
self.current_sibling = None
self.content_indent = 0
return sibling, block, the_rest
return prev_sibling, block, the_rest
Comment on lines -77 to +82
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, we are making unnecessary out-of-scope changes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was made in response to mypy errors. Again this is because mypy doesn't like that a variable name is reused with different types in different places. Which is really a limitation of mypy, most other type checkers would deal with this perfectly fine.


With lines 78-82 reverted:

$ mypy markdown
markdown/extensions/admonition.py:84: error: Incompatible types in assignment (expression has type "Element | None", variable has type "Element")  [assignment]
markdown/extensions/admonition.py:87: error: Incompatible types in assignment (expression has type "None", variable has type "Element")  [assignment]
markdown/extensions/admonition.py:101: error: Incompatible types in assignment (expression has type "Element | None", variable has type "Element")  [assignment]
markdown/extensions/admonition.py:113: error: Incompatible types in assignment (expression has type "None", variable has type "Element")  [assignment]


sibling = self.lastChild(parent)

Expand Down Expand Up @@ -147,6 +148,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> None:
p.text = title
p.set('class', self.CLASSNAME_TITLE)
else:
assert sibling is not None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this raise an error based on Markdown input? If so, this is unacceptable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code already has a problem where sibling might be None and sibling.tag will be an error. mypy correctly detects and reports it, and requires us to make an explicit assertion, which indicates that we either know that this will never actually happen, or perhaps don't care if it does but then the error will be more explicit.

TBD: It is worth checking whether this bug case can actually happen (in current code already). In any case we should be happy that mypy flagged this.


With line 151 reverted:

$ mypy markdown
markdown/extensions/admonition.py:152: error: Item "None" of "Element | None" has no attribute "tag"  [union-attr]
markdown/extensions/admonition.py:152: error: Item "None" of "Element | None" has no attribute "text"  [union-attr]
markdown/extensions/admonition.py:153: error: Item "None" of "Element | None" has no attribute "text"  [union-attr]
markdown/extensions/admonition.py:154: error: Item "None" of "Element | None" has no attribute "text"  [union-attr]
markdown/extensions/admonition.py:155: error: Argument 1 to "SubElement" has incompatible type "Element | None"; expected "Element"  [arg-type]
markdown/extensions/admonition.py:158: error: Incompatible types in assignment (expression has type "Element | None", variable has type "Element")  [assignment]

# Sibling is a list item, but we need to wrap it's content should be wrapped in <p>
if sibling.tag in ('li', 'dd') and sibling.text:
text = sibling.text
Expand Down
19 changes: 11 additions & 8 deletions markdown/extensions/attr_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

if TYPE_CHECKING: # pragma: no cover
from xml.etree.ElementTree import Element
from markdown import Markdown


def _handle_double_quote(s, t):
Expand All @@ -56,7 +57,7 @@ def _handle_word(s, t):
return t, t


_scanner = re.Scanner([
_scanner = re.Scanner([ # type: ignore[attr-defined]
(r'[^ =]+=".*?"', _handle_double_quote),
(r"[^ =]+='.*?'", _handle_single_quote),
(r'[^ =]+=[^ =]+', _handle_key_value),
Expand Down Expand Up @@ -86,6 +87,8 @@ class AttrListTreeprocessor(Treeprocessor):
r'\uf900-\ufdcf\ufdf0-\ufffd'
r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

md: Markdown

def run(self, doc: Element) -> None:
for elem in doc.iter():
if self.md.is_block_level(elem.tag):
Expand All @@ -102,18 +105,18 @@ def run(self, doc: Element) -> None:
if child.tag in ['ul', 'ol']:
pos = i
break
if pos is None and elem[-1].tail:
if pos is None and (tail := elem[-1].tail):
# use tail of last child. no `ul` or `ol`.
m = RE.search(elem[-1].tail)
m = RE.search(tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[-1].tail = elem[-1].tail[:m.start()]
elif pos is not None and pos > 0 and elem[pos-1].tail:
elem[-1].tail = tail[:m.start()]
elif pos is not None and pos > 0 and (tail := elem[pos-1].tail):
# use tail of last child before `ul` or `ol`
m = RE.search(elem[pos-1].tail)
m = RE.search(tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
elem[pos-1].tail = tail[:m.start()]
elif elem.text:
# use text. `ul` is first child.
m = RE.search(elem.text)
Expand Down Expand Up @@ -170,7 +173,7 @@ def sanitize_name(self, name: str) -> str:

class AttrListExtension(Extension):
""" Attribute List extension for Python-Markdown """
def extendMarkdown(self, md):
def extendMarkdown(self, md: Markdown) -> None:
md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
md.registerExtension(self)

Expand Down
14 changes: 9 additions & 5 deletions markdown/extensions/codehilite.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from typing import TYPE_CHECKING, Callable, Any

if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
import xml.etree.ElementTree as etree

try: # pragma: no cover
Expand Down Expand Up @@ -150,7 +151,7 @@ def hilite(self, shebang: bool = True) -> str:

if pygments and self.use_pygments:
try:
lexer = get_lexer_by_name(self.lang, **self.options)
lexer = get_lexer_by_name(self.lang or '', **self.options)
except ValueError:
try:
if self.guess_lang:
Expand All @@ -161,7 +162,7 @@ def hilite(self, shebang: bool = True) -> str:
lexer = get_lexer_by_name('text', **self.options)
if not self.lang:
# Use the guessed lexer's language instead
self.lang = lexer.aliases[0]
self.lang = lexer.aliases[0] # type: ignore[attr-defined]
lang_str = f'{self.lang_prefix}{self.lang}'
if isinstance(self.pygments_formatter, str):
try:
Expand Down Expand Up @@ -254,6 +255,7 @@ class HiliteTreeprocessor(Treeprocessor):
""" Highlight source code in code blocks. """

config: dict[str, Any]
md: Markdown

def code_unescape(self, text: str) -> str:
"""Unescape code."""
Expand All @@ -270,8 +272,10 @@ def run(self, root: etree.Element) -> None:
for block in blocks:
if len(block) == 1 and block[0].tag == 'code':
local_config = self.config.copy()
text = block[0].text
assert text is not None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another assert....

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another place where the code currently assumes that something is present there and violently errors if there's no match. mypy doesn't let it slide.

TBD: It is worth checking whether this bug case can actually happen (in current code already). In any case we should be happy that mypy flagged this.


With lines 275-278 reverted:

$ mypy markdown
markdown/extensions/codehilite.py:276: error: Argument 1 to "code_unescape" of "HiliteTreeprocessor" has incompatible type "str | None"; expected "str"  [arg-type]
Found 1 error in 1 file (checked 33 source files)

code = CodeHilite(
self.code_unescape(block[0].text),
self.code_unescape(text),
tab_length=self.md.tab_length,
style=local_config.pop('pygments_style', 'default'),
**local_config
Expand All @@ -288,7 +292,7 @@ def run(self, root: etree.Element) -> None:
class CodeHiliteExtension(Extension):
""" Add source code highlighting to markdown code blocks. """

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
# define default configs
self.config = {
'linenums': [
Expand Down Expand Up @@ -331,7 +335,7 @@ def __init__(self, **kwargs):
pass # Assume it's not a boolean value. Use as-is.
self.config[key] = [value, '']

def extendMarkdown(self, md):
def extendMarkdown(self, md: Markdown) -> None:
""" Add `HilitePostprocessor` to Markdown instance. """
hiliter = HiliteTreeprocessor(md)
hiliter.config = self.getConfigs()
Expand Down
Loading
Loading