Add type annotations to the project and use mypy (#2588)

Add type annotations and use mypy for static type checking. Type checkers help ensure that the project uses its variables and functions correctly. With mypy running in CI, the build will warn when types are used incorrectly. The mypy project: https://github.com/python/mypy — documentation: https://mypy.readthedocs.io/en/stable/index.html
codespell-project · Nov 7, 2022 · 8d0d82b
1 parent 2042e65
commit 8d0d82b
Show file tree

Hide file tree

Showing 7 changed files with 386 additions and 130 deletions.
diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml
@@ -0,0 +1,22 @@
+name: mypy
+
+on:
+  - push
+  - pull_request
+
+jobs:
+  mypy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.7'
+
+      - name: Install dependencies
+        run: pip install -e .[types]
+
+      - name: Run mypy
+        run: mypy .
diff --git a/codespell_lib/__init__.py b/codespell_lib/__init__.py
@@ -1,2 +1,4 @@
-from ._codespell import _script_main, main  # noqa
-from ._version import __version__  # noqa
+from ._codespell import _script_main, main
+from ._version import __version__
+
+__all__ = ["_script_main", "main", "__version__"]
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -23,6 +23,7 @@
 import re
 import sys
 import textwrap
+from typing import Dict, List, Optional, Pattern, Sequence, Set, Tuple
 
 # autogenerated by setuptools_scm
 from ._version import __version__ as VERSION
@@ -91,52 +92,53 @@ class QuietLevels:
 
 
 class GlobMatch:
-    def __init__(self, pattern):
+    def __init__(self, pattern: Optional[str]) -> None:
+        self.pattern_list: Optional[List[str]]
         if pattern:
             # Pattern might be a list of comma-delimited strings
             self.pattern_list = ','.join(pattern).split(',')
         else:
             self.pattern_list = None
 
-    def match(self, filename):
+    def match(self, filename: str) -> bool:
         if self.pattern_list is None:
             return False
 
         return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)
 
 
 class Misspelling:
-    def __init__(self, data, fix, reason):
+    def __init__(self, data: str, fix: bool, reason: str) -> None:
         self.data = data
         self.fix = fix
         self.reason = reason
 
 
 class TermColors:
-    def __init__(self):
+    def __init__(self) -> None:
         self.FILE = '\033[33m'
         self.WWORD = '\033[31m'
         self.FWORD = '\033[32m'
         self.DISABLE = '\033[0m'
 
-    def disable(self):
+    def disable(self) -> None:
         self.FILE = ''
         self.WWORD = ''
         self.FWORD = ''
         self.DISABLE = ''
 
 
 class Summary:
-    def __init__(self):
-        self.summary = {}
+    def __init__(self) -> None:
+        self.summary: Dict[str, int] = {}
 
-    def update(self, wrongword):
+    def update(self, wrongword: str) -> None:
         if wrongword in self.summary:
             self.summary[wrongword] += 1
         else:
             self.summary[wrongword] = 1
 
-    def __str__(self):
+    def __str__(self) -> str:
         keys = list(self.summary.keys())
         keys.sort()
 
@@ -147,13 +149,13 @@ def __str__(self):
 
 
 class FileOpener:
-    def __init__(self, use_chardet, quiet_level):
+    def __init__(self, use_chardet: bool, quiet_level: int) -> None:
         self.use_chardet = use_chardet
         if use_chardet:
             self.init_chardet()
         self.quiet_level = quiet_level
 
-    def init_chardet(self):
+    def init_chardet(self) -> None:
         try:
             from chardet.universaldetector import UniversalDetector
         except ImportError:
@@ -163,21 +165,22 @@ def init_chardet(self):
 
         self.encdetector = UniversalDetector()
 
-    def open(self, filename):
+    def open(self, filename: str) -> Tuple[List[str], str]:
         if self.use_chardet:
             return self.open_with_chardet(filename)
         else:
             return self.open_with_internal(filename)
 
-    def open_with_chardet(self, filename):
+    def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
         self.encdetector.reset()
-        with open(filename, 'rb') as f:
-            for line in f:
+        with open(filename, 'rb') as fb:
+            for line in fb:
                 self.encdetector.feed(line)
                 if self.encdetector.done:
                     break
         self.encdetector.close()
         encoding = self.encdetector.result['encoding']
+        assert encoding is not None
 
         try:
             f = open(filename, encoding=encoding, newline='')
@@ -195,7 +198,7 @@ def open_with_chardet(self, filename):
 
         return lines, encoding
 
-    def open_with_internal(self, filename):
+    def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
         encoding = None
         first_try = True
         for encoding in encodings:
@@ -228,7 +231,7 @@ def open_with_internal(self, filename):
 class NewlineHelpFormatter(argparse.HelpFormatter):
     """Help formatter that preserves newlines and deals with lists."""
 
-    def _split_lines(self, text, width):
+    def _split_lines(self, text: str, width: int) -> List[str]:
         parts = text.split('\n')
         out = []
         for part in parts:
@@ -248,7 +251,9 @@ def _split_lines(self, text, width):
         return out
 
 
-def parse_options(args):
+def parse_options(
+    args: Sequence[str]
+) -> Tuple[argparse.Namespace, argparse.ArgumentParser, List[str]]:
     parser = argparse.ArgumentParser(formatter_class=NewlineHelpFormatter)
 
     parser.set_defaults(colors=sys.stdout.isatty())
@@ -452,7 +457,7 @@ def parse_options(args):
     return options, parser, used_cfg_files
 
 
-def parse_ignore_words_option(ignore_words_option):
+def parse_ignore_words_option(ignore_words_option: List[str]) -> Set[str]:
     ignore_words = set()
     if ignore_words_option:
         for comma_separated_words in ignore_words_option:
@@ -461,19 +466,23 @@ def parse_ignore_words_option(ignore_words_option):
     return ignore_words
 
 
-def build_exclude_hashes(filename, exclude_lines):
+def build_exclude_hashes(filename: str, exclude_lines: Set[str]) -> None:
     with open(filename, encoding='utf-8') as f:
         for line in f:
             exclude_lines.add(line)
 
 
-def build_ignore_words(filename, ignore_words):
+def build_ignore_words(filename: str, ignore_words: Set[str]) -> None:
     with open(filename, encoding='utf-8') as f:
         for line in f:
             ignore_words.add(line.strip())
 
 
-def build_dict(filename, misspellings, ignore_words):
+def build_dict(
+    filename: str,
+    misspellings: Dict[str, Misspelling],
+    ignore_words: Set[str],
+) -> None:
     with open(filename, encoding='utf-8') as f:
         for line in f:
             [key, data] = line.split('->')
@@ -501,20 +510,20 @@ def build_dict(filename, misspellings, ignore_words):
             misspellings[key] = Misspelling(data, fix, reason)
 
 
-def is_hidden(filename, check_hidden):
+def is_hidden(filename: str, check_hidden: bool) -> bool:
     bfilename = os.path.basename(filename)
 
     return bfilename not in ('', '.', '..') and \
         (not check_hidden and bfilename[0] == '.')
 
 
-def is_text_file(filename):
+def is_text_file(filename: str) -> bool:
     with open(filename, mode='rb') as f:
         s = f.read(1024)
     return b'\x00' not in s
 
 
-def fix_case(word, fixword):
+def fix_case(word: str, fixword: str) -> str:
     if word == word.capitalize():
         return ', '.join(w.strip().capitalize() for w in fixword.split(','))
     elif word == word.upper():
@@ -524,7 +533,12 @@ def fix_case(word, fixword):
     return fixword
 
 
-def ask_for_word_fix(line, wrongword, misspelling, interactivity):
+def ask_for_word_fix(
+    line: str,
+    wrongword: str,
+    misspelling: Misspelling,
+    interactivity: int,
+) -> Tuple[bool, str]:
     if interactivity <= 0:
         return misspelling.fix, fix_case(wrongword, misspelling.data)
 
@@ -562,8 +576,8 @@ def ask_for_word_fix(line, wrongword, misspelling, interactivity):
                 break
 
             try:
-                n = int(n)
-                r = opt[n]
+                i = int(n)
+                r = opt[i]
             except (ValueError, IndexError):
                 print("Not a valid option\n")
 
@@ -574,21 +588,35 @@ def ask_for_word_fix(line, wrongword, misspelling, interactivity):
     return misspelling.fix, fix_case(wrongword, misspelling.data)
 
 
-def print_context(lines, index, context):
+def print_context(
+    lines: List[str],
+    index: int,
+    context: Tuple[int, int],
+) -> None:
     # context = (context_before, context_after)
     for i in range(index - context[0], index + context[1] + 1):
         if 0 <= i < len(lines):
             print('%s %s' % ('>' if i == index else ':', lines[i].rstrip()))
 
 
-def extract_words(text, word_regex, ignore_word_regex):
+def extract_words(
+    text: str,
+    word_regex: Pattern[str],
+    ignore_word_regex: Optional[Pattern[str]],
+) -> List[str]:
     if ignore_word_regex:
         text = ignore_word_regex.sub(' ', text)
     return word_regex.findall(text)
 
 
-def apply_uri_ignore_words(check_words, line, word_regex, ignore_word_regex,
-                           uri_regex, uri_ignore_words):
+def apply_uri_ignore_words(
+    check_words: List[str],
+    line: str,
+    word_regex: Pattern[str],
+    ignore_word_regex: Optional[Pattern[str]],
+    uri_regex: Pattern[str],
+    uri_ignore_words: Set[str]
+) -> None:
     if not uri_ignore_words:
         return
     for uri in re.findall(uri_regex, line):
@@ -598,9 +626,20 @@ def apply_uri_ignore_words(check_words, line, word_regex, ignore_word_regex,
                 check_words.remove(uri_word)
 
 
-def parse_file(filename, colors, summary, misspellings, exclude_lines,
-               file_opener, word_regex, ignore_word_regex, uri_regex,
-               uri_ignore_words, context, options):
+def parse_file(
+    filename: str,
+    colors: TermColors,
+    summary: Optional[Summary],
+    misspellings: Dict[str, Misspelling],
+    exclude_lines: Set[str],
+    file_opener: FileOpener,
+    word_regex: Pattern[str],
+    ignore_word_regex: Optional[Pattern[str]],
+    uri_regex: Pattern[str],
+    uri_ignore_words: Set[str],
+    context: Optional[Tuple[int, int]],
+    options: argparse.Namespace,
+) -> int:
     bad_count = 0
     lines = None
     changed = False
@@ -770,12 +809,12 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
     return bad_count
 
 
-def _script_main():
+def _script_main() -> int:
     """Wrap to main() for setuptools."""
     return main(*sys.argv[1:])
 
 
-def main(*args):
+def main(*args: str) -> int:
     """Contains flow control"""
     options, parser, used_cfg_files = parse_options(args)
 
@@ -858,7 +897,7 @@ def main(*args):
                 parser.print_help()
                 return EX_USAGE
             use_dictionaries.append(dictionary)
-    misspellings = {}
+    misspellings: Dict[str, Misspelling] = {}
     for dictionary in use_dictionaries:
         build_dict(dictionary, misspellings, ignore_words)
     colors = TermColors()
@@ -891,7 +930,7 @@ def main(*args):
             context_after = max(0, options.after_context)
         context = (context_before, context_after)
 
-    exclude_lines = set()
+    exclude_lines: Set[str] = set()
     if options.exclude_file:
         build_exclude_hashes(options.exclude_file, exclude_lines)
 

diff --git a/codespell_lib/py.typed b/codespell_lib/py.typed