diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 00000000000..7091279815e --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,12 @@ +name: black + +on: + - push + - pull_request + +jobs: + black: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable diff --git a/codespell_lib/__main__.py b/codespell_lib/__main__.py index 2d8f4b629f8..bbadb84c5b6 100644 --- a/codespell_lib/__main__.py +++ b/codespell_lib/__main__.py @@ -1,4 +1,4 @@ from ._codespell import _script_main -if __name__ == '__main__': +if __name__ == "__main__": _script_main() diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py index 3cabd2f3042..8811041b34f 100644 --- a/codespell_lib/_codespell.py +++ b/codespell_lib/_codespell.py @@ -31,41 +31,84 @@ # While we want to treat characters like ( or " as okay for a starting break, # these may occur unescaped in URIs, and so we are more restrictive on the # endpoint. Emails are more restrictive, so the endpoint remains flexible. -uri_regex_def = ("(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|" - "\\b[\\w.%+-]+@[\\w.-]+\\b)") -encodings = ('utf-8', 'iso-8859-1') +uri_regex_def = ( + "(\\b(?:https?|[ts]?ftp|file|git|smb)://[^\\s]+(?=$|\\s)|" + "\\b[\\w.%+-]+@[\\w.-]+\\b)" +) +encodings = ("utf-8", "iso-8859-1") USAGE = """ \t%prog [OPTIONS] [file1 file2 ... fileN] """ -supported_languages_en = ('en', 'en_GB', 'en_US', 'en_CA', 'en_AU') +supported_languages_en = ("en", "en_GB", "en_US", "en_CA", "en_AU") supported_languages = supported_languages_en # Users might want to link this file into /usr/local/bin, so we resolve the # symbolic link path to the real path if necessary. -_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') +_data_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") _builtin_dictionaries = ( # name, desc, name, err in aspell, correction in aspell, \ # err dictionary array, rep dictionary array # The arrays must contain the names of aspell dictionaries # The aspell tests here aren't the ideal state, but the None's are # realistic for obscure words - ('clear', 'for unambiguous errors', '', - False, None, supported_languages_en, None), - ('rare', 'for rare (but valid) words that are likely to be errors', '_rare', # noqa: E501 - None, None, None, None), - ('informal', 'for making informal words more formal', '_informal', - True, True, supported_languages_en, supported_languages_en), - ('usage', 'for replacing phrasing with recommended terms', '_usage', - None, None, None, None), - ('code', 'for words from code and/or mathematics that are likely to be typos in other contexts (such as uint)', '_code', # noqa: E501 - None, None, None, None,), - ('names', 'for valid proper names that might be typos', '_names', - None, None, None, None,), - ('en-GB_to_en-US', 'for corrections from en-GB to en-US', '_en-GB_to_en-US', # noqa: E501 - True, True, ('en_GB',), ('en_US',)), + ("clear", "for unambiguous errors", "", False, None, supported_languages_en, None), + ( + "rare", + "for rare (but valid) words that are likely to be errors", + "_rare", + None, + None, + None, + None, + ), + ( + "informal", + "for making informal words more formal", + "_informal", + True, + True, + supported_languages_en, + supported_languages_en, + ), + ( + "usage", + "for replacing phrasing with recommended terms", + "_usage", + None, + None, + None, + None, + ), + ( + "code", + "for words from code and/or mathematics that are likely to be typos in other contexts (such as uint)", # noqa: E501 + "_code", + None, + None, + None, + None, + ), + ( + "names", + "for valid proper names that might be typos", + "_names", + None, + None, + None, + None, + ), + ( + "en-GB_to_en-US", + "for corrections from en-GB to en-US", + "_en-GB_to_en-US", + True, + True, + ("en_GB",), + ("en_US",), + ), ) -_builtin_default = 'clear,rare' +_builtin_default = "clear,rare" # docs say os.EX_USAGE et al. are only available on Unix systems, so to be safe # we protect and just use the values they are on macOS and Linux @@ -94,7 +137,7 @@ class GlobMatch: def __init__(self, pattern): if pattern: # Pattern might be a list of comma-delimited strings - self.pattern_list = ','.join(pattern).split(',') + self.pattern_list = ",".join(pattern).split(",") else: self.pattern_list = None @@ -114,16 +157,16 @@ def __init__(self, data, fix, reason): class TermColors: def __init__(self): - self.FILE = '\033[33m' - self.WWORD = '\033[31m' - self.FWORD = '\033[32m' - self.DISABLE = '\033[0m' + self.FILE = "\033[33m" + self.WWORD = "\033[31m" + self.FWORD = "\033[32m" + self.DISABLE = "\033[0m" def disable(self): - self.FILE = '' - self.WWORD = '' - self.FWORD = '' - self.DISABLE = '' + self.FILE = "" + self.WWORD = "" + self.FWORD = "" + self.DISABLE = "" class Summary: @@ -140,10 +183,12 @@ def __str__(self): keys = list(self.summary.keys()) keys.sort() - return "\n".join(["{0}{1:{width}}".format( - key, - self.summary.get(key), - width=15 - len(key)) for key in keys]) + return "\n".join( + [ + "{0}{1:{width}}".format(key, self.summary.get(key), width=15 - len(key)) + for key in keys + ] + ) class FileOpener: @@ -157,9 +202,11 @@ def init_chardet(self): try: from chardet.universaldetector import UniversalDetector except ImportError: - raise ImportError("There's no chardet installed to import from. " - "Please, install it and check your PYTHONPATH " - "environment variable") + raise ImportError( + "There's no chardet installed to import from. " + "Please, install it and check your PYTHONPATH " + "environment variable" + ) self.encdetector = UniversalDetector() @@ -171,23 +218,28 @@ def open(self, filename): def open_with_chardet(self, filename): self.encdetector.reset() - with open(filename, 'rb') as f: + with open(filename, "rb") as f: for line in f: self.encdetector.feed(line) if self.encdetector.done: break self.encdetector.close() - encoding = self.encdetector.result['encoding'] + encoding = self.encdetector.result["encoding"] try: - f = open(filename, encoding=encoding, newline='') + f = open(filename, encoding=encoding, newline="") except UnicodeDecodeError: - print("ERROR: Could not detect encoding: %s" % filename, - file=sys.stderr) + print("ERROR: Could not detect encoding: %s" % filename, file=sys.stderr) raise except LookupError: - print("ERROR: Don't know how to handle encoding %s: %s" - % (encoding, filename,), file=sys.stderr) + print( + "ERROR: Don't know how to handle encoding %s: %s" + % ( + encoding, + filename, + ), + file=sys.stderr, + ) raise else: lines = f.readlines() @@ -202,46 +254,53 @@ def open_with_internal(self, filename): if first_try: first_try = False elif not self.quiet_level & QuietLevels.ENCODING: - print("WARNING: Trying next encoding %s" - % encoding, file=sys.stderr) - with open(filename, encoding=encoding, newline='') as f: + print("WARNING: Trying next encoding %s" % encoding, file=sys.stderr) + with open(filename, encoding=encoding, newline="") as f: try: lines = f.readlines() except UnicodeDecodeError: if not self.quiet_level & QuietLevels.ENCODING: - print("WARNING: Decoding file using encoding=%s " - "failed: %s" % (encoding, filename,), - file=sys.stderr) + print( + "WARNING: Decoding file using encoding=%s " + "failed: %s" + % ( + encoding, + filename, + ), + file=sys.stderr, + ) else: break else: - raise Exception('Unknown encoding') + raise Exception("Unknown encoding") return lines, encoding + # -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:- # If someday this breaks, we can just switch to using RawTextHelpFormatter, # but it has the disadvantage of not wrapping our long lines. + class NewlineHelpFormatter(argparse.HelpFormatter): """Help formatter that preserves newlines and deals with lists.""" def _split_lines(self, text, width): - parts = text.split('\n') + parts = text.split("\n") out = [] for part in parts: # Eventually we could allow others... - indent_start = '- ' + indent_start = "- " if part.startswith(indent_start): offset = len(indent_start) else: offset = 0 part = part[offset:] - part = self._whitespace_matcher.sub(' ', part).strip() + part = self._whitespace_matcher.sub(" ", part).strip() parts = textwrap.wrap(part, width - offset) - parts = [' ' * offset + p for p in parts] + parts = [" " * offset + p for p in parts] if offset: parts[0] = indent_start + parts[0][offset:] out.extend(parts) @@ -252,157 +311,243 @@ def parse_options(args): parser = argparse.ArgumentParser(formatter_class=NewlineHelpFormatter) parser.set_defaults(colors=sys.stdout.isatty()) - parser.add_argument('--version', action='version', version=VERSION) - - parser.add_argument('-d', '--disable-colors', - action='store_false', dest='colors', - help='disable colors, even when printing to terminal ' - '(always set for Windows)') - parser.add_argument('-c', '--enable-colors', - action='store_true', dest='colors', - help='enable colors, even when not printing to ' - 'terminal') - - parser.add_argument('-w', '--write-changes', - action='store_true', default=False, - help='write changes in place if possible') - - parser.add_argument('-D', '--dictionary', - action='append', - help='custom dictionary file that contains spelling ' - 'corrections. If this flag is not specified or ' - 'equals "-" then the default dictionary is used. ' - 'This option can be specified multiple times.') - builtin_opts = '\n- '.join([''] + [ - '%r %s' % (d[0], d[1]) for d in _builtin_dictionaries]) - parser.add_argument('--builtin', - dest='builtin', default=_builtin_default, - metavar='BUILTIN-LIST', - help='comma-separated list of builtin dictionaries ' - 'to include (when "-D -" or no "-D" is passed). ' - 'Current options are:' + builtin_opts + '\n' - 'The default is %(default)r.') - parser.add_argument('--ignore-regex', - action='store', type=str, - help='regular expression that is used to find ' - 'patterns to ignore by treating as whitespace. ' - 'When writing regular expressions, consider ' - 'ensuring there are boundary non-word chars, ' - 'e.g., "\\bmatch\\b". Defaults to ' - 'empty/disabled.') - parser.add_argument('-I', '--ignore-words', - action='append', metavar='FILE', - help='file that contains words that will be ignored ' - 'by codespell. File must contain 1 word per line.' - ' Words are case sensitive based on how they are ' - 'written in the dictionary file') - parser.add_argument('-L', '--ignore-words-list', - action='append', metavar='WORDS', - help='comma separated list of words to be ignored ' - 'by codespell. Words are case sensitive based on ' - 'how they are written in the dictionary file') - parser.add_argument('--uri-ignore-words-list', - action='append', metavar='WORDS', - help='comma separated list of words to be ignored ' - 'by codespell in URIs and emails only. Words are ' - 'case sensitive based on how they are written in ' - 'the dictionary file. If set to "*", all ' - 'misspelling in URIs and emails will be ignored.') - parser.add_argument('-r', '--regex', - action='store', type=str, - help='regular expression that is used to find words. ' - 'By default any alphanumeric character, the ' - 'underscore, the hyphen, and the apostrophe is ' - 'used to build words. This option cannot be ' - 'specified together with --write-changes.') - parser.add_argument('--uri-regex', - action='store', type=str, - help='regular expression that is used to find URIs ' - 'and emails. A default expression is provided.') - parser.add_argument('-s', '--summary', - action='store_true', default=False, - help='print summary of fixes') - - parser.add_argument('--count', - action='store_true', default=False, - help='print the number of errors as the last line of ' - 'stderr') - - parser.add_argument('-S', '--skip', - action='append', - help='comma-separated list of files to skip. It ' - 'accepts globs as well. E.g.: if you want ' - 'codespell to skip .eps and .txt files, ' - 'you\'d give "*.eps,*.txt" to this option.') - - parser.add_argument('-x', '--exclude-file', type=str, metavar='FILE', - help='ignore whole lines that match those ' - 'in the file FILE. The lines in FILE ' - 'should match the to-be-excluded lines exactly') - - parser.add_argument('-i', '--interactive', - action='store', type=int, default=0, - help='set interactive mode when writing changes:\n' - '- 0: no interactivity.\n' - '- 1: ask for confirmation.\n' - '- 2: ask user to choose one fix when more than one is available.\n' # noqa: E501 - '- 3: both 1 and 2') - - parser.add_argument('-q', '--quiet-level', - action='store', type=int, default=2, - help='bitmask that allows suppressing messages:\n' - '- 0: print all messages.\n' - '- 1: disable warnings about wrong encoding.\n' - '- 2: disable warnings about binary files.\n' - '- 4: omit warnings about automatic fixes that were disabled in the dictionary.\n' # noqa: E501 - '- 8: don\'t print anything for non-automatic fixes.\n' # noqa: E501 - '- 16: don\'t print the list of fixed files.\n' - 'As usual with bitmasks, these levels can be ' - 'combined; e.g. use 3 for levels 1+2, 7 for ' - '1+2+4, 23 for 1+2+4+16, etc. ' - 'The default mask is %(default)s.') - - parser.add_argument('-e', '--hard-encoding-detection', - action='store_true', default=False, - help='use chardet to detect the encoding of each ' - 'file. This can slow down codespell, but is more ' - 'reliable in detecting encodings other than ' - 'utf-8, iso8859-1, and ascii.') - - parser.add_argument('-f', '--check-filenames', - action='store_true', default=False, - help='check file names as well') - - parser.add_argument('-H', '--check-hidden', - action='store_true', default=False, - help='check hidden files and directories (those ' - 'starting with ".") as well.') - parser.add_argument('-A', '--after-context', type=int, metavar='LINES', - help='print LINES of trailing context') - parser.add_argument('-B', '--before-context', type=int, metavar='LINES', - help='print LINES of leading context') - parser.add_argument('-C', '--context', type=int, metavar='LINES', - help='print LINES of surrounding context') - parser.add_argument('--config', type=str, - help='path to config file.') - parser.add_argument('--toml', type=str, - help='path to a pyproject.toml file.') - parser.add_argument('files', nargs='*', - help='files or directories to check') + parser.add_argument("--version", action="version", version=VERSION) + + parser.add_argument( + "-d", + "--disable-colors", + action="store_false", + dest="colors", + help="disable colors, even when printing to terminal " + "(always set for Windows)", + ) + parser.add_argument( + "-c", + "--enable-colors", + action="store_true", + dest="colors", + help="enable colors, even when not printing to terminal", + ) + + parser.add_argument( + "-w", + "--write-changes", + action="store_true", + default=False, + help="write changes in place if possible", + ) + + parser.add_argument( + "-D", + "--dictionary", + action="append", + help="custom dictionary file that contains spelling " + "corrections. If this flag is not specified or " + 'equals "-" then the default dictionary is used. ' + "This option can be specified multiple times.", + ) + builtin_opts = "\n- ".join( + [""] + ["%r %s" % (d[0], d[1]) for d in _builtin_dictionaries] + ) + parser.add_argument( + "--builtin", + dest="builtin", + default=_builtin_default, + metavar="BUILTIN-LIST", + help="comma-separated list of builtin dictionaries " + 'to include (when "-D -" or no "-D" is passed). ' + "Current options are:" + builtin_opts + "\n" + "The default is %(default)r.", + ) + parser.add_argument( + "--ignore-regex", + action="store", + type=str, + help="regular expression that is used to find " + "patterns to ignore by treating as whitespace. " + "When writing regular expressions, consider " + "ensuring there are boundary non-word chars, " + 'e.g., "\\bmatch\\b". Defaults to ' + "empty/disabled.", + ) + parser.add_argument( + "-I", + "--ignore-words", + action="append", + metavar="FILE", + help="file that contains words that will be ignored " + "by codespell. File must contain 1 word per line." + " Words are case sensitive based on how they are " + "written in the dictionary file", + ) + parser.add_argument( + "-L", + "--ignore-words-list", + action="append", + metavar="WORDS", + help="comma separated list of words to be ignored " + "by codespell. Words are case sensitive based on " + "how they are written in the dictionary file", + ) + parser.add_argument( + "--uri-ignore-words-list", + action="append", + metavar="WORDS", + help="comma separated list of words to be ignored " + "by codespell in URIs and emails only. Words are " + "case sensitive based on how they are written in " + 'the dictionary file. If set to "*", all ' + "misspelling in URIs and emails will be ignored.", + ) + parser.add_argument( + "-r", + "--regex", + action="store", + type=str, + help="regular expression that is used to find words. " + "By default any alphanumeric character, the " + "underscore, the hyphen, and the apostrophe is " + "used to build words. This option cannot be " + "specified together with --write-changes.", + ) + parser.add_argument( + "--uri-regex", + action="store", + type=str, + help="regular expression that is used to find URIs " + "and emails. A default expression is provided.", + ) + parser.add_argument( + "-s", + "--summary", + action="store_true", + default=False, + help="print summary of fixes", + ) + + parser.add_argument( + "--count", + action="store_true", + default=False, + help="print the number of errors as the last line of stderr", + ) + + parser.add_argument( + "-S", + "--skip", + action="append", + help="comma-separated list of files to skip. It " + "accepts globs as well. E.g.: if you want " + "codespell to skip .eps and .txt files, " + 'you\'d give "*.eps,*.txt" to this option.', + ) + + parser.add_argument( + "-x", + "--exclude-file", + type=str, + metavar="FILE", + help="ignore whole lines that match those " + "in the file FILE. The lines in FILE " + "should match the to-be-excluded lines exactly", + ) + + parser.add_argument( + "-i", + "--interactive", + action="store", + type=int, + default=0, + help="set interactive mode when writing changes:\n" + "- 0: no interactivity.\n" + "- 1: ask for confirmation.\n" + "- 2: ask user to choose one fix when more than one is available.\n" + "- 3: both 1 and 2", + ) + + parser.add_argument( + "-q", + "--quiet-level", + action="store", + type=int, + default=2, + help="bitmask that allows suppressing messages:\n" + "- 0: print all messages.\n" + "- 1: disable warnings about wrong encoding.\n" + "- 2: disable warnings about binary files.\n" + "- 4: omit warnings about automatic fixes that were disabled in the dictionary.\n" # noqa: E501 + "- 8: don't print anything for non-automatic fixes.\n" + "- 16: don't print the list of fixed files.\n" + "As usual with bitmasks, these levels can be " + "combined; e.g. use 3 for levels 1+2, 7 for " + "1+2+4, 23 for 1+2+4+16, etc. " + "The default mask is %(default)s.", + ) + + parser.add_argument( + "-e", + "--hard-encoding-detection", + action="store_true", + default=False, + help="use chardet to detect the encoding of each " + "file. This can slow down codespell, but is more " + "reliable in detecting encodings other than " + "utf-8, iso8859-1, and ascii.", + ) + + parser.add_argument( + "-f", + "--check-filenames", + action="store_true", + default=False, + help="check file names as well", + ) + + parser.add_argument( + "-H", + "--check-hidden", + action="store_true", + default=False, + help="check hidden files and directories (those " 'starting with ".") as well.', + ) + parser.add_argument( + "-A", + "--after-context", + type=int, + metavar="LINES", + help="print LINES of trailing context", + ) + parser.add_argument( + "-B", + "--before-context", + type=int, + metavar="LINES", + help="print LINES of leading context", + ) + parser.add_argument( + "-C", + "--context", + type=int, + metavar="LINES", + help="print LINES of surrounding context", + ) + parser.add_argument("--config", type=str, help="path to config file.") + parser.add_argument("--toml", type=str, help="path to a pyproject.toml file.") + parser.add_argument("files", nargs="*", help="files or directories to check") # Parse command line options. options = parser.parse_args(list(args)) # Load config files and look for ``codespell`` options. - cfg_files = ['setup.cfg', '.codespellrc'] + cfg_files = ["setup.cfg", ".codespellrc"] if options.config: cfg_files.append(options.config) config = configparser.ConfigParser(interpolation=None) # Read toml before other config files. toml_files_errors = [] - if os.path.isfile('pyproject.toml'): - toml_files_errors.append(('pyproject.toml', False)) + if os.path.isfile("pyproject.toml"): + toml_files_errors.append(("pyproject.toml", False)) if options.toml: toml_files_errors.append((options.toml, True)) for toml_file, raise_error in toml_files_errors: @@ -411,12 +556,13 @@ def parse_options(args): except Exception as exc: if raise_error: raise ImportError( - f'tomli is required to read pyproject.toml but could not ' - f'be imported, got: {exc}') from None + f"tomli is required to read pyproject.toml but could not " + f"be imported, got: {exc}" + ) from None else: continue - with open(toml_file, 'rb') as f: - data = tomli.load(f).get('tool', {}) + with open(toml_file, "rb") as f: + data = tomli.load(f).get("tool", {}) config.read_dict(data) # Collect which config files are going to be used @@ -424,19 +570,19 @@ def parse_options(args): for cfg_file in cfg_files: _cfg = configparser.ConfigParser() _cfg.read(cfg_file) - if _cfg.has_section('codespell'): + if _cfg.has_section("codespell"): used_cfg_files.append(cfg_file) # Use config files config.read(cfg_files) - if config.has_section('codespell'): + if config.has_section("codespell"): # Build a "fake" argv list using option name and value. cfg_args = [] - for key in config['codespell']: + for key in config["codespell"]: # Add option as arg. cfg_args.append("--%s" % key) # If value is blank, skip. - val = config['codespell'][key] + val = config["codespell"][key] if val != "": cfg_args.append(val) @@ -447,7 +593,7 @@ def parse_options(args): options = parser.parse_args(list(args), namespace=options) if not options.files: - options.files.append('.') + options.files.append(".") return options, parser, used_cfg_files @@ -456,27 +602,27 @@ def parse_ignore_words_option(ignore_words_option): ignore_words = set() if ignore_words_option: for comma_separated_words in ignore_words_option: - for word in comma_separated_words.split(','): + for word in comma_separated_words.split(","): ignore_words.add(word.strip()) return ignore_words def build_exclude_hashes(filename, exclude_lines): - with open(filename, encoding='utf-8') as f: + with open(filename, encoding="utf-8") as f: for line in f: exclude_lines.add(line) def build_ignore_words(filename, ignore_words): - with open(filename, encoding='utf-8') as f: + with open(filename, encoding="utf-8") as f: for line in f: ignore_words.add(line.strip()) def build_dict(filename, misspellings, ignore_words): - with open(filename, encoding='utf-8') as f: + with open(filename, encoding="utf-8") as f: for line in f: - [key, data] = line.split('->') + [key, data] = line.split("->") # TODO for now, convert both to lower. Someday we can maybe add # support for fixing caps. key = key.lower() @@ -484,17 +630,17 @@ def build_dict(filename, misspellings, ignore_words): if key in ignore_words: continue data = data.strip() - fix = data.rfind(',') + fix = data.rfind(",") if fix < 0: fix = True - reason = '' + reason = "" elif fix == (len(data) - 1): data = data[:fix] - reason = '' + reason = "" fix = False else: - reason = data[fix + 1:].strip() + reason = data[fix + 1 :].strip() data = data[:fix] fix = False @@ -504,19 +650,20 @@ def build_dict(filename, misspellings, ignore_words): def is_hidden(filename, check_hidden): bfilename = os.path.basename(filename) - return bfilename not in ('', '.', '..') and \ - (not check_hidden and bfilename[0] == '.') + return bfilename not in ("", ".", "..") and ( + not check_hidden and bfilename[0] == "." + ) def is_text_file(filename): - with open(filename, mode='rb') as f: + with open(filename, mode="rb") as f: s = f.read(1024) - return b'\x00' not in s + return b"\x00" not in s def fix_case(word, fixword): if word == word.capitalize(): - return ', '.join(w.strip().capitalize() for w in fixword.split(',')) + return ", ".join(w.strip().capitalize() for w in fixword.split(",")) elif word == word.upper(): return fixword.upper() # they are both lower case @@ -529,32 +676,32 @@ def ask_for_word_fix(line, wrongword, misspelling, interactivity): return misspelling.fix, fix_case(wrongword, misspelling.data) if misspelling.fix and interactivity & 1: - r = '' + r = "" fixword = fix_case(wrongword, misspelling.data) while not r: - print("%s\t%s ==> %s (Y/n) " % (line, wrongword, fixword), end='') + print("%s\t%s ==> %s (Y/n) " % (line, wrongword, fixword), end="") r = sys.stdin.readline().strip().upper() if not r: - r = 'Y' - if r not in ('Y', 'N'): + r = "Y" + if r not in ("Y", "N"): print("Say 'y' or 'n'") - r = '' + r = "" - if r == 'N': + if r == "N": misspelling.fix = False elif (interactivity & 2) and not misspelling.reason: # if it is not disabled, i.e. it just has more than one possible fix, # we ask the user which word to use - r = '' - opt = [w.strip() for w in misspelling.data.split(',')] + r = "" + opt = [w.strip() for w in misspelling.data.split(",")] while not r: - print("%s Choose an option (blank for none): " % line, end='') + print("%s Choose an option (blank for none): " % line, end="") for i, o in enumerate(opt): fixword = fix_case(wrongword, o) - print(" %d) %s" % (i, fixword), end='') - print(": ", end='') + print(" %d) %s" % (i, fixword), end="") + print(": ", end="") sys.stdout.flush() n = sys.stdin.readline().strip() @@ -578,35 +725,46 @@ def print_context(lines, index, context): # context = (context_before, context_after) for i in range(index - context[0], index + context[1] + 1): if 0 <= i < len(lines): - print('%s %s' % ('>' if i == index else ':', lines[i].rstrip())) + print("%s %s" % (">" if i == index else ":", lines[i].rstrip())) def extract_words(text, word_regex, ignore_word_regex): if ignore_word_regex: - text = ignore_word_regex.sub(' ', text) + text = ignore_word_regex.sub(" ", text) return word_regex.findall(text) -def apply_uri_ignore_words(check_words, line, word_regex, ignore_word_regex, - uri_regex, uri_ignore_words): +def apply_uri_ignore_words( + check_words, line, word_regex, ignore_word_regex, uri_regex, uri_ignore_words +): if not uri_ignore_words: return for uri in re.findall(uri_regex, line): - for uri_word in extract_words(uri, word_regex, - ignore_word_regex): + for uri_word in extract_words(uri, word_regex, ignore_word_regex): if uri_word in uri_ignore_words: check_words.remove(uri_word) -def parse_file(filename, colors, summary, misspellings, exclude_lines, - file_opener, word_regex, ignore_word_regex, uri_regex, - uri_ignore_words, context, options): +def parse_file( + filename, + colors, + summary, + misspellings, + exclude_lines, + file_opener, + word_regex, + ignore_word_regex, + uri_regex, + uri_ignore_words, + context, + options, +): bad_count = 0 lines = None changed = False encoding = encodings[0] # if not defined, use UTF-8 - if filename == '-': + if filename == "-": f = sys.stdin lines = f.readlines() else: @@ -628,21 +786,28 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, if misspellings[lword].reason: if options.quiet_level & QuietLevels.DISABLED_FIXES: continue - creason = " | %s%s%s" % (colors.FILE, - misspellings[lword].reason, - colors.DISABLE) + creason = " | %s%s%s" % ( + colors.FILE, + misspellings[lword].reason, + colors.DISABLE, + ) else: if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: continue - creason = '' + creason = "" bad_count += 1 - print("%(FILENAME)s: %(WRONGWORD)s" - " ==> %(RIGHTWORD)s%(REASON)s" - % {'FILENAME': cfilename, - 'WRONGWORD': cwrongword, - 'RIGHTWORD': crightword, 'REASON': creason}) + print( + "%(FILENAME)s: %(WRONGWORD)s" + " ==> %(RIGHTWORD)s%(REASON)s" + % { + "FILENAME": cfilename, + "WRONGWORD": cwrongword, + "RIGHTWORD": crightword, + "REASON": creason, + } + ) # ignore irregular files if not os.path.isfile(filename): @@ -651,8 +816,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, try: text = is_text_file(filename) except PermissionError as e: - print("WARNING: %s: %s" % (e.strerror, filename), - file=sys.stderr) + print("WARNING: %s: %s" % (e.strerror, filename), file=sys.stderr) return bad_count except OSError: return bad_count @@ -678,12 +842,17 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, # This ensures that if a URI ignore word occurs both inside a URI and # outside, it will still be a spelling error. if "*" in uri_ignore_words: - line = uri_regex.sub(' ', line) + line = uri_regex.sub(" ", line) check_words = extract_words(line, word_regex, ignore_word_regex) if "*" not in uri_ignore_words: - apply_uri_ignore_words(check_words, line, word_regex, - ignore_word_regex, uri_regex, - uri_ignore_words) + apply_uri_ignore_words( + check_words, + line, + word_regex, + ignore_word_regex, + uri_regex, + uri_ignore_words, + ) for word in check_words: lword = word.lower() @@ -697,8 +866,8 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, context_shown = True print_context(lines, i, context) fix, fixword = ask_for_word_fix( - lines[i], word, misspellings[lword], - options.interactive) + lines[i], word, misspellings[lword], options.interactive + ) asked_for.add(lword) if summary and fix: @@ -709,13 +878,16 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, if options.write_changes and fix: changed = True - lines[i] = re.sub(r'\b%s\b' % word, fixword, lines[i]) + lines[i] = re.sub(r"\b%s\b" % word, fixword, lines[i]) fixed_words.add(word) continue # otherwise warning was explicitly set by interactive mode - if (options.interactive & 2 and not fix and not - misspellings[lword].reason): + if ( + options.interactive & 2 + and not fix + and not misspellings[lword].reason + ): continue cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE) @@ -727,14 +899,16 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, if options.quiet_level & QuietLevels.DISABLED_FIXES: continue - creason = " | %s%s%s" % (colors.FILE, - misspellings[lword].reason, - colors.DISABLE) + creason = " | %s%s%s" % ( + colors.FILE, + misspellings[lword].reason, + colors.DISABLE, + ) else: if options.quiet_level & QuietLevels.NON_AUTOMATIC_FIXES: continue - creason = '' + creason = "" # If we get to this point (uncorrected error) we should change # our bad_count and thus return value @@ -742,30 +916,43 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines, if (not context_shown) and (context is not None): print_context(lines, i, context) - if filename != '-': - print("%(FILENAME)s:%(LINE)s: %(WRONGWORD)s " - "==> %(RIGHTWORD)s%(REASON)s" - % {'FILENAME': cfilename, 'LINE': cline, - 'WRONGWORD': cwrongword, - 'RIGHTWORD': crightword, 'REASON': creason}) + if filename != "-": + print( + "%(FILENAME)s:%(LINE)s: %(WRONGWORD)s " + "==> %(RIGHTWORD)s%(REASON)s" + % { + "FILENAME": cfilename, + "LINE": cline, + "WRONGWORD": cwrongword, + "RIGHTWORD": crightword, + "REASON": creason, + } + ) else: - print("%(LINE)s: %(STRLINE)s\n\t%(WRONGWORD)s " - "==> %(RIGHTWORD)s%(REASON)s" - % {'LINE': cline, 'STRLINE': line.strip(), - 'WRONGWORD': cwrongword, - 'RIGHTWORD': crightword, 'REASON': creason}) + print( + "%(LINE)s: %(STRLINE)s\n\t%(WRONGWORD)s " + "==> %(RIGHTWORD)s%(REASON)s" + % { + "LINE": cline, + "STRLINE": line.strip(), + "WRONGWORD": cwrongword, + "RIGHTWORD": crightword, + "REASON": creason, + } + ) if changed: - if filename == '-': + if filename == "-": print("---") for line in lines: - print(line, end='') + print(line, end="") else: if not options.quiet_level & QuietLevels.FIXES: - print("%sFIXED:%s %s" - % (colors.FWORD, colors.DISABLE, filename), - file=sys.stderr) - with open(filename, 'w', encoding=encoding, newline='') as f: + print( + "%sFIXED:%s %s" % (colors.FWORD, colors.DISABLE, filename), + file=sys.stderr, + ) + with open(filename, "w", encoding=encoding, newline="") as f: f.writelines(lines) return bad_count @@ -781,21 +968,22 @@ def main(*args): # Report used config files if len(used_cfg_files) > 0: - print('Used config files:') + print("Used config files:") for ifile, cfg_file in enumerate(used_cfg_files, start=1): - print(' %i: %s' % (ifile, cfg_file)) + print(" %i: %s" % (ifile, cfg_file)) if options.regex and options.write_changes: - print("ERROR: --write-changes cannot be used together with " - "--regex", file=sys.stderr) + print( + "ERROR: --write-changes cannot be used together with --regex", + file=sys.stderr, + ) parser.print_help() return EX_USAGE word_regex = options.regex or word_regex_def try: word_regex = re.compile(word_regex) except re.error as err: - print("ERROR: invalid --regex \"%s\" (%s)" % - (word_regex, err), file=sys.stderr) + print('ERROR: invalid --regex "%s" (%s)' % (word_regex, err), file=sys.stderr) parser.print_help() return EX_USAGE @@ -803,8 +991,10 @@ def main(*args): try: ignore_word_regex = re.compile(options.ignore_regex) except re.error as err: - print("ERROR: invalid --ignore-regex \"%s\" (%s)" % - (options.ignore_regex, err), file=sys.stderr) + print( + 'ERROR: invalid --ignore-regex "%s" (%s)' % (options.ignore_regex, err), + file=sys.stderr, + ) parser.print_help() return EX_USAGE else: @@ -814,8 +1004,10 @@ def main(*args): ignore_words = parse_ignore_words_option(options.ignore_words_list) for ignore_words_file in ignore_words_files: if not os.path.isfile(ignore_words_file): - print("ERROR: cannot find ignore-words file: %s" % - ignore_words_file, file=sys.stderr) + print( + "ERROR: cannot find ignore-words file: %s" % ignore_words_file, + file=sys.stderr, + ) parser.print_help() return EX_USAGE build_ignore_words(ignore_words_file, ignore_words) @@ -824,8 +1016,9 @@ def main(*args): try: uri_regex = re.compile(uri_regex) except re.error as err: - print("ERROR: invalid --uri-regex \"%s\" (%s)" % - (uri_regex, err), file=sys.stderr) + print( + 'ERROR: invalid --uri-regex "%s" (%s)' % (uri_regex, err), file=sys.stderr + ) parser.print_help() return EX_USAGE uri_ignore_words = parse_ignore_words_option(options.uri_ignore_words_list) @@ -833,28 +1026,31 @@ def main(*args): if options.dictionary: dictionaries = options.dictionary else: - dictionaries = ['-'] + dictionaries = ["-"] use_dictionaries = [] for dictionary in dictionaries: if dictionary == "-": # figure out which builtin dictionaries to use - use = sorted(set(options.builtin.split(','))) + use = sorted(set(options.builtin.split(","))) for u in use: for builtin in _builtin_dictionaries: if builtin[0] == u: use_dictionaries.append( - os.path.join(_data_root, 'dictionary%s.txt' - % (builtin[2],))) + os.path.join(_data_root, "dictionary%s.txt" % (builtin[2],)) + ) break else: - print("ERROR: Unknown builtin dictionary: %s" % (u,), - file=sys.stderr) + print( + "ERROR: Unknown builtin dictionary: %s" % (u,), file=sys.stderr + ) parser.print_help() return EX_USAGE else: if not os.path.isfile(dictionary): - print("ERROR: cannot find dictionary file: %s" % dictionary, - file=sys.stderr) + print( + "ERROR: cannot find dictionary file: %s" % dictionary, + file=sys.stderr, + ) parser.print_help() return EX_USAGE use_dictionaries.append(dictionary) @@ -862,7 +1058,7 @@ def main(*args): for dictionary in use_dictionaries: build_dict(dictionary, misspellings, ignore_words) colors = TermColors() - if not options.colors or sys.platform == 'win32': + if not options.colors or sys.platform == "win32": colors.disable() if options.summary: @@ -872,17 +1068,17 @@ def main(*args): context = None if options.context is not None: - if (options.before_context is not None) or \ - (options.after_context is not None): - print("ERROR: --context/-C cannot be used together with " - "--context-before/-B or --context-after/-A", - file=sys.stderr) + if (options.before_context is not None) or (options.after_context is not None): + print( + "ERROR: --context/-C cannot be used together with " + "--context-before/-B or --context-after/-A", + file=sys.stderr, + ) parser.print_help() return EX_USAGE context_both = max(0, options.context) context = (context_both, context_both) - elif (options.before_context is not None) or \ - (options.after_context is not None): + elif (options.before_context is not None) or (options.after_context is not None): context_before = 0 context_after = 0 if options.before_context is not None: @@ -895,16 +1091,17 @@ def main(*args): if options.exclude_file: build_exclude_hashes(options.exclude_file, exclude_lines) - file_opener = FileOpener(options.hard_encoding_detection, - options.quiet_level) + file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level) glob_match = GlobMatch(options.skip) try: glob_match.match("/random/path") # does not need a real path except re.error: - print("ERROR: --skip/-S has been fed an invalid glob, " - "try escaping special characters", - file=sys.stderr) + print( + "ERROR: --skip/-S has been fed an invalid glob, " + "try escaping special characters", + file=sys.stderr, + ) return EX_USAGE bad_count = 0 @@ -930,18 +1127,38 @@ def main(*args): if glob_match.match(fname): # skip paths continue bad_count += parse_file( - fname, colors, summary, misspellings, exclude_lines, - file_opener, word_regex, ignore_word_regex, uri_regex, - uri_ignore_words, context, options) + fname, + colors, + summary, + misspellings, + exclude_lines, + file_opener, + word_regex, + ignore_word_regex, + uri_regex, + uri_ignore_words, + context, + options, + ) # skip (relative) directories dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)] elif not glob_match.match(filename): # skip files bad_count += parse_file( - filename, colors, summary, misspellings, exclude_lines, - file_opener, word_regex, ignore_word_regex, uri_regex, - uri_ignore_words, context, options) + filename, + colors, + summary, + misspellings, + exclude_lines, + file_opener, + word_regex, + ignore_word_regex, + uri_regex, + uri_ignore_words, + context, + options, + ) if summary: print("\n-------8<-------\nSUMMARY:") diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py index a9401b2a744..ed6b17159b6 100644 --- a/codespell_lib/tests/test_basic.py +++ b/codespell_lib/tests/test_basic.py @@ -27,15 +27,15 @@ class MainWrapper: @staticmethod def main(*args, count=True, std=False, **kwargs): if count: - args = ('--count',) + args + args = ("--count",) + args code = cs_.main(*args, **kwargs) - capsys = inspect.currentframe().f_back.f_locals['capsys'] + capsys = inspect.currentframe().f_back.f_locals["capsys"] stdout, stderr = capsys.readouterr() assert code in (EX_OK, EX_USAGE, EX_DATAERR) if code == EX_DATAERR: # have some misspellings - code = int(stderr.split('\n')[-2]) + code = int(stderr.split("\n")[-2]) elif code == EX_OK and count: - code = int(stderr.split('\n')[-2]) + code = int(stderr.split("\n")[-2]) assert code == 0 if std: return (code, stdout, stderr) @@ -48,11 +48,11 @@ def main(*args, count=True, std=False, **kwargs): def run_codespell(args=(), cwd=None): """Run codespell.""" - args = ('--count',) + args + args = ("--count",) + args proc = subprocess.run( - ['codespell'] + list(args), cwd=cwd, - capture_output=True, encoding='utf-8') - count = int(proc.stderr.split('\n')[-2]) + ["codespell"] + list(args), cwd=cwd, capture_output=True, encoding="utf-8" + ) + count = int(proc.stderr.split("\n")[-2]) return count @@ -61,327 +61,330 @@ def test_command(tmpdir): # With no arguments does "." d = str(tmpdir) assert run_codespell(cwd=d) == 0 - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('abandonned\nAbandonned\nABANDONNED\nAbAnDoNnEd') + with open(op.join(d, "bad.txt"), "w") as f: + f.write("abandonned\nAbandonned\nABANDONNED\nAbAnDoNnEd") assert run_codespell(cwd=d) == 4 def test_basic(tmpdir, capsys): """Test some basic functionality.""" - assert cs.main('_does_not_exist_') == 0 - fname = op.join(str(tmpdir), 'tmp') - with open(fname, 'w') as f: + assert cs.main("_does_not_exist_") == 0 + fname = op.join(str(tmpdir), "tmp") + with open(fname, "w") as f: pass - code, _, stderr = cs.main('-D', 'foo', f.name, std=True) - assert code == EX_USAGE, 'missing dictionary' - assert 'cannot find dictionary' in stderr - assert cs.main(fname) == 0, 'empty file' - with open(fname, 'a') as f: - f.write('this is a test file\n') - assert cs.main(fname) == 0, 'good' - with open(fname, 'a') as f: - f.write('abandonned\n') - assert cs.main(fname) == 1, 'bad' - with open(fname, 'a') as f: - f.write('abandonned\n') - assert cs.main(fname) == 2, 'worse' - with open(fname, 'a') as f: - f.write('tim\ngonna\n') - assert cs.main(fname) == 2, 'with a name' - assert cs.main('--builtin', 'clear,rare,names,informal', fname) == 4 - code, _, stderr = cs.main(fname, '--builtin', 'foo', std=True) + code, _, stderr = cs.main("-D", "foo", f.name, std=True) + assert code == EX_USAGE, "missing dictionary" + assert "cannot find dictionary" in stderr + assert cs.main(fname) == 0, "empty file" + with open(fname, "a") as f: + f.write("this is a test file\n") + assert cs.main(fname) == 0, "good" + with open(fname, "a") as f: + f.write("abandonned\n") + assert cs.main(fname) == 1, "bad" + with open(fname, "a") as f: + f.write("abandonned\n") + assert cs.main(fname) == 2, "worse" + with open(fname, "a") as f: + f.write("tim\ngonna\n") + assert cs.main(fname) == 2, "with a name" + assert cs.main("--builtin", "clear,rare,names,informal", fname) == 4 + code, _, stderr = cs.main(fname, "--builtin", "foo", std=True) assert code == EX_USAGE # bad type - assert 'Unknown builtin dictionary' in stderr + assert "Unknown builtin dictionary" in stderr d = str(tmpdir) - code, _, stderr = cs.main(fname, '-D', op.join(d, 'foo'), std=True) + code, _, stderr = cs.main(fname, "-D", op.join(d, "foo"), std=True) assert code == EX_USAGE # bad dict - assert 'cannot find dictionary' in stderr + assert "cannot find dictionary" in stderr os.remove(fname) - with open(op.join(d, 'bad.txt'), 'w', newline='') as f: - f.write('abandonned\nAbandonned\nABANDONNED\nAbAnDoNnEd\nabandonned\rAbandonned\r\nABANDONNED \n AbAnDoNnEd') # noqa: E501 + with open(op.join(d, "bad.txt"), "w", newline="") as f: + f.write( + "abandonned\nAbandonned\nABANDONNED\nAbAnDoNnEd\nabandonned\rAbandonned\r\nABANDONNED \n AbAnDoNnEd" # noqa: E501 + ) assert cs.main(d) == 8 - code, _, stderr = cs.main('-w', d, std=True) + code, _, stderr = cs.main("-w", d, std=True) assert code == 0 - assert 'FIXED:' in stderr - with open(op.join(d, 'bad.txt'), newline='') as f: + assert "FIXED:" in stderr + with open(op.join(d, "bad.txt"), newline="") as f: new_content = f.read() assert cs.main(d) == 0 - assert new_content == 'abandoned\nAbandoned\nABANDONED\nabandoned\nabandoned\rAbandoned\r\nABANDONED \n abandoned' # noqa: E501 + assert ( + new_content + == "abandoned\nAbandoned\nABANDONED\nabandoned\nabandoned\rAbandoned\r\nABANDONED \n abandoned" # noqa: E501 + ) - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('abandonned abandonned\n') + with open(op.join(d, "bad.txt"), "w") as f: + f.write("abandonned abandonned\n") assert cs.main(d) == 2 - code, stdout, stderr = cs.main( - '-q', '16', '-w', d, count=False, std=True) + code, stdout, stderr = cs.main("-q", "16", "-w", d, count=False, std=True) assert code == 0 - assert stdout == stderr == '' + assert stdout == stderr == "" assert cs.main(d) == 0 # empty directory - os.mkdir(op.join(d, 'empty')) + os.mkdir(op.join(d, "empty")) assert cs.main(d) == 0 def test_bad_glob(tmpdir, capsys): # disregard invalid globs, properly handle escaped globs - g = op.join(tmpdir, 'glob') + g = op.join(tmpdir, "glob") os.mkdir(g) - fname = op.join(g, '[b-a].txt') - with open(fname, 'a') as f: - f.write('abandonned\n') + fname = op.join(g, "[b-a].txt") + with open(fname, "a") as f: + f.write("abandonned\n") assert cs.main(g) == 1 # bad glob is invalid - code, _, stderr = cs.main('--skip', '[b-a].txt', - g, std=True) + code, _, stderr = cs.main("--skip", "[b-a].txt", g, std=True) if sys.hexversion < 0x030A05F0: # Python < 3.10.5 raises re.error - assert code == EX_USAGE, 'invalid glob' - assert 'invalid glob' in stderr + assert code == EX_USAGE, "invalid glob" + assert "invalid glob" in stderr else: # Python >= 3.10.5 does not match assert code == 1 # properly escaped glob is valid, and matches glob-like file name - assert cs.main('--skip', '[[]b-a[]].txt', g) == 0 + assert cs.main("--skip", "[[]b-a[]].txt", g) == 0 -@pytest.mark.skipif( - not sys.platform == 'linux', reason='Only supported on Linux') +@pytest.mark.skipif(not sys.platform == "linux", reason="Only supported on Linux") def test_permission_error(tmp_path, capsys): """Test permission error handling.""" d = tmp_path - with open(d / 'unreadable.txt', 'w') as f: - f.write('abandonned\n') + with open(d / "unreadable.txt", "w") as f: + f.write("abandonned\n") code, _, stderr = cs.main(f.name, std=True) - assert 'WARNING:' not in stderr + assert "WARNING:" not in stderr os.chmod(f.name, 0o000) code, _, stderr = cs.main(f.name, std=True) - assert 'WARNING:' in stderr + assert "WARNING:" in stderr def test_interactivity(tmpdir, capsys): """Test interaction""" # Windows can't read a currently-opened file, so here we use # NamedTemporaryFile just to get a good name - with open(op.join(str(tmpdir), 'tmp'), 'w') as f: + with open(op.join(str(tmpdir), "tmp"), "w") as f: pass try: - assert cs.main(f.name) == 0, 'empty file' - with open(f.name, 'w') as f: - f.write('abandonned\n') - assert cs.main('-i', '-1', f.name) == 1, 'bad' - with FakeStdin('y\n'): - assert cs.main('-i', '3', f.name) == 1 - with FakeStdin('n\n'): - code, stdout, _ = cs.main('-w', '-i', '3', f.name, std=True) + assert cs.main(f.name) == 0, "empty file" + with open(f.name, "w") as f: + f.write("abandonned\n") + assert cs.main("-i", "-1", f.name) == 1, "bad" + with FakeStdin("y\n"): + assert cs.main("-i", "3", f.name) == 1 + with FakeStdin("n\n"): + code, stdout, _ = cs.main("-w", "-i", "3", f.name, std=True) assert code == 0 - assert '==>' in stdout - with FakeStdin('x\ny\n'): - assert cs.main('-w', '-i', '3', f.name) == 0 + assert "==>" in stdout + with FakeStdin("x\ny\n"): + assert cs.main("-w", "-i", "3", f.name) == 0 assert cs.main(f.name) == 0 finally: os.remove(f.name) # New example - with open(op.join(str(tmpdir), 'tmp2'), 'w') as f: + with open(op.join(str(tmpdir), "tmp2"), "w") as f: pass try: - with open(f.name, 'w') as f: - f.write('abandonned\n') + with open(f.name, "w") as f: + f.write("abandonned\n") assert cs.main(f.name) == 1 - with FakeStdin(' '): # blank input -> Y - assert cs.main('-w', '-i', '3', f.name) == 0 + with FakeStdin(" "): # blank input -> Y + assert cs.main("-w", "-i", "3", f.name) == 0 assert cs.main(f.name) == 0 finally: os.remove(f.name) # multiple options - with open(op.join(str(tmpdir), 'tmp3'), 'w') as f: + with open(op.join(str(tmpdir), "tmp3"), "w") as f: pass try: - with open(f.name, 'w') as f: - f.write('ackward\n') + with open(f.name, "w") as f: + f.write("ackward\n") assert cs.main(f.name) == 1 - with FakeStdin(' \n'): # blank input -> nothing - assert cs.main('-w', '-i', '3', f.name) == 0 + with FakeStdin(" \n"): # blank input -> nothing + assert cs.main("-w", "-i", "3", f.name) == 0 assert cs.main(f.name) == 1 - with FakeStdin('0\n'): # blank input -> nothing - assert cs.main('-w', '-i', '3', f.name) == 0 + with FakeStdin("0\n"): # blank input -> nothing + assert cs.main("-w", "-i", "3", f.name) == 0 assert cs.main(f.name) == 0 with open(f.name) as f_read: - assert f_read.read() == 'awkward\n' - with open(f.name, 'w') as f: - f.write('ackward\n') + assert f_read.read() == "awkward\n" + with open(f.name, "w") as f: + f.write("ackward\n") assert cs.main(f.name) == 1 - with FakeStdin('x\n1\n'): # blank input -> nothing - code, stdout, _ = cs.main('-w', '-i', '3', f.name, std=True) + with FakeStdin("x\n1\n"): # blank input -> nothing + code, stdout, _ = cs.main("-w", "-i", "3", f.name, std=True) assert code == 0 - assert 'a valid option' in stdout + assert "a valid option" in stdout assert cs.main(f.name) == 0 with open(f.name) as f: - assert f.read() == 'backward\n' + assert f.read() == "backward\n" finally: os.remove(f.name) def test_summary(tmpdir, capsys): """Test summary functionality.""" - with open(op.join(str(tmpdir), 'tmp'), 'w') as f: + with open(op.join(str(tmpdir), "tmp"), "w") as f: pass code, stdout, stderr = cs.main(f.name, std=True, count=False) assert code == 0 - assert stdout == stderr == '', 'no output' - code, stdout, stderr = cs.main(f.name, '--summary', std=True) + assert stdout == stderr == "", "no output" + code, stdout, stderr = cs.main(f.name, "--summary", std=True) assert code == 0 - assert stderr == '0\n' - assert 'SUMMARY' in stdout - assert len(stdout.split('\n')) == 5 - with open(f.name, 'w') as f: - f.write('abandonned\nabandonned') + assert stderr == "0\n" + assert "SUMMARY" in stdout + assert len(stdout.split("\n")) == 5 + with open(f.name, "w") as f: + f.write("abandonned\nabandonned") assert code == 0 - code, stdout, stderr = cs.main(f.name, '--summary', std=True) - assert stderr == '2\n' - assert 'SUMMARY' in stdout - assert len(stdout.split('\n')) == 7 - assert 'abandonned' in stdout.split()[-2] + code, stdout, stderr = cs.main(f.name, "--summary", std=True) + assert stderr == "2\n" + assert "SUMMARY" in stdout + assert len(stdout.split("\n")) == 7 + assert "abandonned" in stdout.split()[-2] def test_ignore_dictionary(tmpdir, capsys): """Test ignore dictionary functionality.""" d = str(tmpdir) - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('1 abandonned 1\n2 abandonned 2\nabondon\n') + with open(op.join(d, "bad.txt"), "w") as f: + f.write("1 abandonned 1\n2 abandonned 2\nabondon\n") bad_name = f.name assert cs.main(bad_name) == 3 - with open(op.join(d, 'ignore.txt'), 'w') as f: - f.write('abandonned\n') - assert cs.main('-I', f.name, bad_name) == 1 + with open(op.join(d, "ignore.txt"), "w") as f: + f.write("abandonned\n") + assert cs.main("-I", f.name, bad_name) == 1 def test_ignore_word_list(tmpdir, capsys): """Test ignore word list functionality.""" d = str(tmpdir) - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('abandonned\nabondon\nabilty\n') + with open(op.join(d, "bad.txt"), "w") as f: + f.write("abandonned\nabondon\nabilty\n") assert cs.main(d) == 3 - assert cs.main('-Labandonned,someword', '-Labilty', d) == 1 + assert cs.main("-Labandonned,someword", "-Labilty", d) == 1 def test_custom_regex(tmpdir, capsys): """Test custom word regex.""" d = str(tmpdir) - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('abandonned_abondon\n') + with open(op.join(d, "bad.txt"), "w") as f: + f.write("abandonned_abondon\n") assert cs.main(d) == 0 - assert cs.main('-r', "[a-z]+", d) == 2 - code, _, stderr = cs.main('-r', '[a-z]+', '--write-changes', d, std=True) + assert cs.main("-r", "[a-z]+", d) == 2 + code, _, stderr = cs.main("-r", "[a-z]+", "--write-changes", d, std=True) assert code == EX_USAGE - assert 'ERROR:' in stderr + assert "ERROR:" in stderr def test_exclude_file(tmpdir, capsys): """Test exclude file functionality.""" d = str(tmpdir) - with open(op.join(d, 'bad.txt'), 'wb') as f: - f.write(b'1 abandonned 1\n2 abandonned 2\n') + with open(op.join(d, "bad.txt"), "wb") as f: + f.write(b"1 abandonned 1\n2 abandonned 2\n") bad_name = f.name assert cs.main(bad_name) == 2 - with open(op.join(d, 'tmp.txt'), 'wb') as f: - f.write(b'1 abandonned 1\n') + with open(op.join(d, "tmp.txt"), "wb") as f: + f.write(b"1 abandonned 1\n") assert cs.main(bad_name) == 2 - assert cs.main('-x', f.name, bad_name) == 1 + assert cs.main("-x", f.name, bad_name) == 1 def test_encoding(tmpdir, capsys): """Test encoding handling.""" # Some simple Unicode things - with open(op.join(str(tmpdir), 'tmp'), 'w') as f: + with open(op.join(str(tmpdir), "tmp"), "w") as f: pass # with CaptureStdout() as sio: assert cs.main(f.name) == 0 - with open(f.name, 'wb') as f: - f.write('naïve\n'.encode()) + with open(f.name, "wb") as f: + f.write("naïve\n".encode()) assert cs.main(f.name) == 0 - assert cs.main('-e', f.name) == 0 - with open(f.name, 'ab') as f: - f.write(b'naieve\n') + assert cs.main("-e", f.name) == 0 + with open(f.name, "ab") as f: + f.write(b"naieve\n") assert cs.main(f.name) == 1 # Encoding detection (only try ISO 8859-1 because UTF-8 is the default) - with open(f.name, 'wb') as f: - f.write(b'Speling error, non-ASCII: h\xe9t\xe9rog\xe9n\xe9it\xe9\n') + with open(f.name, "wb") as f: + f.write(b"Speling error, non-ASCII: h\xe9t\xe9rog\xe9n\xe9it\xe9\n") # check warnings about wrong encoding are enabled with "-q 0" - code, stdout, stderr = cs.main('-q', '0', f.name, std=True, count=True) + code, stdout, stderr = cs.main("-q", "0", f.name, std=True, count=True) assert code == 1 - assert 'Speling' in stdout - assert 'iso-8859-1' in stderr + assert "Speling" in stdout + assert "iso-8859-1" in stderr # check warnings about wrong encoding are disabled with "-q 1" - code, stdout, stderr = cs.main('-q', '1', f.name, std=True, count=True) + code, stdout, stderr = cs.main("-q", "1", f.name, std=True, count=True) assert code == 1 - assert 'Speling' in stdout - assert 'iso-8859-1' not in stderr + assert "Speling" in stdout + assert "iso-8859-1" not in stderr # Binary file warning - with open(f.name, 'wb') as f: - f.write(b'\x00\x00naiive\x00\x00') + with open(f.name, "wb") as f: + f.write(b"\x00\x00naiive\x00\x00") code, stdout, stderr = cs.main(f.name, std=True, count=False) assert code == 0 - assert stdout == stderr == '' - code, stdout, stderr = cs.main('-q', '0', f.name, std=True, count=False) + assert stdout == stderr == "" + code, stdout, stderr = cs.main("-q", "0", f.name, std=True, count=False) assert code == 0 - assert stdout == '' - assert 'WARNING: Binary file' in stderr + assert stdout == "" + assert "WARNING: Binary file" in stderr def test_ignore(tmpdir, capsys): """Test ignoring of files and directories.""" d = str(tmpdir) - goodtxt = op.join(d, 'good.txt') - with open(goodtxt, 'w') as f: - f.write('this file is okay') + goodtxt = op.join(d, "good.txt") + with open(goodtxt, "w") as f: + f.write("this file is okay") assert cs.main(d) == 0 - badtxt = op.join(d, 'bad.txt') - with open(badtxt, 'w') as f: - f.write('abandonned') + badtxt = op.join(d, "bad.txt") + with open(badtxt, "w") as f: + f.write("abandonned") assert cs.main(d) == 1 - assert cs.main('--skip=bad*', d) == 0 - assert cs.main('--skip=bad.txt', d) == 0 - subdir = op.join(d, 'ignoredir') + assert cs.main("--skip=bad*", d) == 0 + assert cs.main("--skip=bad.txt", d) == 0 + subdir = op.join(d, "ignoredir") os.mkdir(subdir) - with open(op.join(subdir, 'bad.txt'), 'w') as f: - f.write('abandonned') + with open(op.join(subdir, "bad.txt"), "w") as f: + f.write("abandonned") assert cs.main(d) == 2 - assert cs.main('--skip=bad*', d) == 0 - assert cs.main('--skip=*ignoredir*', d) == 1 - assert cs.main('--skip=ignoredir', d) == 1 - assert cs.main('--skip=*ignoredir/bad*', d) == 1 - badjs = op.join(d, 'bad.js') + assert cs.main("--skip=bad*", d) == 0 + assert cs.main("--skip=*ignoredir*", d) == 1 + assert cs.main("--skip=ignoredir", d) == 1 + assert cs.main("--skip=*ignoredir/bad*", d) == 1 + badjs = op.join(d, "bad.js") copyfile(badtxt, badjs) - assert cs.main('--skip=*.js', goodtxt, badtxt, badjs) == 1 + assert cs.main("--skip=*.js", goodtxt, badtxt, badjs) == 1 def test_check_filename(tmpdir, capsys): """Test filename check.""" d = str(tmpdir) # Empty file - with open(op.join(d, 'abandonned.txt'), 'w') as f: - f.write('') - assert cs.main('-f', d) == 1 + with open(op.join(d, "abandonned.txt"), "w") as f: + f.write("") + assert cs.main("-f", d) == 1 # Normal file with contents - with open(op.join(d, 'abandonned.txt'), 'w') as f: - f.write('.') - assert cs.main('-f', d) == 1 + with open(op.join(d, "abandonned.txt"), "w") as f: + f.write(".") + assert cs.main("-f", d) == 1 # Normal file with binary contents - with open(op.join(d, 'abandonned.txt'), 'wb') as f: - f.write(b'\x00\x00naiive\x00\x00') - assert cs.main('-f', d) == 1 + with open(op.join(d, "abandonned.txt"), "wb") as f: + f.write(b"\x00\x00naiive\x00\x00") + assert cs.main("-f", d) == 1 -@pytest.mark.skipif((not hasattr(os, "mkfifo") or not callable(os.mkfifo)), - reason='requires os.mkfifo') +@pytest.mark.skipif( + (not hasattr(os, "mkfifo") or not callable(os.mkfifo)), reason="requires os.mkfifo" +) def test_check_filename_irregular_file(tmpdir, capsys): """Test irregular file filename check.""" # Irregular file (!isfile()) d = str(tmpdir) - os.mkfifo(op.join(d, 'abandonned')) - assert cs.main('-f', d) == 1 + os.mkfifo(op.join(d, "abandonned")) + assert cs.main("-f", d) == 1 d = str(tmpdir) @@ -389,122 +392,124 @@ def test_check_hidden(tmpdir, capsys): """Test ignoring of hidden files.""" d = str(tmpdir) # visible file - with open(op.join(d, 'test.txt'), 'w') as f: - f.write('abandonned\n') - assert cs.main(op.join(d, 'test.txt')) == 1 + with open(op.join(d, "test.txt"), "w") as f: + f.write("abandonned\n") + assert cs.main(op.join(d, "test.txt")) == 1 assert cs.main(d) == 1 # hidden file - os.rename(op.join(d, 'test.txt'), op.join(d, '.test.txt')) - assert cs.main(op.join(d, '.test.txt')) == 0 + os.rename(op.join(d, "test.txt"), op.join(d, ".test.txt")) + assert cs.main(op.join(d, ".test.txt")) == 0 assert cs.main(d) == 0 - assert cs.main('--check-hidden', op.join(d, '.test.txt')) == 1 - assert cs.main('--check-hidden', d) == 1 + assert cs.main("--check-hidden", op.join(d, ".test.txt")) == 1 + assert cs.main("--check-hidden", d) == 1 # hidden file with typo in name - os.rename(op.join(d, '.test.txt'), op.join(d, '.abandonned.txt')) - assert cs.main(op.join(d, '.abandonned.txt')) == 0 + os.rename(op.join(d, ".test.txt"), op.join(d, ".abandonned.txt")) + assert cs.main(op.join(d, ".abandonned.txt")) == 0 assert cs.main(d) == 0 - assert cs.main('--check-hidden', op.join(d, '.abandonned.txt')) == 1 - assert cs.main('--check-hidden', d) == 1 - assert cs.main('--check-hidden', '--check-filenames', - op.join(d, '.abandonned.txt')) == 2 - assert cs.main('--check-hidden', '--check-filenames', d) == 2 + assert cs.main("--check-hidden", op.join(d, ".abandonned.txt")) == 1 + assert cs.main("--check-hidden", d) == 1 + assert ( + cs.main("--check-hidden", "--check-filenames", op.join(d, ".abandonned.txt")) + == 2 + ) + assert cs.main("--check-hidden", "--check-filenames", d) == 2 # hidden directory assert cs.main(d) == 0 - assert cs.main('--check-hidden', d) == 1 - assert cs.main('--check-hidden', '--check-filenames', d) == 2 - os.mkdir(op.join(d, '.abandonned')) - copyfile(op.join(d, '.abandonned.txt'), - op.join(d, '.abandonned', 'abandonned.txt')) + assert cs.main("--check-hidden", d) == 1 + assert cs.main("--check-hidden", "--check-filenames", d) == 2 + os.mkdir(op.join(d, ".abandonned")) + copyfile(op.join(d, ".abandonned.txt"), op.join(d, ".abandonned", "abandonned.txt")) assert cs.main(d) == 0 - assert cs.main('--check-hidden', d) == 2 - assert cs.main('--check-hidden', '--check-filenames', d) == 5 + assert cs.main("--check-hidden", d) == 2 + assert cs.main("--check-hidden", "--check-filenames", d) == 5 # check again with a relative path rel = op.relpath(tmpdir) assert cs.main(rel) == 0 - assert cs.main('--check-hidden', rel) == 2 - assert cs.main('--check-hidden', '--check-filenames', rel) == 5 + assert cs.main("--check-hidden", rel) == 2 + assert cs.main("--check-hidden", "--check-filenames", rel) == 5 # hidden subdirectory assert cs.main(d) == 0 - assert cs.main('--check-hidden', d) == 2 - assert cs.main('--check-hidden', '--check-filenames', d) == 5 - subdir = op.join(d, 'subdir') + assert cs.main("--check-hidden", d) == 2 + assert cs.main("--check-hidden", "--check-filenames", d) == 5 + subdir = op.join(d, "subdir") os.mkdir(subdir) - os.mkdir(op.join(subdir, '.abandonned')) - copyfile(op.join(d, '.abandonned.txt'), - op.join(subdir, '.abandonned', 'abandonned.txt')) + os.mkdir(op.join(subdir, ".abandonned")) + copyfile( + op.join(d, ".abandonned.txt"), op.join(subdir, ".abandonned", "abandonned.txt") + ) assert cs.main(d) == 0 - assert cs.main('--check-hidden', d) == 3 - assert cs.main('--check-hidden', '--check-filenames', d) == 8 + assert cs.main("--check-hidden", d) == 3 + assert cs.main("--check-hidden", "--check-filenames", d) == 8 def test_case_handling(tmpdir, capsys): """Test that capitalized entries get detected properly.""" # Some simple Unicode things - with open(op.join(str(tmpdir), 'tmp'), 'w') as f: + with open(op.join(str(tmpdir), "tmp"), "w") as f: pass # with CaptureStdout() as sio: assert cs.main(f.name) == 0 - with open(f.name, 'wb') as f: - f.write(b'this has an ACII error') + with open(f.name, "wb") as f: + f.write(b"this has an ACII error") code, stdout, _ = cs.main(f.name, std=True) assert code == 1 - assert 'ASCII' in stdout - code, _, stderr = cs.main('-w', f.name, std=True) + assert "ASCII" in stdout + code, _, stderr = cs.main("-w", f.name, std=True) assert code == 0 - assert 'FIXED' in stderr - with open(f.name, 'rb') as f: - assert f.read().decode('utf-8') == 'this has an ASCII error' + assert "FIXED" in stderr + with open(f.name, "rb") as f: + assert f.read().decode("utf-8") == "this has an ASCII error" def _helper_test_case_handling_in_fixes(tmpdir, capsys, reason): d = str(tmpdir) - with open(op.join(d, 'dictionary.txt'), 'w') as f: + with open(op.join(d, "dictionary.txt"), "w") as f: if reason: - f.write('adoptor->adopter, adaptor, reason\n') + f.write("adoptor->adopter, adaptor, reason\n") else: - f.write('adoptor->adopter, adaptor,\n') + f.write("adoptor->adopter, adaptor,\n") dictionary_name = f.name # the mispelled word is entirely lowercase - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('early adoptor\n') - code, stdout, _ = cs.main('-D', dictionary_name, f.name, std=True) + with open(op.join(d, "bad.txt"), "w") as f: + f.write("early adoptor\n") + code, stdout, _ = cs.main("-D", dictionary_name, f.name, std=True) # all suggested fixes must be lowercase too - assert 'adopter, adaptor' in stdout + assert "adopter, adaptor" in stdout # the reason, if any, must not be modified if reason: - assert 'reason' in stdout + assert "reason" in stdout # the mispelled word is capitalized - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('Early Adoptor\n') - code, stdout, _ = cs.main('-D', dictionary_name, f.name, std=True) + with open(op.join(d, "bad.txt"), "w") as f: + f.write("Early Adoptor\n") + code, stdout, _ = cs.main("-D", dictionary_name, f.name, std=True) # all suggested fixes must be capitalized too - assert 'Adopter, Adaptor' in stdout + assert "Adopter, Adaptor" in stdout # the reason, if any, must not be modified if reason: - assert 'reason' in stdout + assert "reason" in stdout # the mispelled word is entirely uppercase - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('EARLY ADOPTOR\n') - code, stdout, _ = cs.main('-D', dictionary_name, f.name, std=True) + with open(op.join(d, "bad.txt"), "w") as f: + f.write("EARLY ADOPTOR\n") + code, stdout, _ = cs.main("-D", dictionary_name, f.name, std=True) # all suggested fixes must be uppercase too - assert 'ADOPTER, ADAPTOR' in stdout + assert "ADOPTER, ADAPTOR" in stdout # the reason, if any, must not be modified if reason: - assert 'reason' in stdout + assert "reason" in stdout # the mispelled word mixes lowercase and uppercase - with open(op.join(d, 'bad.txt'), 'w') as f: - f.write('EaRlY AdOpToR\n') - code, stdout, _ = cs.main('-D', dictionary_name, f.name, std=True) + with open(op.join(d, "bad.txt"), "w") as f: + f.write("EaRlY AdOpToR\n") + code, stdout, _ = cs.main("-D", dictionary_name, f.name, std=True) # all suggested fixes should be lowercase - assert 'adopter, adaptor' in stdout + assert "adopter, adaptor" in stdout # the reason, if any, must not be modified if reason: - assert 'reason' in stdout + assert "reason" in stdout def test_case_handling_in_fixes(tmpdir, capsys): @@ -516,67 +521,67 @@ def test_case_handling_in_fixes(tmpdir, capsys): def test_context(tmpdir, capsys): """Test context options.""" d = str(tmpdir) - with open(op.join(d, 'context.txt'), 'w') as f: - f.write('line 1\nline 2\nline 3 abandonned\nline 4\nline 5') + with open(op.join(d, "context.txt"), "w") as f: + f.write("line 1\nline 2\nline 3 abandonned\nline 4\nline 5") # symmetric context, fully within file - code, stdout, _ = cs.main('-C', '1', d, std=True) + code, stdout, _ = cs.main("-C", "1", d, std=True) assert code == 1 - lines = stdout.split('\n') + lines = stdout.split("\n") assert len(lines) == 5 - assert lines[0] == ': line 2' - assert lines[1] == '> line 3 abandonned' - assert lines[2] == ': line 4' + assert lines[0] == ": line 2" + assert lines[1] == "> line 3 abandonned" + assert lines[2] == ": line 4" # requested context is bigger than the file - code, stdout, _ = cs.main('-C', '10', d, std=True) + code, stdout, _ = cs.main("-C", "10", d, std=True) assert code == 1 - lines = stdout.split('\n') + lines = stdout.split("\n") assert len(lines) == 7 - assert lines[0] == ': line 1' - assert lines[1] == ': line 2' - assert lines[2] == '> line 3 abandonned' - assert lines[3] == ': line 4' - assert lines[4] == ': line 5' + assert lines[0] == ": line 1" + assert lines[1] == ": line 2" + assert lines[2] == "> line 3 abandonned" + assert lines[3] == ": line 4" + assert lines[4] == ": line 5" # only before context - code, stdout, _ = cs.main('-B', '2', d, std=True) + code, stdout, _ = cs.main("-B", "2", d, std=True) assert code == 1 - lines = stdout.split('\n') + lines = stdout.split("\n") assert len(lines) == 5 - assert lines[0] == ': line 1' - assert lines[1] == ': line 2' - assert lines[2] == '> line 3 abandonned' + assert lines[0] == ": line 1" + assert lines[1] == ": line 2" + assert lines[2] == "> line 3 abandonned" # only after context - code, stdout, _ = cs.main('-A', '1', d, std=True) + code, stdout, _ = cs.main("-A", "1", d, std=True) assert code == 1 - lines = stdout.split('\n') + lines = stdout.split("\n") assert len(lines) == 4 - assert lines[0] == '> line 3 abandonned' - assert lines[1] == ': line 4' + assert lines[0] == "> line 3 abandonned" + assert lines[1] == ": line 4" # asymmetric context - code, stdout, _ = cs.main('-B', '2', '-A', '1', d, std=True) + code, stdout, _ = cs.main("-B", "2", "-A", "1", d, std=True) assert code == 1 - lines = stdout.split('\n') + lines = stdout.split("\n") assert len(lines) == 6 - assert lines[0] == ': line 1' - assert lines[1] == ': line 2' - assert lines[2] == '> line 3 abandonned' - assert lines[3] == ': line 4' + assert lines[0] == ": line 1" + assert lines[1] == ": line 2" + assert lines[2] == "> line 3 abandonned" + assert lines[3] == ": line 4" # both '-C' and '-A' on the command line - code, _, stderr = cs.main('-C', '2', '-A', '1', d, std=True) + code, _, stderr = cs.main("-C", "2", "-A", "1", d, std=True) assert code == EX_USAGE - lines = stderr.split('\n') - assert 'ERROR' in lines[0] + lines = stderr.split("\n") + assert "ERROR" in lines[0] # both '-C' and '-B' on the command line - code, _, stderr = cs.main('-C', '2', '-B', '1', d, std=True) + code, _, stderr = cs.main("-C", "2", "-B", "1", d, std=True) assert code == EX_USAGE - lines = stderr.split('\n') - assert 'ERROR' in lines[0] + lines = stderr.split("\n") + assert "ERROR" in lines[0] def test_ignore_regex_option(tmpdir, capsys): @@ -584,32 +589,32 @@ def test_ignore_regex_option(tmpdir, capsys): d = str(tmpdir) # Invalid regex. - code, stdout, _ = cs.main('--ignore-regex=(', std=True) + code, stdout, _ = cs.main("--ignore-regex=(", std=True) assert code == EX_USAGE - assert 'usage:' in stdout + assert "usage:" in stdout - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('# Please see http://example.com/abandonned for info\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("# Please see http://example.com/abandonned for info\n") # Test file has 1 invalid entry, and it's not ignored by default. assert cs.main(f.name) == 1 # An empty regex is the default value, and nothing is ignored. - assert cs.main(f.name, '--ignore-regex=') == 1 + assert cs.main(f.name, "--ignore-regex=") == 1 assert cs.main(f.name, '--ignore-regex=""') == 1 # Non-matching regex results in nothing being ignored. - assert cs.main(f.name, '--ignore-regex=^$') == 1 + assert cs.main(f.name, "--ignore-regex=^$") == 1 # A word can be ignored. - assert cs.main(f.name, '--ignore-regex=abandonned') == 0 + assert cs.main(f.name, "--ignore-regex=abandonned") == 0 # Ignoring part of the word can result in odd behavior. - assert cs.main(f.name, '--ignore-regex=nn') == 0 + assert cs.main(f.name, "--ignore-regex=nn") == 0 - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('abandonned donn\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("abandonned donn\n") # Test file has 2 invalid entries. assert cs.main(f.name) == 2 # Ignoring donn breaks them both. - assert cs.main(f.name, '--ignore-regex=donn') == 0 + assert cs.main(f.name, "--ignore-regex=donn") == 0 # Adding word breaks causes only one to be ignored. - assert cs.main(f.name, r'--ignore-regex=\bdonn\b') == 1 + assert cs.main(f.name, r"--ignore-regex=\bdonn\b") == 1 def test_uri_regex_option(tmpdir, capsys): @@ -617,92 +622,98 @@ def test_uri_regex_option(tmpdir, capsys): d = str(tmpdir) # Invalid regex. - code, stdout, _ = cs.main('--uri-regex=(', std=True) + code, stdout, _ = cs.main("--uri-regex=(", std=True) assert code == EX_USAGE - assert 'usage:' in stdout + assert "usage:" in stdout - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('# Please see http://abandonned.com for info\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("# Please see http://abandonned.com for info\n") # By default, the standard regex is used. assert cs.main(f.name) == 1 - assert cs.main(f.name, '--uri-ignore-words-list=abandonned') == 0 + assert cs.main(f.name, "--uri-ignore-words-list=abandonned") == 0 # If empty, nothing matches. - assert cs.main(f.name, '--uri-regex=', - '--uri-ignore-words-list=abandonned') == 0 + assert cs.main(f.name, "--uri-regex=", "--uri-ignore-words-list=abandonned") == 0 # Can manually match urls. - assert cs.main(f.name, '--uri-regex=\\bhttp.*\\b', - '--uri-ignore-words-list=abandonned') == 0 + assert ( + cs.main( + f.name, "--uri-regex=\\bhttp.*\\b", "--uri-ignore-words-list=abandonned" + ) + == 0 + ) # Can also match arbitrary content. - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('abandonned') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("abandonned") assert cs.main(f.name) == 1 - assert cs.main(f.name, '--uri-ignore-words-list=abandonned') == 1 - assert cs.main(f.name, '--uri-regex=.*') == 1 - assert cs.main(f.name, '--uri-regex=.*', - '--uri-ignore-words-list=abandonned') == 0 + assert cs.main(f.name, "--uri-ignore-words-list=abandonned") == 1 + assert cs.main(f.name, "--uri-regex=.*") == 1 + assert cs.main(f.name, "--uri-regex=.*", "--uri-ignore-words-list=abandonned") == 0 def test_uri_ignore_words_list_option_uri(tmpdir, capsys): """Test ignore regex option functionality.""" d = str(tmpdir) - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('# Please see http://example.com/abandonned for info\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("# Please see http://example.com/abandonned for info\n") # Test file has 1 invalid entry, and it's not ignored by default. assert cs.main(f.name) == 1 # An empty list is the default value, and nothing is ignored. - assert cs.main(f.name, '--uri-ignore-words-list=') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=") == 1 # Non-matching regex results in nothing being ignored. - assert cs.main(f.name, '--uri-ignore-words-list=foo,example') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=foo,example") == 1 # A word can be ignored. - assert cs.main(f.name, '--uri-ignore-words-list=abandonned') == 0 - assert cs.main(f.name, '--uri-ignore-words-list=foo,abandonned,bar') == 0 - assert cs.main(f.name, '--uri-ignore-words-list=*') == 0 + assert cs.main(f.name, "--uri-ignore-words-list=abandonned") == 0 + assert cs.main(f.name, "--uri-ignore-words-list=foo,abandonned,bar") == 0 + assert cs.main(f.name, "--uri-ignore-words-list=*") == 0 # The match must be for the complete word. - assert cs.main(f.name, '--uri-ignore-words-list=abandonn') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=abandonn") == 1 - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('abandonned http://example.com/abandonned\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("abandonned http://example.com/abandonned\n") # Test file has 2 invalid entries. assert cs.main(f.name) == 2 # Ignoring the value in the URI won't ignore the word completely. - assert cs.main(f.name, '--uri-ignore-words-list=abandonned') == 1 - assert cs.main(f.name, '--uri-ignore-words-list=*') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=abandonned") == 1 + assert cs.main(f.name, "--uri-ignore-words-list=*") == 1 # The regular --ignore-words-list will ignore both. - assert cs.main(f.name, '--ignore-words-list=abandonned') == 0 + assert cs.main(f.name, "--ignore-words-list=abandonned") == 0 - variation_option = '--uri-ignore-words-list=abandonned' + variation_option = "--uri-ignore-words-list=abandonned" # Variations where an error is ignored. - for variation in ('# Please see http://abandonned for info\n', - '# Please see "http://abandonned" for info\n', - # This variation could be un-ignored, but it'd require a - # more complex regex as " is valid in parts of URIs. - '# Please see "http://foo"abandonned for info\n', - '# Please see https://abandonned for info\n', - '# Please see ftp://abandonned for info\n', - '# Please see http://example/abandonned for info\n', - '# Please see http://example.com/abandonned for info\n', - '# Please see http://exam.com/ple#abandonned for info\n', - '# Please see http://exam.com/ple?abandonned for info\n', - '# Please see http://127.0.0.1/abandonned for info\n', - '# Please see http://[2001:0db8:85a3:0000:0000:8a2e:0370' - ':7334]/abandonned for info\n'): - with open(op.join(d, 'flag.txt'), 'w') as f: + for variation in ( + "# Please see http://abandonned for info\n", + '# Please see "http://abandonned" for info\n', + # This variation could be un-ignored, but it'd require a + # more complex regex as " is valid in parts of URIs. + '# Please see "http://foo"abandonned for info\n', + "# Please see https://abandonned for info\n", + "# Please see ftp://abandonned for info\n", + "# Please see http://example/abandonned for info\n", + "# Please see http://example.com/abandonned for info\n", + "# Please see http://exam.com/ple#abandonned for info\n", + "# Please see http://exam.com/ple?abandonned for info\n", + "# Please see http://127.0.0.1/abandonned for info\n", + "# Please see http://[2001:0db8:85a3:0000:0000:8a2e:0370" + ":7334]/abandonned for info\n", + ): + with open(op.join(d, "flag.txt"), "w") as f: f.write(variation) assert cs.main(f.name) == 1, variation assert cs.main(f.name, variation_option) == 0, variation # Variations where no error is ignored. - for variation in ('# Please see abandonned/ for info\n', - '# Please see http:abandonned for info\n', - '# Please see foo/abandonned for info\n', - '# Please see http://foo abandonned for info\n'): - with open(op.join(d, 'flag.txt'), 'w') as f: + for variation in ( + "# Please see abandonned/ for info\n", + "# Please see http:abandonned for info\n", + "# Please see foo/abandonned for info\n", + "# Please see http://foo abandonned for info\n", + ): + with open(op.join(d, "flag.txt"), "w") as f: f.write(variation) assert cs.main(f.name) == 1, variation assert cs.main(f.name, variation_option) == 1, variation @@ -712,50 +723,52 @@ def test_uri_ignore_words_list_option_email(tmpdir, capsys): """Test ignore regex option functionality.""" d = str(tmpdir) - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('# Please see example@abandonned.com for info\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("# Please see example@abandonned.com for info\n") # Test file has 1 invalid entry, and it's not ignored by default. assert cs.main(f.name) == 1 # An empty list is the default value, and nothing is ignored. - assert cs.main(f.name, '--uri-ignore-words-list=') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=") == 1 # Non-matching regex results in nothing being ignored. - assert cs.main(f.name, '--uri-ignore-words-list=foo,example') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=foo,example") == 1 # A word can be ignored. - assert cs.main(f.name, '--uri-ignore-words-list=abandonned') == 0 - assert cs.main(f.name, '--uri-ignore-words-list=foo,abandonned,bar') == 0 - assert cs.main(f.name, '--uri-ignore-words-list=*') == 0 + assert cs.main(f.name, "--uri-ignore-words-list=abandonned") == 0 + assert cs.main(f.name, "--uri-ignore-words-list=foo,abandonned,bar") == 0 + assert cs.main(f.name, "--uri-ignore-words-list=*") == 0 # The match must be for the complete word. - assert cs.main(f.name, '--uri-ignore-words-list=abandonn') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=abandonn") == 1 - with open(op.join(d, 'flag.txt'), 'w') as f: - f.write('abandonned example@abandonned.com\n') + with open(op.join(d, "flag.txt"), "w") as f: + f.write("abandonned example@abandonned.com\n") # Test file has 2 invalid entries. assert cs.main(f.name) == 2 # Ignoring the value in the URI won't ignore the word completely. - assert cs.main(f.name, '--uri-ignore-words-list=abandonned') == 1 - assert cs.main(f.name, '--uri-ignore-words-list=*') == 1 + assert cs.main(f.name, "--uri-ignore-words-list=abandonned") == 1 + assert cs.main(f.name, "--uri-ignore-words-list=*") == 1 # The regular --ignore-words-list will ignore both. - assert cs.main(f.name, '--ignore-words-list=abandonned') == 0 + assert cs.main(f.name, "--ignore-words-list=abandonned") == 0 - variation_option = '--uri-ignore-words-list=abandonned' + variation_option = "--uri-ignore-words-list=abandonned" # Variations where an error is ignored. - for variation in ('# Please see example@abandonned for info\n', - '# Please see abandonned@example for info\n', - '# Please see abandonned@example.com for info\n', - '# Please see mailto:abandonned@example.com?subject=Test' - ' for info\n'): - with open(op.join(d, 'flag.txt'), 'w') as f: + for variation in ( + "# Please see example@abandonned for info\n", + "# Please see abandonned@example for info\n", + "# Please see abandonned@example.com for info\n", + "# Please see mailto:abandonned@example.com?subject=Test for info\n", + ): + with open(op.join(d, "flag.txt"), "w") as f: f.write(variation) assert cs.main(f.name) == 1, variation assert cs.main(f.name, variation_option) == 0, variation # Variations where no error is ignored. - for variation in ('# Please see example @ abandonned for info\n', - '# Please see abandonned@ example for info\n', - '# Please see mailto:foo@example.com?subject=Test' - ' abandonned for info\n'): - with open(op.join(d, 'flag.txt'), 'w') as f: + for variation in ( + "# Please see example @ abandonned for info\n", + "# Please see abandonned@ example for info\n", + "# Please see mailto:foo@example.com?subject=Test abandonned for info\n", + ): + with open(op.join(d, "flag.txt"), "w") as f: f.write(variation) assert cs.main(f.name) == 1, variation assert cs.main(f.name, variation_option) == 1, variation @@ -766,84 +779,84 @@ def test_uri_regex_def(): # Tests based on https://mathiasbynens.be/demo/url-regex true_positives = ( - 'http://foo.com/blah_blah', - 'http://foo.com/blah_blah/', - 'http://foo.com/blah_blah_(wikipedia)', - 'http://foo.com/blah_blah_(wikipedia)_(again)', - 'http://www.example.com/wpstyle/?p=364', - 'https://www.example.com/foo/?bar=baz&inga=42&quux', - 'http://✪df.ws/123', - 'http://userid:password@example.com:8080', - 'http://userid:password@example.com:8080/', - 'http://userid@example.com', - 'http://userid@example.com/', - 'http://userid@example.com:8080', - 'http://userid@example.com:8080/', - 'http://userid:password@example.com', - 'http://userid:password@example.com/', - 'http://142.42.1.1/', - 'http://142.42.1.1:8080/', - 'http://➡.ws/䨹', - 'http://⌘.ws', - 'http://⌘.ws/', - 'http://foo.com/blah_(wikipedia)#cite-1', - 'http://foo.com/blah_(wikipedia)_blah#cite-1', - 'http://foo.com/unicode_(✪)_in_parens', - 'http://foo.com/(something)?after=parens', - 'http://☺.damowmow.com/', - 'http://code.google.com/events/#&product=browser', - 'http://j.mp', - 'ftp://foo.bar/baz', - 'http://foo.bar/?q=Test%20URL-encoded%20stuff', - 'http://مثال.إختبار', - 'http://例子.测试', - 'http://उदाहरण.परीक्षा', + "http://foo.com/blah_blah", + "http://foo.com/blah_blah/", + "http://foo.com/blah_blah_(wikipedia)", + "http://foo.com/blah_blah_(wikipedia)_(again)", + "http://www.example.com/wpstyle/?p=364", + "https://www.example.com/foo/?bar=baz&inga=42&quux", + "http://✪df.ws/123", + "http://userid:password@example.com:8080", + "http://userid:password@example.com:8080/", + "http://userid@example.com", + "http://userid@example.com/", + "http://userid@example.com:8080", + "http://userid@example.com:8080/", + "http://userid:password@example.com", + "http://userid:password@example.com/", + "http://142.42.1.1/", + "http://142.42.1.1:8080/", + "http://➡.ws/䨹", + "http://⌘.ws", + "http://⌘.ws/", + "http://foo.com/blah_(wikipedia)#cite-1", + "http://foo.com/blah_(wikipedia)_blah#cite-1", + "http://foo.com/unicode_(✪)_in_parens", + "http://foo.com/(something)?after=parens", + "http://☺.damowmow.com/", + "http://code.google.com/events/#&product=browser", + "http://j.mp", + "ftp://foo.bar/baz", + "http://foo.bar/?q=Test%20URL-encoded%20stuff", + "http://مثال.إختبار", + "http://例子.测试", + "http://उदाहरण.परीक्षा", "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com", - 'http://1337.net', - 'http://a.b-c.de', - 'http://223.255.255.254', + "http://1337.net", + "http://a.b-c.de", + "http://223.255.255.254", ) true_negatives = ( - 'http://', - '//', - '//a', - '///a', - '///', - 'foo.com', - 'rdar://1234', - 'h://test', - '://should.fail', - 'ftps://foo.bar/', + "http://", + "//", + "//a", + "///a", + "///", + "foo.com", + "rdar://1234", + "h://test", + "://should.fail", + "ftps://foo.bar/", ) false_positives = ( - 'http://.', - 'http://..', - 'http://../', - 'http://?', - 'http://??', - 'http://??/', - 'http://#', - 'http://##', - 'http://##/', - 'http:///a', - 'http://-error-.invalid/', - 'http://a.b--c.de/', - 'http://-a.b.co', - 'http://a.b-.co', - 'http://0.0.0.0', - 'http://10.1.1.0', - 'http://10.1.1.255', - 'http://224.1.1.1', - 'http://1.1.1.1.1', - 'http://123.123.123', - 'http://3628126748', - 'http://.www.foo.bar/', - 'http://www.foo.bar./', - 'http://.www.foo.bar./', - 'http://10.1.1.1', + "http://.", + "http://..", + "http://../", + "http://?", + "http://??", + "http://??/", + "http://#", + "http://##", + "http://##/", + "http:///a", + "http://-error-.invalid/", + "http://a.b--c.de/", + "http://-a.b.co", + "http://a.b-.co", + "http://0.0.0.0", + "http://10.1.1.0", + "http://10.1.1.255", + "http://224.1.1.1", + "http://1.1.1.1.1", + "http://123.123.123", + "http://3628126748", + "http://.www.foo.bar/", + "http://www.foo.bar./", + "http://.www.foo.bar./", + "http://10.1.1.1", ) - boilerplate = 'Surrounding text %s more text' + boilerplate = "Surrounding text %s more text" for uri in true_positives + false_positives: assert uri_regex.findall(uri) == [uri], uri @@ -854,47 +867,51 @@ def test_uri_regex_def(): assert not uri_regex.findall(boilerplate % uri), uri -@pytest.mark.parametrize('kind', ('toml', 'cfg')) +@pytest.mark.parametrize("kind", ("toml", "cfg")) def test_config_toml(tmp_path, capsys, kind): """Test loading options from a config file or toml.""" - d = tmp_path / 'files' + d = tmp_path / "files" d.mkdir() - with open(d / 'bad.txt', 'w') as f: - f.write('abandonned donn\n') - with open(d / 'good.txt', 'w') as f: + with open(d / "bad.txt", "w") as f: + f.write("abandonned donn\n") + with open(d / "good.txt", "w") as f: f.write("good") # Should fail when checking both. code, stdout, _ = cs.main(str(d), count=True, std=True) # Code in this case is not exit code, but count of misspellings. assert code == 2 - assert 'bad.txt' in stdout - - if kind == 'cfg': - conffile = str(tmp_path / 'setup.cfg') - args = ('--config', conffile) - with open(conffile, 'w') as f: - f.write("""\ + assert "bad.txt" in stdout + + if kind == "cfg": + conffile = str(tmp_path / "setup.cfg") + args = ("--config", conffile) + with open(conffile, "w") as f: + f.write( + """\ [codespell] skip = bad.txt, whatever.txt count = -""") +""" + ) else: - assert kind == 'toml' - pytest.importorskip('tomli') - tomlfile = str(tmp_path / 'pyproject.toml') - args = ('--toml', tomlfile) - with open(tomlfile, 'w') as f: - f.write("""\ + assert kind == "toml" + pytest.importorskip("tomli") + tomlfile = str(tmp_path / "pyproject.toml") + args = ("--toml", tomlfile) + with open(tomlfile, "w") as f: + f.write( + """\ [tool.codespell] skip = 'bad.txt,whatever.txt' count = false -""") +""" + ) # Should pass when skipping bad.txt code, stdout, _ = cs.main(str(d), *args, count=True, std=True) assert code == 0 - assert 'bad.txt' not in stdout + assert "bad.txt" not in stdout # And both should automatically work if they're in cwd cwd = os.getcwd() @@ -904,7 +921,7 @@ def test_config_toml(tmp_path, capsys, kind): finally: os.chdir(cwd) assert code == 0 - assert 'bad.txt' not in stdout + assert "bad.txt" not in stdout @contextlib.contextmanager diff --git a/codespell_lib/tests/test_dictionary.py b/codespell_lib/tests/test_dictionary.py index 745b4ab72cf..73072c7f104 100644 --- a/codespell_lib/tests/test_dictionary.py +++ b/codespell_lib/tests/test_dictionary.py @@ -12,35 +12,39 @@ try: import aspell + for lang in supported_languages: - spellers[lang] = aspell.Speller('lang', lang) + spellers[lang] = aspell.Speller("lang", lang) except Exception as exp: # probably ImportError, but maybe also language - if os.getenv('REQUIRE_ASPELL', 'false').lower() == 'true': + if os.getenv("REQUIRE_ASPELL", "false").lower() == "true": raise RuntimeError( - 'Cannot run complete tests without aspell when ' - 'REQUIRE_ASPELL=true. Got error during import:\n%s' - % (exp,)) + "Cannot run complete tests without aspell when " + "REQUIRE_ASPELL=true. Got error during import:\n%s" % (exp,) + ) else: warnings.warn( - 'aspell not found, but not required, skipping aspell tests. Got ' - 'error during import:\n%s' % (exp,)) + "aspell not found, but not required, skipping aspell tests. Got " + "error during import:\n%s" % (exp,) + ) global_err_dicts = {} global_pairs = set() # Filename, should be seen as errors in aspell or not -_data_dir = op.join(op.dirname(__file__), '..', 'data') +_data_dir = op.join(op.dirname(__file__), "..", "data") _fnames_in_aspell = [ - (op.join(_data_dir, 'dictionary%s.txt' % d[2]), d[3:5], d[5:7]) - for d in _builtin_dictionaries] -fname_params = pytest.mark.parametrize('fname, in_aspell, in_dictionary', _fnames_in_aspell) # noqa: E501 + (op.join(_data_dir, "dictionary%s.txt" % d[2]), d[3:5], d[5:7]) + for d in _builtin_dictionaries +] +fname_params = pytest.mark.parametrize( + "fname, in_aspell, in_dictionary", _fnames_in_aspell +) # noqa: E501 def test_dictionaries_exist(): """Test consistency of dictionaries.""" doc_fnames = {op.basename(f[0]) for f in _fnames_in_aspell} - got_fnames = {op.basename(f) - for f in glob.glob(op.join(_data_dir, '*.txt'))} + got_fnames = {op.basename(f) for f in glob.glob(op.join(_data_dir, "*.txt"))} assert doc_fnames == got_fnames @@ -48,17 +52,17 @@ def test_dictionaries_exist(): def test_dictionary_formatting(fname, in_aspell, in_dictionary): """Test that all dictionary entries are valid.""" errors = [] - with open(fname, 'rb') as fid: + with open(fname, "rb") as fid: for line in fid: - err, rep = line.decode('utf-8').split('->') + err, rep = line.decode("utf-8").split("->") err = err.lower() - rep = rep.rstrip('\n') + rep = rep.rstrip("\n") try: _check_err_rep(err, rep, in_aspell, fname, in_dictionary) except AssertionError as exp: - errors.append(str(exp).split('\n')[0]) + errors.append(str(exp).split("\n")[0]) if errors: - raise AssertionError('\n' + '\n'.join(errors)) + raise AssertionError("\n" + "\n".join(errors)) def _check_aspell(phrase, msg, in_aspell, fname, languages): @@ -66,186 +70,217 @@ def _check_aspell(phrase, msg, in_aspell, fname, languages): return # cannot check if in_aspell is None: return # don't check - if ' ' in phrase: + if " " in phrase: for word in phrase.split(): _check_aspell(word, msg, in_aspell, fname, languages) return # stop normal checking as we've done each word above - this_in_aspell = any(spellers[lang].check(phrase.encode( - spellers[lang].ConfigKeys()['encoding'][1])) for lang in languages) - end = 'be in aspell dictionaries (%s) for dictionary %s' % ( - ', '.join(languages), fname) + this_in_aspell = any( + spellers[lang].check(phrase.encode(spellers[lang].ConfigKeys()["encoding"][1])) + for lang in languages + ) + end = "be in aspell dictionaries (%s) for dictionary %s" % ( + ", ".join(languages), + fname, + ) if in_aspell: # should be an error in aspell - assert this_in_aspell, '%s should %s' % (msg, end) + assert this_in_aspell, "%s should %s" % (msg, end) else: # shouldn't be - assert not this_in_aspell, '%s should not %s' % (msg, end) + assert not this_in_aspell, "%s should not %s" % (msg, end) -whitespace = re.compile(r'\s') -start_whitespace = re.compile(r'^\s') -start_comma = re.compile(r'^,') -whitespace_comma = re.compile(r'\s,') -comma_whitespaces = re.compile(r',\s\s') -comma_without_space = re.compile(r',[^ ]') -whitespace_end = re.compile(r'\s+$') -single_comma = re.compile(r'^[^,]*,\s*$') +whitespace = re.compile(r"\s") +start_whitespace = re.compile(r"^\s") +start_comma = re.compile(r"^,") +whitespace_comma = re.compile(r"\s,") +comma_whitespaces = re.compile(r",\s\s") +comma_without_space = re.compile(r",[^ ]") +whitespace_end = re.compile(r"\s+$") +single_comma = re.compile(r"^[^,]*,\s*$") def _check_err_rep(err, rep, in_aspell, fname, languages): - assert whitespace.search(err) is None, 'error %r has whitespace' % err - assert ',' not in err, 'error %r has a comma' % err - assert len(rep) > 0, ('error %s: correction %r must be non-empty' - % (err, rep)) - assert not start_whitespace.match(rep), ('error %s: correction %r ' - 'cannot start with whitespace' - % (err, rep)) - _check_aspell(err, 'error %r' % (err,), in_aspell[0], fname, languages[0]) - prefix = 'error %s: correction %r' % (err, rep) + assert whitespace.search(err) is None, "error %r has whitespace" % err + assert "," not in err, "error %r has a comma" % err + assert len(rep) > 0, "error %s: correction %r must be non-empty" % (err, rep) + assert not start_whitespace.match( + rep + ), "error %s: correction %r cannot start with whitespace" % (err, rep) + _check_aspell(err, "error %r" % (err,), in_aspell[0], fname, languages[0]) + prefix = "error %s: correction %r" % (err, rep) for (r, msg) in [ - (start_comma, - '%s starts with a comma'), - (whitespace_comma, - '%s contains a whitespace character followed by a comma'), - (comma_whitespaces, - '%s contains a comma followed by multiple whitespace characters'), - (comma_without_space, - '%s contains a comma *not* followed by a space'), - (whitespace_end, - '%s has a trailing space'), - (single_comma, - '%s has a single entry but contains a trailing comma')]: - assert not r.search(rep), (msg % (prefix,)) + (start_comma, "%s starts with a comma"), + (whitespace_comma, "%s contains a whitespace character followed by a comma"), + ( + comma_whitespaces, + "%s contains a comma followed by multiple whitespace characters", + ), + (comma_without_space, "%s contains a comma *not* followed by a space"), + (whitespace_end, "%s has a trailing space"), + (single_comma, "%s has a single entry but contains a trailing comma"), + ]: + assert not r.search(rep), msg % (prefix,) del msg - if rep.count(','): - assert rep.endswith(','), ('error %s: multiple corrections must end ' - 'with trailing ","' % (err,)) - reps = [r.strip() for r in rep.split(',')] + if rep.count(","): + assert rep.endswith( + "," + ), "error %s: multiple corrections must end " 'with trailing ","' % (err,) + reps = [r.strip() for r in rep.split(",")] reps = [r for r in reps if len(r)] for r in reps: - assert err != r.lower(), ('error %r corrects to itself amongst others' - % (err,)) + assert err != r.lower(), "error %r corrects to itself amongst others" % (err,) _check_aspell( - r, 'error %s: correction %r' % (err, r), - in_aspell[1], fname, languages[1]) + r, "error %s: correction %r" % (err, r), in_aspell[1], fname, languages[1] + ) # aspell dictionary is case sensitive, so pass the original case into there # we could ignore the case, but that would miss things like days of the # week which we want to be correct reps = [r.lower() for r in reps] - assert len(set(reps)) == len(reps), ('error %s: corrections "%s" are not ' - '(lower-case) unique' % (err, rep)) + assert len(set(reps)) == len( + reps + ), 'error %s: corrections "%s" are not ' "(lower-case) unique" % (err, rep) -@pytest.mark.parametrize('err, rep, match', [ - ('a a', 'bar', 'has whitespace'), - ('a,a', 'bar', 'has a comma'), - ('a', '', 'non-empty'), - ('a', ' bar', 'start with whitespace'), - ('a', ',bar', 'starts with a comma'), - ('a', 'bar,bat', '.*not.*followed by a space'), - ('a', 'bar ', 'trailing space'), - ('a', 'b ,ar', 'contains a whitespace.*followed by a comma'), - ('a', 'bar,', 'single entry.*comma'), - ('a', 'bar, bat', 'must end with trailing ","'), - ('a', 'a, bar,', 'corrects to itself amongst others'), - ('a', 'a', 'corrects to itself'), - ('a', 'bar, Bar,', 'unique'), -]) +@pytest.mark.parametrize( + "err, rep, match", + [ + ("a a", "bar", "has whitespace"), + ("a,a", "bar", "has a comma"), + ("a", "", "non-empty"), + ("a", " bar", "start with whitespace"), + ("a", ",bar", "starts with a comma"), + ("a", "bar,bat", ".*not.*followed by a space"), + ("a", "bar ", "trailing space"), + ("a", "b ,ar", "contains a whitespace.*followed by a comma"), + ("a", "bar,", "single entry.*comma"), + ("a", "bar, bat", 'must end with trailing ","'), + ("a", "a, bar,", "corrects to itself amongst others"), + ("a", "a", "corrects to itself"), + ("a", "bar, Bar,", "unique"), + ], +) def test_error_checking(err, rep, match): """Test that our error checking works.""" with pytest.raises(AssertionError, match=match): - _check_err_rep(err, rep, (None, None), 'dummy', - (supported_languages, supported_languages)) + _check_err_rep( + err, rep, (None, None), "dummy", (supported_languages, supported_languages) + ) -@pytest.mark.skipif(not spellers, reason='requires aspell-en') -@pytest.mark.parametrize('err, rep, err_aspell, rep_aspell, match', [ - # This doesn't raise any exceptions, so skip for now: - # pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'), - ('abcdef', 'uvwxyz, bar,', True, None, 'should be in aspell'), - ('a', 'uvwxyz, bar,', False, None, 'should not be in aspell'), - ('a', 'abcdef, uvwxyz,', None, True, 'should be in aspell'), - ('abcdef', 'uvwxyz, bar,', True, True, 'should be in aspell'), - ('abcdef', 'uvwxyz, bar,', False, True, 'should be in aspell'), - ('a', 'bar, back,', None, False, 'should not be in aspell'), - ('a', 'bar, back, Wednesday,', None, False, 'should not be in aspell'), - ('abcdef', 'ghijkl, uvwxyz,', True, False, 'should be in aspell'), - ('abcdef', 'uvwxyz, bar,', False, False, 'should not be in aspell'), - # Multi-word corrections - # One multi-word, both parts - ('a', 'abcdef uvwxyz', None, True, 'should be in aspell'), - ('a', 'bar back', None, False, 'should not be in aspell'), - ('a', 'bar back Wednesday', None, False, 'should not be in aspell'), - # Second multi-word, both parts - ('a', 'bar back, abcdef uvwxyz, bar,', None, True, 'should be in aspell'), - ('a', 'abcdef uvwxyz, bar back, ghijkl,', None, False, 'should not be in aspell'), # noqa: E501 - # One multi-word, second part - ('a', 'bar abcdef', None, True, 'should be in aspell'), - ('a', 'abcdef back', None, False, 'should not be in aspell'), -]) +@pytest.mark.skipif(not spellers, reason="requires aspell-en") +@pytest.mark.parametrize( + "err, rep, err_aspell, rep_aspell, match", + [ + # This doesn't raise any exceptions, so skip for now: + # pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'), + ("abcdef", "uvwxyz, bar,", True, None, "should be in aspell"), + ("a", "uvwxyz, bar,", False, None, "should not be in aspell"), + ("a", "abcdef, uvwxyz,", None, True, "should be in aspell"), + ("abcdef", "uvwxyz, bar,", True, True, "should be in aspell"), + ("abcdef", "uvwxyz, bar,", False, True, "should be in aspell"), + ("a", "bar, back,", None, False, "should not be in aspell"), + ("a", "bar, back, Wednesday,", None, False, "should not be in aspell"), + ("abcdef", "ghijkl, uvwxyz,", True, False, "should be in aspell"), + ("abcdef", "uvwxyz, bar,", False, False, "should not be in aspell"), + # Multi-word corrections + # One multi-word, both parts + ("a", "abcdef uvwxyz", None, True, "should be in aspell"), + ("a", "bar back", None, False, "should not be in aspell"), + ("a", "bar back Wednesday", None, False, "should not be in aspell"), + # Second multi-word, both parts + ("a", "bar back, abcdef uvwxyz, bar,", None, True, "should be in aspell"), + ( + "a", + "abcdef uvwxyz, bar back, ghijkl,", + None, + False, + "should not be in aspell", + ), # noqa: E501 + # One multi-word, second part + ("a", "bar abcdef", None, True, "should be in aspell"), + ("a", "abcdef back", None, False, "should not be in aspell"), + ], +) def test_error_checking_in_aspell(err, rep, err_aspell, rep_aspell, match): """Test that our error checking works with aspell.""" with pytest.raises(AssertionError, match=match): _check_err_rep( - err, rep, (err_aspell, rep_aspell), 'dummy', - (supported_languages, supported_languages)) + err, + rep, + (err_aspell, rep_aspell), + "dummy", + (supported_languages, supported_languages), + ) # allow some duplicates, like "m-i-n-i-m-i-s-e", or "c-a-l-c-u-l-a-t-a-b-l-e" # correction in left can appear as typo in right allowed_dups = { - ('dictionary.txt', 'dictionary_code.txt'), - ('dictionary.txt', 'dictionary_en-GB_to_en-US.txt'), - ('dictionary.txt', 'dictionary_names.txt'), - ('dictionary.txt', 'dictionary_rare.txt'), - ('dictionary.txt', 'dictionary_usage.txt'), - ('dictionary_code.txt', 'dictionary_rare.txt'), - ('dictionary_rare.txt', 'dictionary_usage.txt'), + ("dictionary.txt", "dictionary_code.txt"), + ("dictionary.txt", "dictionary_en-GB_to_en-US.txt"), + ("dictionary.txt", "dictionary_names.txt"), + ("dictionary.txt", "dictionary_rare.txt"), + ("dictionary.txt", "dictionary_usage.txt"), + ("dictionary_code.txt", "dictionary_rare.txt"), + ("dictionary_rare.txt", "dictionary_usage.txt"), } @fname_params -@pytest.mark.dependency(name='dictionary loop') +@pytest.mark.dependency(name="dictionary loop") def test_dictionary_looping(fname, in_aspell, in_dictionary): """Test that all dictionary entries are valid.""" this_err_dict = {} short_fname = op.basename(fname) - with open(fname, 'rb') as fid: + with open(fname, "rb") as fid: for line in fid: - err, rep = line.decode('utf-8').split('->') + err, rep = line.decode("utf-8").split("->") err = err.lower() - assert err not in this_err_dict, \ - 'error %r already exists in %s' % (err, short_fname) - rep = rep.rstrip('\n') - reps = [r.strip() for r in rep.lower().split(',')] + assert err not in this_err_dict, "error %r already exists in %s" % ( + err, + short_fname, + ) + rep = rep.rstrip("\n") + reps = [r.strip() for r in rep.lower().split(",")] reps = [r for r in reps if len(r)] this_err_dict[err] = reps # 1. check the dict against itself (diagonal) for err in this_err_dict: for r in this_err_dict[err]: - assert r not in this_err_dict, \ - ('error %s: correction %s is an error itself in the same ' - 'dictionary file %s' % (err, r, short_fname)) + assert r not in this_err_dict, ( + "error %s: correction %s is an error itself in the same " + "dictionary file %s" % (err, r, short_fname) + ) pair = (short_fname, short_fname) assert pair not in global_pairs global_pairs.add(pair) for other_fname, other_err_dict in global_err_dicts.items(): # error duplication (eventually maybe we should just merge?) for err in this_err_dict: - assert err not in other_err_dict, \ - ('error %r in dictionary %s already exists in dictionary ' - '%s' % (err, short_fname, other_fname)) + assert ( + err not in other_err_dict + ), "error %r in dictionary %s already exists in dictionary %s" % ( + err, + short_fname, + other_fname, + ) # 2. check corrections in this dict against other dicts (upper) pair = (short_fname, other_fname) if pair not in allowed_dups: for err in this_err_dict: - assert err not in other_err_dict, \ - ('error %r in dictionary %s already exists in dictionary ' - '%s' % (err, short_fname, other_fname)) + assert ( + err not in other_err_dict + ), "error %r in dictionary %s already exists in dictionary %s" % ( + err, + short_fname, + other_fname, + ) for r in this_err_dict[err]: - assert r not in other_err_dict, \ - ('error %s: correction %s from dictionary %s is an ' - 'error itself in dictionary %s' - % (err, r, short_fname, other_fname)) + assert r not in other_err_dict, ( + "error %s: correction %s from dictionary %s is an " + "error itself in dictionary %s" + % (err, r, short_fname, other_fname) + ) assert pair not in global_pairs global_pairs.add(pair) # 3. check corrections in other dicts against this dict (lower) @@ -253,16 +288,17 @@ def test_dictionary_looping(fname, in_aspell, in_dictionary): if pair not in allowed_dups: for err in other_err_dict: for r in other_err_dict[err]: - assert r not in this_err_dict, \ - ('error %s: correction %s from dictionary %s is an ' - 'error itself in dictionary %s' - % (err, r, other_fname, short_fname)) + assert r not in this_err_dict, ( + "error %s: correction %s from dictionary %s is an " + "error itself in dictionary %s" + % (err, r, other_fname, short_fname) + ) assert pair not in global_pairs global_pairs.add(pair) global_err_dicts[short_fname] = this_err_dict -@pytest.mark.dependency(depends=['dictionary loop']) +@pytest.mark.dependency(depends=["dictionary loop"]) def test_ran_all(): """Test that all pairwise tests ran.""" for f1, _, _ in _fnames_in_aspell: diff --git a/pyproject.toml b/pyproject.toml index 997e91dc2f6..543b96041f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,5 +67,8 @@ codespell_lib = [ [tool.check-manifest] ignore = ["codespell_lib/_version.py"] +[tool.isort] +profile = "black" + [tool.pytest.ini_options] addopts = "--cov=codespell_lib -rs --cov-report= --tb=short --junit-xml=junit-results.xml" diff --git a/setup.cfg b/setup.cfg index ae7e595a3bb..8dd399ab55b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,3 @@ [flake8] -exclude = build, ci-helpers -ignore = W503 +max-line-length = 88 +extend-ignore = E203