Add an --ignore-regex option to codespell.

Summary of the changes carried by this patch:

  * parse_options(): registers --ignore-regex (action='store', type=str).
    Text matching the expression is ignored by treating it as whitespace.
  * extract_words(): new helper. When an ignore regex is supplied, every
    match in the text is replaced with a single space before
    word_regex.findall() is run; otherwise findall() runs on the text as is.
  * parse_file(): gains an ignore_word_regex parameter, inserted positionally
    before `context`, and calls extract_words() for both the filename check
    and the per-line check.
  * main(): compiles options.ignore_regex when it is non-empty. On re.error
    it prints an error naming --ignore-regex plus the help text and returns
    EX_USAGE. When the option is unset or empty, None is passed down. The
    existing invalid-regex error message is reworded to name --regex.
  * tests: test_ignore_regex_flag covers an invalid expression, empty and
    non-matching expressions, whole-word and partial-word matches, and the
    effect of \W boundaries.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Line structure and blank-line counts were
rebuilt to agree with every hunk header; the leading whitespace of context
lines was reconstructed from the Python structure and should be confirmed
against the target files before applying (or apply with whitespace
tolerance).

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index f52b840c07..83ebaa4e01 100755
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -273,6 +273,13 @@ def parse_options(args):
                         'to include (when "-D -" or no "-D" is passed). '
                         'Current options are:' + builtin_opts + '\n'
                         'The default is %(default)r.')
+    parser.add_argument('--ignore-regex',
+                        action='store', type=str,
+                        help='regular expression which is used to find '
+                             'patterns to ignore by treating as whitespace. '
+                             'When writing regexes, consider ensuring there '
+                             'are boundary non-word chars, e.g., '
+                             '"\\Wmatch\\W". Defaults to empty/disabled.')
     parser.add_argument('-I', '--ignore-words',
                         action='append', metavar='FILE',
                         help='file that contains words which will be ignored '
@@ -489,8 +496,14 @@ def print_context(lines, index, context):
             print('%s %s' % ('>' if i == index else ':', lines[i].rstrip()))
 
 
+def extract_words(text, word_regex, ignore_word_regex):
+    if ignore_word_regex:
+        text = ignore_word_regex.sub(' ', text)
+    return word_regex.findall(text)
+
+
 def parse_file(filename, colors, summary, misspellings, exclude_lines,
-               file_opener, word_regex, context, options):
+               file_opener, word_regex, ignore_word_regex, context, options):
     bad_count = 0
     lines = None
     changed = False
@@ -501,7 +514,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
         lines = f.readlines()
     else:
         if options.check_filenames:
-            for word in word_regex.findall(filename):
+            for word in extract_words(filename, word_regex, ignore_word_regex):
                 lword = word.lower()
                 if lword not in misspellings:
                     continue
@@ -555,7 +568,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
             fixed_words = set()
             asked_for = set()
 
-            for word in word_regex.findall(line):
+            for word in extract_words(line, word_regex, ignore_word_regex):
                 lword = word.lower()
                 if lword in misspellings:
                     context_shown = False
@@ -658,11 +671,22 @@ def main(*args):
     try:
         word_regex = re.compile(word_regex)
     except re.error as err:
-        print("ERROR: invalid regular expression \"%s\" (%s)" %
+        print("ERROR: invalid --regex \"%s\" (%s)" %
               (word_regex, err), file=sys.stderr)
         parser.print_help()
         return EX_USAGE
 
+    if options.ignore_regex:
+        try:
+            ignore_word_regex = re.compile(options.ignore_regex)
+        except re.error as err:
+            print("ERROR: invalid --ignore-regex \"%s\" (%s)" %
+                  (options.ignore_regex, err), file=sys.stderr)
+            parser.print_help()
+            return EX_USAGE
+    else:
+        ignore_word_regex = None
+
     ignore_words_files = options.ignore_words or []
     ignore_words = set()
     for ignore_words_file in ignore_words_files:
@@ -770,7 +794,8 @@ def main(*args):
                         continue
                     bad_count += parse_file(
                         fname, colors, summary, misspellings, exclude_lines,
-                        file_opener, word_regex, context, options)
+                        file_opener, word_regex, ignore_word_regex, context,
+                        options)
 
                 # skip (relative) directories
                 dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]
@@ -778,7 +803,7 @@ def main(*args):
         else:
             bad_count += parse_file(
                 filename, colors, summary, misspellings, exclude_lines,
-                file_opener, word_regex, context, options)
+                file_opener, word_regex, ignore_word_regex, context, options)
 
     if summary:
         print("\n-------8<-------\nSUMMARY:")
diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
index 4107b6ce1c..876e24ce93 100644
--- a/codespell_lib/tests/test_basic.py
+++ b/codespell_lib/tests/test_basic.py
@@ -455,6 +455,39 @@ def test_context(tmpdir, capsys):
     assert 'ERROR' in lines[0]
 
 
+def test_ignore_regex_flag(tmpdir, capsys):
+    """Test ignore regex flag functionality."""
+    d = str(tmpdir)
+
+    # Invalid regex.
+    code, stdout, _ = cs.main('--ignore-regex=(', std=True)
+    assert code == EX_USAGE
+    assert 'usage:' in stdout
+
+    with open(op.join(d, 'flag.txt'), 'w') as f:
+        f.write('# Please see http://example.com/abandonned for info\n')
+    # Test file has 1 invalid entry, and it's not ignored by default.
+    assert cs.main(f.name) == 1
+    # An empty regex is the default value, and nothing is ignored.
+    assert cs.main(f.name, '--ignore-regex=') == 1
+    assert cs.main(f.name, '--ignore-regex=""') == 1
+    # Non-matching regex results in nothing being ignored.
+    assert cs.main(f.name, '--ignore-regex=^$') == 1
+    # A word can be ignored.
+    assert cs.main(f.name, '--ignore-regex=abandonned') == 0
+    # Ignoring part of the word can result in odd behavior.
+    assert cs.main(f.name, '--ignore-regex=nn') == 0
+
+    with open(op.join(d, 'flag.txt'), 'w') as f:
+        f.write('abandonned donn\n')
+    # Test file has 2 invalid entries.
+    assert cs.main(f.name) == 2
+    # Ignoring donn breaks them both.
+    assert cs.main(f.name, '--ignore-regex=donn') == 0
+    # Adding word breaks causes only one to be ignored.
+    assert cs.main(f.name, r'--ignore-regex=\Wdonn\W') == 1
+
+
 @contextlib.contextmanager
 def FakeStdin(text):
     if sys.version[0] == '2':