Merge pull request #1592 from jonmeow/ignore-regex

Add --ignore-regex
codespell-project · Aug 10, 2020 · 8bbef35 · 8bbef35
2 parents be88368 + 5ae1b91
commit 8bbef35
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 6 deletions.
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -273,6 +273,13 @@ def parse_options(args):
                         'to include (when "-D -" or no "-D" is passed). '
                         'Current options are:' + builtin_opts + '\n'
                         'The default is %(default)r.')
+    parser.add_argument('--ignore-regex',
+                        action='store', type=str,
+                        help='regular expression which is used to find '
+                             'patterns to ignore by treating as whitespace. '
+                             'When writing regexes, consider ensuring there '
+                             'are boundary non-word chars, e.g., '
+                             '"\\Wmatch\\W". Defaults to empty/disabled.')
     parser.add_argument('-I', '--ignore-words',
                         action='append', metavar='FILE',
                         help='file that contains words which will be ignored '
@@ -489,8 +496,14 @@ def print_context(lines, index, context):
             print('%s %s' % ('>' if i == index else ':', lines[i].rstrip()))
 
 
+def extract_words(text, word_regex, ignore_word_regex):
+    if ignore_word_regex:
+        text = ignore_word_regex.sub(' ', text)
+    return word_regex.findall(text)
+
+
 def parse_file(filename, colors, summary, misspellings, exclude_lines,
-               file_opener, word_regex, context, options):
+               file_opener, word_regex, ignore_word_regex, context, options):
     bad_count = 0
     lines = None
     changed = False
@@ -501,7 +514,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
         lines = f.readlines()
     else:
         if options.check_filenames:
-            for word in word_regex.findall(filename):
+            for word in extract_words(filename, word_regex, ignore_word_regex):
                 lword = word.lower()
                 if lword not in misspellings:
                     continue
@@ -555,7 +568,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
         fixed_words = set()
         asked_for = set()
 
-        for word in word_regex.findall(line):
+        for word in extract_words(line, word_regex, ignore_word_regex):
             lword = word.lower()
             if lword in misspellings:
                 context_shown = False
@@ -658,11 +671,22 @@ def main(*args):
     try:
         word_regex = re.compile(word_regex)
     except re.error as err:
-        print("ERROR: invalid regular expression \"%s\" (%s)" %
+        print("ERROR: invalid --regex \"%s\" (%s)" %
               (word_regex, err), file=sys.stderr)
         parser.print_help()
         return EX_USAGE
 
+    if options.ignore_regex:
+        try:
+            ignore_word_regex = re.compile(options.ignore_regex)
+        except re.error as err:
+            print("ERROR: invalid --ignore-regex \"%s\" (%s)" %
+                  (options.ignore_regex, err), file=sys.stderr)
+            parser.print_help()
+            return EX_USAGE
+    else:
+        ignore_word_regex = None
+
     ignore_words_files = options.ignore_words or []
     ignore_words = set()
     for ignore_words_file in ignore_words_files:
@@ -770,15 +794,16 @@ def main(*args):
                         continue
                     bad_count += parse_file(
                         fname, colors, summary, misspellings, exclude_lines,
-                        file_opener, word_regex, context, options)
+                        file_opener, word_regex, ignore_word_regex, context,
+                        options)
 
                 # skip (relative) directories
                 dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]
 
         else:
             bad_count += parse_file(
                 filename, colors, summary, misspellings, exclude_lines,
-                file_opener, word_regex, context, options)
+                file_opener, word_regex, ignore_word_regex, context, options)
 
     if summary:
         print("\n-------8<-------\nSUMMARY:")

diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
@@ -455,6 +455,39 @@ def test_context(tmpdir, capsys):
     assert 'ERROR' in lines[0]
 
 
+def test_ignore_regex_flag(tmpdir, capsys):
+    """Test ignore regex flag functionality."""
+    d = str(tmpdir)
+
+    # Invalid regex.
+    code, stdout, _ = cs.main('--ignore-regex=(', std=True)
+    assert code == EX_USAGE
+    assert 'usage:' in stdout
+
+    with open(op.join(d, 'flag.txt'), 'w') as f:
+        f.write('# Please see http://example.com/abandonned for info\n')
+    # Test file has 1 invalid entry, and it's not ignored by default.
+    assert cs.main(f.name) == 1
+    # An empty regex is the default value, and nothing is ignored.
+    assert cs.main(f.name, '--ignore-regex=') == 1
+    assert cs.main(f.name, '--ignore-regex=""') == 1
+    # Non-matching regex results in nothing being ignored.
+    assert cs.main(f.name, '--ignore-regex=^$') == 1
+    # A word can be ignored.
+    assert cs.main(f.name, '--ignore-regex=abandonned') == 0
+    # Ignoring part of a word splits it in two, so the typo goes undetected.
+    assert cs.main(f.name, '--ignore-regex=nn') == 0
+
+    with open(op.join(d, 'flag.txt'), 'w') as f:
+        f.write('abandonned donn\n')
+    # Test file has 2 invalid entries.
+    assert cs.main(f.name) == 2
+    # Ignoring "donn" cuts it out of both words, so neither is reported.
+    assert cs.main(f.name, '--ignore-regex=donn') == 0
+    # Requiring non-word chars around "donn" ignores only the standalone word.
+    assert cs.main(f.name, r'--ignore-regex=\Wdonn\W') == 1
+
+
 @contextlib.contextmanager
 def FakeStdin(text):
     if sys.version[0] == '2':