Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --ignore-regex #1592

Merged
merged 7 commits into from
Aug 10, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 31 additions & 6 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,13 @@ def parse_options(args):
'to include (when "-D -" or no "-D" is passed). '
'Current options are:' + builtin_opts + '\n'
'The default is %(default)r.')
parser.add_argument('--ignore-regex',
action='store', type=str,
help='regular expression which is used to find '
'patterns to ignore by treating as whitespace. '
'When writing regexes, consider ensuring there '
'are boundary non-word chars, e.g., '
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit sorry, grammar wise it should be "ensuring there are boundary..."

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

'"\\Wmatch\\W". Defaults to empty/disabled.')
parser.add_argument('-I', '--ignore-words',
action='append', metavar='FILE',
help='file that contains words which will be ignored '
Expand Down Expand Up @@ -489,8 +496,14 @@ def print_context(lines, index, context):
print('%s %s' % ('>' if i == index else ':', lines[i].rstrip()))


def extract_words(text, word_regex, ignore_word_regex):
    """Return the words found in ``text``, skipping ignored patterns.

    Args:
        text: The string to scan (a line of a file or a filename).
        word_regex: Compiled pattern whose ``findall`` yields the words.
        ignore_word_regex: Compiled pattern marking spans to ignore, or
            ``None`` (or any falsy value) to disable ignoring.

    Returns:
        The list produced by ``word_regex.findall`` on the filtered text.
    """
    if ignore_word_regex:
        # Each ignored span is replaced by a single space rather than
        # removed, so the text on either side of a match is never glued
        # together into a new word.
        text = ignore_word_regex.sub(' ', text)
    return word_regex.findall(text)


def parse_file(filename, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options):
file_opener, word_regex, ignore_word_regex, context, options):
bad_count = 0
lines = None
changed = False
Expand All @@ -501,7 +514,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
lines = f.readlines()
else:
if options.check_filenames:
for word in word_regex.findall(filename):
for word in extract_words(filename, word_regex, ignore_word_regex):
lword = word.lower()
if lword not in misspellings:
continue
Expand Down Expand Up @@ -555,7 +568,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
fixed_words = set()
asked_for = set()

for word in word_regex.findall(line):
for word in extract_words(line, word_regex, ignore_word_regex):
lword = word.lower()
if lword in misspellings:
context_shown = False
Expand Down Expand Up @@ -658,11 +671,22 @@ def main(*args):
try:
word_regex = re.compile(word_regex)
except re.error as err:
print("ERROR: invalid regular expression \"%s\" (%s)" %
print("ERROR: invalid --regex \"%s\" (%s)" %
(word_regex, err), file=sys.stderr)
parser.print_help()
return EX_USAGE

if options.ignore_regex:
try:
ignore_word_regex = re.compile(options.ignore_regex)
except re.error as err:
print("ERROR: invalid --ignore-regex \"%s\" (%s)" %
(options.ignore_regex, err), file=sys.stderr)
parser.print_help()
return EX_USAGE
else:
ignore_word_regex = None

ignore_words_files = options.ignore_words or []
ignore_words = set()
for ignore_words_file in ignore_words_files:
Expand Down Expand Up @@ -770,15 +794,16 @@ def main(*args):
continue
bad_count += parse_file(
fname, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options)
file_opener, word_regex, ignore_word_regex, context,
options)

# skip (relative) directories
dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]

else:
bad_count += parse_file(
filename, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options)
file_opener, word_regex, ignore_word_regex, context, options)

if summary:
print("\n-------8<-------\nSUMMARY:")
Expand Down
24 changes: 24 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,30 @@ def test_context(tmpdir, capsys):
assert 'ERROR' in lines[0]


def test_ignore_regex_flag(tmpdir, capsys):
"""Test ignore regex flag functionality."""
d = str(tmpdir)

# Invalid regex.
code, stdout, _ = cs.main('--ignore-regex=(', std=True)
assert code == EX_USAGE
assert 'usage:' in stdout

with open(op.join(d, 'flag.txt'), 'w') as f:
f.write('# Please see http://example.com/abandonned for info\n')
# Test file has 1 invalid entry, and it's not ignored by default.
assert cs.main(f.name) == 1
# An empty regex is the default value, and nothing is ignored.
assert cs.main(f.name, '--ignore-regex=') == 1
assert cs.main(f.name, '--ignore-regex=""') == 1
# Non-matching regex results in nothing being ignored.
assert cs.main(f.name, '--ignore-regex=^$') == 1
peternewman marked this conversation as resolved.
Show resolved Hide resolved
# A word can be ignored.
assert cs.main(f.name, '--ignore-regex=abandonned') == 0
peternewman marked this conversation as resolved.
Show resolved Hide resolved
# Ignoring part of the word can result in odd behavior.
assert cs.main(f.name, '--ignore-regex=nn') == 0
peternewman marked this conversation as resolved.
Show resolved Hide resolved


@contextlib.contextmanager
def FakeStdin(text):
if sys.version[0] == '2':
Expand Down