Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --ignore-regex #1592

Merged
merged 7 commits into from
Aug 10, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 26 additions & 5 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import textwrap

# Default pattern for a single "word": one or more word characters, hyphens,
# apostrophes (straight or typographic) or backticks.
word_regex_def = u"[\\w\\-'’`]+"
# Default pattern for text that is removed before words are extracted.
# Matches common URIs and email addresses, in that order:
#   * URI: an http/https/ftp/smtp scheme, "://", a host made of at least one
#     dot-terminated label followed by a final label of 2+ word characters,
#     and an optional path/query/fragment part.
#   * email: local part, "@", domain, and a final label of 2+ lowercase
#     ASCII letters (the pattern is compiled without re.IGNORECASE).
# A scheme-less "example.com/path" or a dot-less host is therefore NOT
# ignored and is still spell-checked.
ignore_word_regex_def = r"(?:(?:https?|ftp|smtp):\/\/([\w-]+\.)+\w{2,}(?:/(?:[\w:/?#\[\]@!$&'()*+,;=.~-]*/?)*)?|[\w.%+-]+@[\w.-]+\.[a-z]{2,})" # noqa: E501
peternewman marked this conversation as resolved.
Show resolved Hide resolved
encodings = ('utf-8', 'iso-8859-1')
USAGE = """
\t%prog [OPTIONS] [file1 file2 ... fileN]
Expand Down Expand Up @@ -273,6 +275,11 @@ def parse_options(args):
'to include (when "-D -" or no "-D" is passed). '
'Current options are:' + builtin_opts + '\n'
'The default is %(default)r.')
parser.add_argument('--ignore-regex',
action='store', type=str,
help='regular expression which is used to find words '
'to ignore. Matches URIs and emails by default. '
'Can be disabled by setting to "^$".')
parser.add_argument('-I', '--ignore-words',
action='append', metavar='FILE',
help='file that contains words which will be ignored '
Expand Down Expand Up @@ -489,8 +496,13 @@ def print_context(lines, index, context):
print('%s %s' % ('>' if i == index else ':', lines[i].rstrip()))


def extract_words(text, word_regex, ignore_word_regex):
    """Return the words in ``text`` that should be spell-checked.

    Every span matched by ``ignore_word_regex`` is first replaced with a
    single space, so ignored text is never returned and cannot glue its
    neighbours together into one word. ``word_regex`` is then applied to
    what is left.

    :param text: string to scan (callers pass a line or a file name).
    :param word_regex: compiled pattern describing a single word.
    :param ignore_word_regex: compiled pattern describing text to skip.
    :return: the list produced by ``word_regex.findall`` on the filtered
        text.
    """
    interesting_text = ignore_word_regex.sub(' ', text)
    return word_regex.findall(interesting_text)


def parse_file(filename, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options):
file_opener, word_regex, ignore_word_regex, context, options):
bad_count = 0
lines = None
changed = False
Expand All @@ -501,7 +513,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
lines = f.readlines()
else:
if options.check_filenames:
for word in word_regex.findall(filename):
for word in extract_words(filename, word_regex, ignore_word_regex):
lword = word.lower()
if lword not in misspellings:
continue
Expand Down Expand Up @@ -555,7 +567,7 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
fixed_words = set()
asked_for = set()

for word in word_regex.findall(line):
for word in extract_words(line, word_regex, ignore_word_regex):
lword = word.lower()
if lword in misspellings:
context_shown = False
Expand Down Expand Up @@ -662,6 +674,14 @@ def main(*args):
(word_regex, err), file=sys.stderr)
parser.print_help()
return EX_USAGE
ignore_word_regex = options.ignore_regex or ignore_word_regex_def
try:
ignore_word_regex = re.compile(ignore_word_regex)
except re.error as err:
print("ERROR: invalid regular expression \"%s\" (%s)" %
(ignore_word_regex, err), file=sys.stderr)
parser.print_help()
return EX_USAGE

ignore_words_files = options.ignore_words or []
ignore_words = set()
Expand Down Expand Up @@ -770,15 +790,16 @@ def main(*args):
continue
bad_count += parse_file(
fname, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options)
file_opener, word_regex, ignore_word_regex, context,
options)

# skip (relative) directories
dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]

else:
bad_count += parse_file(
filename, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options)
file_opener, word_regex, ignore_word_regex, context, options)

if summary:
print("\n-------8<-------\nSUMMARY:")
Expand Down
80 changes: 80 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,86 @@ def test_context(tmpdir, capsys):
assert 'ERROR' in lines[0]


def test_ignore_regex_flag(tmpdir, capsys):
    """Test ignore regex flag functionality."""
    d = str(tmpdir)

    # Invalid regex: the pattern cannot be compiled, so usage is printed.
    code, stdout, _ = cs.main('--ignore-regex=(', std=True)
    assert code == EX_USAGE
    assert 'usage:' in stdout

    # "^$" matches only an empty line, so nothing is ignored and the typo
    # inside the URI is reported again (default URI ignoring is disabled).
    with open(op.join(d, 'flag.txt'), 'w') as f:
        f.write('# Please see http://example.com/abandonned for info\n')
    assert cs.main(f.name, '--ignore-regex=^$') == 1

    # Custom ignore: the typo itself is matched by the pattern.
    with open(op.join(d, 'flag.txt'), 'w') as f:
        f.write('abandonned\n')
    assert cs.main(f.name, '--ignore-regex=abandonned') == 0
peternewman marked this conversation as resolved.
Show resolved Hide resolved


def test_uri(tmpdir, capsys):
    """Test ignore regex functionality for URIs."""
    fname = op.join(str(tmpdir), 'uri.txt')

    # Each entry: (text written to the file, expected cs.main() result).
    cases = [
        # Ignoring text in path.
        ('# Please see http://example.com/abandonned for info\n', 0),
        # Test a different protocol.
        ('# Please see https://example.com/abandonned for info\n', 0),
        # Ignoring text in path ending with /.
        ('# Please see http://example.com/abandonned/ for info\n', 0),
        # Ignoring text in domain.
        ('# Please see http://abandonned.com/example for info\n', 0),
        # Ignoring text in anchor.
        ('# Please see http://example.com/ex#abandonned for info\n', 0),
        # Typo because there's no protocol.
        ('# Please see example.com/abandonned for info\n', 1),
        # Typo because there aren't enough domain parts.
        ('# Please see http://abandonned for info\n', 1),
    ]

    for text, expected in cases:
        # The same file is rewritten for every case, as each check is
        # independent of the previous one.
        with open(fname, 'w') as f:
            f.write(text)
        assert cs.main(fname) == expected, text


def test_email(tmpdir, capsys):
    """Test ignore regex functionality for emails."""
    d = str(tmpdir)

    # Ignoring text in username: the misspelling is the local part of a
    # complete address, so it must not be reported.
    with open(op.join(d, 'email.txt'), 'w') as f:
        f.write('# Please contact abandonned@example.com for info\n')
    assert cs.main(f.name) == 0

    # Ignoring text in domain: the misspelling is the domain label of a
    # complete address, so it must not be reported.
    with open(op.join(d, 'email.txt'), 'w') as f:
        f.write('# Please contact example@abandonned.com for info\n')
    assert cs.main(f.name) == 0

    # Typo because there's no TLD for an email: without a ".tld" suffix the
    # text is not treated as an address and is spell-checked normally.
    with open(op.join(d, 'email.txt'), 'w') as f:
        f.write('# Please contact abandonned@example for info\n')
    assert cs.main(f.name) == 1


@contextlib.contextmanager
def FakeStdin(text):
if sys.version[0] == '2':
Expand Down