Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚧 Ignore escaped characters #1422

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 67 additions & 3 deletions codespell_lib/_codespell.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ def parse_options(args):
help='Comma separated list of words to be ignored '
'by codespell. Words are case sensitive based on '
'how they are written in the dictionary file')
parser.add_argument('-P', '--sub-pairs', type=str, metavar='FILE',
help='Custom substitution text file that contains '
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit unclear from the help text what this option is for. Is it meant to "fix up" the dictionary so that it copes with matching escape sequences, to actually do sed-style runs on my codebase, or something else?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this linked to #233?

'substituions key value pairs. Can be used to '
akaszynski marked this conversation as resolved.
Show resolved Hide resolved
'substitute escaped characters.')
parser.add_argument('-r', '--regex',
action='store', type=str,
help='Regular expression which is used to find words. '
Expand Down Expand Up @@ -344,6 +348,25 @@ def build_dict(filename, misspellings, ignore_words):
misspellings[key] = Misspelling(data, fix, reason)


def build_sub_pairs(filename):
    """Parse substitution pairs from a text file.

    Parameters
    ----------
    filename : str
        Path to a UTF-8 encoded text file holding one pair per line.

    Returns
    -------
    sub_pairs : dict
        Maps each text to be substituted to its substitution.

    Raises
    ------
    Exception
        If ``filename`` is not an existing file.
    ValueError
        If a non-blank line has no ``->`` separator or nothing in front
        of it.

    Notes
    -----
    File expected to be in the following format
    tobesubstituted->substituted

    Blank lines are skipped.  Only the first ``->`` on a line separates
    the pair, so the substituted text may itself contain ``->``.
    """
    if not os.path.isfile(filename):
        raise Exception('Unable to find sub pair file "{}"'.format(filename))

    sub_pairs = {}
    with codecs.open(filename, mode='r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, 1):
            # Strip only the line terminator: it would otherwise become
            # part of the substituted text.  Other whitespace is kept
            # because a pair such as "\n-> " relies on it.
            line = line.rstrip('\r\n')
            if not line:
                continue

            key, separator, data = line.partition('->')
            if not separator or not key:
                raise ValueError(
                    'Invalid sub pair on line {} of "{}": {!r}'.format(
                        line_number, filename, line))
            sub_pairs[key] = data
    return sub_pairs


def is_hidden(filename, check_hidden):
bfilename = os.path.basename(filename)

Expand All @@ -369,6 +392,37 @@ def fix_case(word, fixword):
return fixword


def multiple_replace(find_dict, text):
    r"""Multiple find and replace based on a dictionary.

    All keys are searched for in a single left-to-right pass over the
    original text, so text produced by one substitution is never
    substituted again and the result does not depend on the order of
    the dictionary.

    Parameters
    ----------
    find_dict : dict
        Dictionary containing values to find and replace. For example
        ``{'\\n': ' ', "\\'": "'"}``.  Keys are matched literally; an
        empty key is ignored.

    text : str
        Text to perform substitution on.

    Returns
    -------
    sub_text : str
        Text with substitutions.

    Examples
    --------
    >>> line = r"this was a cat meow meow\nWhere don\'t"
    >>> find_dict = {'\\n': ' ', "\\'": "'"}
    >>> multiple_replace(find_dict, line)
    "this was a cat meow meow Where don't"

    """
    import re

    # An empty key would match at every position of the text.
    keys = [key for key in find_dict if key]
    if not keys:
        return text

    # Try longer keys first so that a key which is a prefix of another
    # key cannot shadow it in the alternation.
    keys.sort(key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(key) for key in keys))

    # A callable replacement inserts the value verbatim, without
    # interpreting backslashes as regex template escapes.
    return pattern.sub(lambda match: find_dict[match.group(0)], text)


def ask_for_word_fix(line, wrongword, misspelling, interactivity):
if interactivity <= 0:
return misspelling.fix, fix_case(wrongword, misspelling.data)
Expand Down Expand Up @@ -428,7 +482,7 @@ def print_context(lines, index, context):


def parse_file(filename, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options):
file_opener, word_regex, context, sub_pairs, options):
bad_count = 0
lines = None
changed = False
Expand Down Expand Up @@ -492,6 +546,9 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
fixed_words = set()
asked_for = set()

# escape valid characters or perform general substitutions
line = multiple_replace(sub_pairs, line)

for word in word_regex.findall(line):
lword = word.lower()
if lword in misspellings:
Expand Down Expand Up @@ -659,6 +716,13 @@ def main(*args):
if options.exclude_file:
build_exclude_hashes(options.exclude_file, exclude_lines)

# build substitution dictionary
if options.sub_pairs:
sub_pairs = build_sub_pairs(options.sub_pairs)
else:
# default escape substitution dictionary
sub_pairs = {'\\n': ' ', r"\'": "'"}

file_opener = FileOpener(options.hard_encoding_detection,
options.quiet_level)
glob_match = GlobMatch(options.skip)
Expand All @@ -684,15 +748,15 @@ def main(*args):
continue
bad_count += parse_file(
fname, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options)
file_opener, word_regex, context, sub_pairs, options)

# skip (relative) directories
dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]

else:
bad_count += parse_file(
filename, colors, summary, misspellings, exclude_lines,
file_opener, word_regex, context, options)
file_opener, word_regex, context, sub_pairs, options)

if summary:
print("\n-------8<-------\nSUMMARY:")
Expand Down
24 changes: 24 additions & 0 deletions codespell_lib/tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import subprocess
import sys

import pytest

import codespell_lib as cs


Expand Down Expand Up @@ -73,6 +75,28 @@ def test_basic(tmpdir, capsys):
assert cs.main(d) == 0


def test_escaped(tmpdir, capsys):
    """Check that text holding escaped newline sequences is accepted."""
    workdir = str(tmpdir)
    target = op.join(workdir, 'escaped_char.txt')
    with open(target, 'w') as handle:
        handle.write(r"\n\nWe can")

    # main is expected to return 0 for this directory
    result = cs.main(workdir)
    assert result == 0


def test_escaped_sub_file(tmpdir, capsys):
    """Check escaped characters handled through a substitution file."""
    workdir = str(tmpdir)

    # text to be checked, containing an escaped apostrophe
    text_path = op.join(workdir, 'escaped_text.txt')
    with open(text_path, 'w') as handle:
        handle.write(r"We can\'t")

    # substitution file mapping the escaped apostrophe to a plain one
    sub_pair_filename = op.join(workdir, 'sub_pairs.txt')
    with open(sub_pair_filename, 'w') as handle:
        handle.write(r"\'->'")

    # a substitution file that does not exist must raise
    with pytest.raises(Exception):
        cs.main(workdir, '-P', 'notafile')

    # with the real substitution file main is expected to return 0
    result = cs.main(workdir, '-P', sub_pair_filename)
    assert result == 0


def test_interactivity(tmpdir, capsys):
"""Test interaction"""
# Windows can't read a currently-opened file, so here we use
Expand Down