codespell-project · akaszynski · Feb 6, 2020 · Feb 7, 2020 · Feb 14, 2020 · Feb 15, 2020
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -232,6 +232,10 @@ def parse_options(args):
                         help='Comma separated list of words to be ignored '
                              'by codespell. Words are case sensitive based on '
                              'how they are written in the dictionary file')
+    parser.add_argument('-P', '--sub-pairs', type=str, metavar='FILE',
+                        help='Custom substitution text file that contains '
+                             'substitution key value pairs.  Can be used to '
+                             'substitute escaped characters.')
     parser.add_argument('-r', '--regex',
                         action='store', type=str,
                         help='Regular expression which is used to find words. '
@@ -344,6 +348,25 @@ def build_dict(filename, misspellings, ignore_words):
             misspellings[key] = Misspelling(data, fix, reason)
 
 
+def build_sub_pairs(filename):
+    """Parse ``key->value`` substitution pairs, one per line, from a file.
+
+    Trailing CR/LF is stripped so it never leaks into the value, empty
+    lines are skipped and only the first ``->`` splits key from value.
+    Raises ``Exception`` if ``filename`` is not a file and ``ValueError``
+    if a non-empty line has no ``->``.  Returns a dict of the pairs.
+    """
+    if not os.path.isfile(filename):
+        raise Exception('Unable to find sub pair file "{}"'.format(filename))
+    sub_pairs = {}
+    with codecs.open(filename, mode='r', encoding='utf-8') as f:
+        for line in f:
+            line = line.rstrip('\r\n')
+            if line:
+                key, data = line.split('->', 1)
+                sub_pairs[key] = data
+    return sub_pairs
+
+
 def is_hidden(filename, check_hidden):
     bfilename = os.path.basename(filename)
 
@@ -369,6 +392,37 @@ def fix_case(word, fixword):
     return fixword
 
 
+def multiple_replace(find_dict, text):
+    r"""Multiple find and replace based on a dictionary.
+
+    Parameters
+    ----------
+    find_dict : dict
+        Dictionary mapping text to find to its replacement.  For example
+        ``{'\\n': ' ', r"\'": "'"}``.  Pairs are applied one after another
+        in the dictionary's iteration order; an empty key is ignored.
+
+    text : str
+        Text to perform substitution on.
+
+    Returns
+    -------
+    sub_text : str
+        Text with substitutions.
+
+    Examples
+    --------
+    >>> text = r'this was a cat meow meow\nWhere don\'t'
+    >>> find_dict = {'\\n': ' ', r"\'": "'"}
+    >>> multiple_replace(find_dict, text)
+    "this was a cat meow meow Where don't"
+    """
+    for key, rep in find_dict.items():
+        if key:  # str.replace('', rep) would insert rep between every char
+            text = text.replace(key, rep)
+    return text
+
+
 def ask_for_word_fix(line, wrongword, misspelling, interactivity):
     if interactivity <= 0:
         return misspelling.fix, fix_case(wrongword, misspelling.data)
@@ -428,7 +482,7 @@ def print_context(lines, index, context):
 
 
 def parse_file(filename, colors, summary, misspellings, exclude_lines,
-               file_opener, word_regex, context, options):
+               file_opener, word_regex, context, sub_pairs, options):
     bad_count = 0
     lines = None
     changed = False
@@ -492,6 +546,9 @@ def parse_file(filename, colors, summary, misspellings, exclude_lines,
         fixed_words = set()
         asked_for = set()
 
+        # escape valid characters or perform general substitutions
+        line = multiple_replace(sub_pairs, line)
+
         for word in word_regex.findall(line):
             lword = word.lower()
             if lword in misspellings:
@@ -659,6 +716,13 @@ def main(*args):
     if options.exclude_file:
         build_exclude_hashes(options.exclude_file, exclude_lines)
 
+    # build substitution dictionary
+    if options.sub_pairs:
+        sub_pairs = build_sub_pairs(options.sub_pairs)
+    else:
+        # default escape substitution dictionary
+        sub_pairs = {'\\n': ' ', r"\'": "'"}
+
     file_opener = FileOpener(options.hard_encoding_detection,
                              options.quiet_level)
     glob_match = GlobMatch(options.skip)
@@ -684,15 +748,15 @@ def main(*args):
                         continue
                     bad_count += parse_file(
                         fname, colors, summary, misspellings, exclude_lines,
-                        file_opener, word_regex, context, options)
+                        file_opener, word_regex, context, sub_pairs, options)
 
                 # skip (relative) directories
                 dirs[:] = [dir_ for dir_ in dirs if not glob_match.match(dir_)]
 
         else:
             bad_count += parse_file(
                 filename, colors, summary, misspellings, exclude_lines,
-                file_opener, word_regex, context, options)
+                file_opener, word_regex, context, sub_pairs, options)
 
     if summary:
         print("\n-------8<-------\nSUMMARY:")

diff --git a/codespell_lib/tests/test_basic.py b/codespell_lib/tests/test_basic.py
@@ -8,6 +8,8 @@
 import subprocess
 import sys
 
+import pytest
+
 import codespell_lib as cs
 
 
@@ -73,6 +75,28 @@ def test_basic(tmpdir, capsys):
     assert cs.main(d) == 0
 
 
+def test_escaped(tmpdir, capsys):
+    """Check that a file with escaped newlines makes ``cs.main`` return 0."""
+    workdir = str(tmpdir)
+    with open(op.join(workdir, 'escaped_char.txt'), 'w') as handle:
+        handle.write(r"\n\nWe can")
+    assert cs.main(workdir) == 0
+
+
+def test_escaped_sub_file(tmpdir, capsys):
+    """Check ``-P`` with a missing file and with a valid substitution file."""
+    workdir = str(tmpdir)
+    pairs_path = op.join(workdir, 'sub_pairs.txt')
+    with open(op.join(workdir, 'escaped_text.txt'), 'w') as handle:
+        handle.write(r"We can\'t")
+    with open(pairs_path, 'w') as handle:
+        handle.write(r"\'->'")
+
+    with pytest.raises(Exception):
+        cs.main(workdir, '-P', 'notafile')
+    assert cs.main(workdir, '-P', pairs_path) == 0
+
+
 def test_interactivity(tmpdir, capsys):
     """Test interaction"""
     # Windows can't read a currently-opened file, so here we use