Skip to content

Commit

Permalink
Merge pull request #12 from jmelahman/jamison/invalid-escape-sequence
Browse files (browse the repository at this point in the history)
[chore] fix "SyntaxError: invalid escape sequence"
  • Loading branch information
ZeroCool940711 authored Jan 3, 2024
2 parents c663c61 + 2a04eb9 commit 3d98428
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/whoosh/analysis/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
\\S+? # URL body
(?=\\s|[.]\\s|$|[.]$) # Stop at space/end, or a dot followed by space/end
) | ( # or...
- \w+([:.]?\w+)* # word characters, with opt. internal colons/dots
+ \\w+([:.]?\\w+)* # word characters, with opt. internal colons/dots
)
""", verbose=True)

Expand Down Expand Up @@ -145,7 +145,7 @@ def __call__(self, tokens):


class TeeFilter(Filter):
- """Interleaves the results of two or more filters (or filter chains).
+ r"""Interleaves the results of two or more filters (or filter chains).
NOTE: because it needs to create copies of each token for each sub-filter,
this filter is quite slow.
Expand Down
6 changes: 3 additions & 3 deletions src/whoosh/analysis/intraword.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@


class CompoundWordFilter(Filter):
- """Given a set of words (or any object with a ``__contains__`` method),
+ r"""Given a set of words (or any object with a ``__contains__`` method),
break any tokens in the stream that are composites of words in the word set
into their individual parts.
Expand Down Expand Up @@ -272,7 +272,7 @@ class IntraWordFilter(Filter):
>>> iwf_i = IntraWordFilter(mergewords=True, mergenums=True)
>>> iwf_q = IntraWordFilter(mergewords=False, mergenums=False)
>>> iwf = MultiFilter(index=iwf_i, query=iwf_q)
- >>> analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter()
+ >>> analyzer = RegexTokenizer(r"\\S+") | iwf | LowercaseFilter()
(See :class:`MultiFilter`.)
"""
Expand All @@ -282,7 +282,7 @@ class IntraWordFilter(Filter):
__inittypes__ = dict(delims=text_type, splitwords=bool, splitnums=bool,
mergewords=bool, mergenums=bool)

- def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\|;:,./?`~=+"),
+ def __init__(self, delims=u("-_'\"()!@#$%^&*[]{}<>\\|;:,./?`~=+"),
splitwords=True, splitnums=True,
mergewords=False, mergenums=False):
"""
Expand Down
2 changes: 1 addition & 1 deletion src/whoosh/lang/paicehusk.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class PaiceHuskStemmer(object):
(?P<cont>[.>])
""", re.UNICODE | re.VERBOSE)

- stem_expr = re.compile("^\w+", re.UNICODE)
+ stem_expr = re.compile(r"^\w+", re.UNICODE)

def __init__(self, ruletable):
"""
Expand Down
2 changes: 1 addition & 1 deletion src/whoosh/lang/porter2.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def remove_initial_apostrophe(word):
def capitalize_consonant_ys(word):
if word.startswith('y'):
word = 'Y' + word[1:]
- return ccy_exp.sub('\g<1>Y', word)
+ return ccy_exp.sub(r'\g<1>Y', word)


def step_0(word):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ def test_stop_lang():


def test_issue358():
- t = analysis.RegexTokenizer("\w+")
+ t = analysis.RegexTokenizer(r"\w+")
with pytest.raises(analysis.CompositionError):
_ = t | analysis.StandardAnalyzer()

Expand Down

0 comments on commit 3d98428

Please sign in to comment.