From a0c22d89e0a2c654ea367df9e8865a9279692790 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Sat, 2 Dec 2023 13:24:44 -0500 Subject: [PATCH] Avoid syntax error via invalid ur string prefix --- .../test/fixtures/pycodestyle/W605_0.py | 3 + .../test/fixtures/pycodestyle/W605_1.py | 42 ++- .../test/fixtures/pycodestyle/W605_2.py | 54 ---- .../ruff_linter/src/rules/pycodestyle/mod.rs | 1 - .../rules/invalid_escape_sequence.rs | 84 ++++-- ...s__pycodestyle__tests__W605_W605_0.py.snap | 20 ++ ...s__pycodestyle__tests__W605_W605_1.py.snap | 273 +++++++++++++----- 7 files changed, 303 insertions(+), 174 deletions(-) delete mode 100644 crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_0.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_0.py index 967ed5bc4d334b..85ec535e22d196 100644 --- a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_0.py +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_0.py @@ -43,3 +43,6 @@ def f(): ''' # noqa regex = '\\\_' + +#: W605:1:7 +u'foo\ bar' diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py index 20bf0ea14c8f7e..b34ad587c46d53 100644 --- a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py +++ b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_1.py @@ -1,40 +1,54 @@ +# Same as `W605_0.py` but using f-strings instead. + #: W605:1:10 -regex = '\.png$' +regex = f'\.png$' #: W605:2:1 -regex = ''' +regex = f''' \.png$ ''' #: W605:2:6 f( - '\_' + f'\_' ) #: W605:4:6 -""" +f""" multi-line literal with \_ somewhere in the middle """ +#: W605:1:38 +value = f'new line\nand invalid escape \_ here' -def f(): - #: W605:1:11 - return'\.png$' #: Okay -regex = r'\.png$' -regex = '\\.png$' -regex = r''' +regex = fr'\.png$' +regex = f'\\.png$' +regex = fr''' \.png$ ''' -regex = r''' +regex = fr''' \\.png$ ''' -s = '\\' -regex = '\w' # noqa -regex = ''' +s = f'\\' +regex = f'\w' # noqa +regex = f''' \w ''' # noqa + +regex = f'\\\_' +value = f'\{{1}}' +value = f'\{1}' +value = f'{1:\}' +value = f"{f"\{1}"}" +value = rf"{f"\{1}"}" + +# Okay +value = rf'\{{1}}' +value = rf'\{1}' +value = rf'{1:\}' +value = f"{rf"\{1}"}" diff --git a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py b/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py deleted file mode 100644 index b34ad587c46d53..00000000000000 --- a/crates/ruff_linter/resources/test/fixtures/pycodestyle/W605_2.py +++ /dev/null @@ -1,54 +0,0 @@ -# Same as `W605_0.py` but using f-strings instead. - -#: W605:1:10 -regex = f'\.png$' - -#: W605:2:1 -regex = f''' -\.png$ -''' - -#: W605:2:6 -f( - f'\_' -) - -#: W605:4:6 -f""" -multi-line -literal -with \_ somewhere -in the middle -""" - -#: W605:1:38 -value = f'new line\nand invalid escape \_ here' - - -#: Okay -regex = fr'\.png$' -regex = f'\\.png$' -regex = fr''' -\.png$ -''' -regex = fr''' -\\.png$ -''' -s = f'\\' -regex = f'\w' # noqa -regex = f''' -\w -''' # noqa - -regex = f'\\\_' -value = f'\{{1}}' -value = f'\{1}' -value = f'{1:\}' -value = f"{f"\{1}"}" -value = rf"{f"\{1}"}" - -# Okay -value = rf'\{{1}}' -value = rf'\{1}' -value = rf'{1:\}' -value = f"{rf"\{1}"}" diff --git a/crates/ruff_linter/src/rules/pycodestyle/mod.rs b/crates/ruff_linter/src/rules/pycodestyle/mod.rs index b2a02f5e23c557..3f6aa412bf1797 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/mod.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/mod.rs @@ -31,7 +31,6 @@ mod tests { #[test_case(Rule::BlankLineWithWhitespace, Path::new("W29.py"))] #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_0.py"))] #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_1.py"))] - #[test_case(Rule::InvalidEscapeSequence, Path::new("W605_2.py"))] #[test_case(Rule::LineTooLong, Path::new("E501.py"))] #[test_case(Rule::LineTooLong, Path::new("E501_3.py"))] #[test_case(Rule::MixedSpacesAndTabs, Path::new("E101.py"))] diff --git a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs index b4d100aa9d6f32..77a3d6ba621df0 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs +++ b/crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs @@ -3,9 +3,9 @@ use memchr::memchr_iter; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; -use ruff_python_parser::Tok; +use ruff_python_parser::{StringKind, Tok}; use ruff_source_file::Locator; -use ruff_text_size::{TextLen, TextRange, TextSize}; +use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::fix::edits::pad_start; @@ -58,18 +58,6 @@ impl AlwaysFixableViolation for InvalidEscapeSequence { } } -#[derive(Debug, PartialEq, Eq)] -enum FixTitle { - AddBackslash, - UseRawStringLiteral, -} - -#[derive(Debug)] -struct InvalidEscapeChar { - ch: char, - range: TextRange, -} - /// W605 pub(crate) fn invalid_escape_sequence( diagnostics: &mut Vec, @@ -195,41 +183,77 @@ pub(crate) fn invalid_escape_sequence( if contains_valid_escape_sequence { // Escape with backslash. for invalid_escape_char in &invalid_escape_chars { - let diagnostic = Diagnostic::new( + let mut diagnostic = Diagnostic::new( InvalidEscapeSequence { ch: invalid_escape_char.ch, fix_title: FixTitle::AddBackslash, }, - invalid_escape_char.range, - ) - .with_fix(Fix::safe_edit(Edit::insertion( + invalid_escape_char.range(), + ); + diagnostic.set_fix(Fix::safe_edit(Edit::insertion( r"\".to_string(), - invalid_escape_char.range.start() + TextSize::from(1), + invalid_escape_char.start() + TextSize::from(1), ))); invalid_escape_sequence.push(diagnostic); } } else { // Turn into raw string. for invalid_escape_char in &invalid_escape_chars { - let diagnostic = Diagnostic::new( + let mut diagnostic = Diagnostic::new( InvalidEscapeSequence { ch: invalid_escape_char.ch, fix_title: FixTitle::UseRawStringLiteral, }, - invalid_escape_char.range, - ) - .with_fix( - // If necessary, add a space between any leading keyword (`return`, `yield`, - // `assert`, etc.) and the string. For example, `return"foo"` is valid, but - // `returnr"foo"` is not. - Fix::safe_edit(Edit::insertion( - pad_start("r".to_string(), string_start_location, locator), - string_start_location, - )), + invalid_escape_char.range(), ); + + if matches!( + token, + Tok::String { + kind: StringKind::Unicode, + .. + } + ) { + // Replace the Unicode prefix with `r`. + diagnostic.set_fix(Fix::safe_edit(Edit::replacement( + "r".to_string(), + string_start_location, + string_start_location + TextSize::from(1), + ))); + } else { + // Insert the `r` prefix. + diagnostic.set_fix( + // If necessary, add a space between any leading keyword (`return`, `yield`, + // `assert`, etc.) and the string. For example, `return"foo"` is valid, but + // `returnr"foo"` is not. + Fix::safe_edit(Edit::insertion( + pad_start("r".to_string(), string_start_location, locator), + string_start_location, + )), + ); + } + invalid_escape_sequence.push(diagnostic); } } diagnostics.extend(invalid_escape_sequence); } + +#[derive(Debug, PartialEq, Eq)] +enum FixTitle { + AddBackslash, + UseRawStringLiteral, +} + +#[derive(Debug)] +struct InvalidEscapeChar { + ch: char, + range: TextRange, +} + +impl Ranged for InvalidEscapeChar { + fn range(&self) -> TextRange { + self.range + } +} diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_0.py.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_0.py.snap index 98da5fff80c25c..e4630487b7500b 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_0.py.snap +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_0.py.snap @@ -125,6 +125,8 @@ W605_0.py:45:12: W605 [*] Invalid escape sequence: `\_` 44 | 45 | regex = '\\\_' | ^^ W605 +46 | +47 | #: W605:1:7 | = help: Add backslash to escape sequence @@ -134,5 +136,23 @@ W605_0.py:45:12: W605 [*] Invalid escape sequence: `\_` 44 44 | 45 |-regex = '\\\_' 45 |+regex = '\\\\_' +46 46 | +47 47 | #: W605:1:7 +48 48 | u'foo\ bar' + +W605_0.py:48:6: W605 [*] Invalid escape sequence: `\ ` + | +47 | #: W605:1:7 +48 | u'foo\ bar' + | ^^ W605 + | + = help: Use a raw string literal + +ℹ Safe fix +45 45 | regex = '\\\_' +46 46 | +47 47 | #: W605:1:7 +48 |-u'foo\ bar' + 48 |+r'foo\ bar' diff --git a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap index 2fee83a5fee08e..c47507e0ccc88d 100644 --- a/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap +++ b/crates/ruff_linter/src/rules/pycodestyle/snapshots/ruff_linter__rules__pycodestyle__tests__W605_W605_1.py.snap @@ -1,104 +1,227 @@ --- source: crates/ruff_linter/src/rules/pycodestyle/mod.rs --- -W605_1.py:2:10: W605 [*] Invalid escape sequence: `\.` +W605_1.py:4:11: W605 [*] Invalid escape sequence: `\.` | -1 | #: W605:1:10 -2 | regex = '\.png$' - | ^^ W605 -3 | -4 | #: W605:2:1 +3 | #: W605:1:10 +4 | regex = f'\.png$' + | ^^ W605 +5 | +6 | #: W605:2:1 | = help: Use a raw string literal ℹ Safe fix -1 1 | #: W605:1:10 -2 |-regex = '\.png$' - 2 |+regex = r'\.png$' -3 3 | -4 4 | #: W605:2:1 -5 5 | regex = ''' - -W605_1.py:6:1: W605 [*] Invalid escape sequence: `\.` +1 1 | # Same as `W605_0.py` but using f-strings instead. +2 2 | +3 3 | #: W605:1:10 +4 |-regex = f'\.png$' + 4 |+regex = rf'\.png$' +5 5 | +6 6 | #: W605:2:1 +7 7 | regex = f''' + +W605_1.py:8:1: W605 [*] Invalid escape sequence: `\.` | -4 | #: W605:2:1 -5 | regex = ''' -6 | \.png$ +6 | #: W605:2:1 +7 | regex = f''' +8 | \.png$ | ^^ W605 -7 | ''' +9 | ''' | = help: Use a raw string literal ℹ Safe fix -2 2 | regex = '\.png$' -3 3 | -4 4 | #: W605:2:1 -5 |-regex = ''' - 5 |+regex = r''' -6 6 | \.png$ -7 7 | ''' -8 8 | - -W605_1.py:11:6: W605 [*] Invalid escape sequence: `\_` - | - 9 | #: W605:2:6 -10 | f( -11 | '\_' - | ^^ W605 -12 | ) +4 4 | regex = f'\.png$' +5 5 | +6 6 | #: W605:2:1 +7 |-regex = f''' + 7 |+regex = rf''' +8 8 | \.png$ +9 9 | ''' +10 10 | + +W605_1.py:13:7: W605 [*] Invalid escape sequence: `\_` + | +11 | #: W605:2:6 +12 | f( +13 | f'\_' + | ^^ W605 +14 | ) | = help: Use a raw string literal ℹ Safe fix -8 8 | -9 9 | #: W605:2:6 -10 10 | f( -11 |- '\_' - 11 |+ r'\_' -12 12 | ) -13 13 | -14 14 | #: W605:4:6 - -W605_1.py:18:6: W605 [*] Invalid escape sequence: `\_` - | -16 | multi-line -17 | literal -18 | with \_ somewhere +10 10 | +11 11 | #: W605:2:6 +12 12 | f( +13 |- f'\_' + 13 |+ rf'\_' +14 14 | ) +15 15 | +16 16 | #: W605:4:6 + +W605_1.py:20:6: W605 [*] Invalid escape sequence: `\_` + | +18 | multi-line +19 | literal +20 | with \_ somewhere | ^^ W605 -19 | in the middle -20 | """ +21 | in the middle +22 | """ | = help: Use a raw string literal ℹ Safe fix -12 12 | ) -13 13 | -14 14 | #: W605:4:6 -15 |-""" - 15 |+r""" -16 16 | multi-line -17 17 | literal -18 18 | with \_ somewhere - -W605_1.py:25:12: W605 [*] Invalid escape sequence: `\.` - | -23 | def f(): -24 | #: W605:1:11 -25 | return'\.png$' - | ^^ W605 -26 | -27 | #: Okay +14 14 | ) +15 15 | +16 16 | #: W605:4:6 +17 |-f""" + 17 |+rf""" +18 18 | multi-line +19 19 | literal +20 20 | with \_ somewhere + +W605_1.py:25:40: W605 [*] Invalid escape sequence: `\_` | - = help: Use a raw string literal +24 | #: W605:1:38 +25 | value = f'new line\nand invalid escape \_ here' + | ^^ W605 + | + = help: Add backslash to escape sequence ℹ Safe fix -22 22 | -23 23 | def f(): -24 24 | #: W605:1:11 -25 |- return'\.png$' - 25 |+ return r'\.png$' +22 22 | """ +23 23 | +24 24 | #: W605:1:38 +25 |-value = f'new line\nand invalid escape \_ here' + 25 |+value = f'new line\nand invalid escape \\_ here' 26 26 | -27 27 | #: Okay -28 28 | regex = r'\.png$' +27 27 | +28 28 | #: Okay + +W605_1.py:43:13: W605 [*] Invalid escape sequence: `\_` + | +41 | ''' # noqa +42 | +43 | regex = f'\\\_' + | ^^ W605 +44 | value = f'\{{1}}' +45 | value = f'\{1}' + | + = help: Add backslash to escape sequence + +ℹ Safe fix +40 40 | \w +41 41 | ''' # noqa +42 42 | +43 |-regex = f'\\\_' + 43 |+regex = f'\\\\_' +44 44 | value = f'\{{1}}' +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' + +W605_1.py:44:11: W605 [*] Invalid escape sequence: `\{` + | +43 | regex = f'\\\_' +44 | value = f'\{{1}}' + | ^^ W605 +45 | value = f'\{1}' +46 | value = f'{1:\}' + | + = help: Use a raw string literal + +ℹ Safe fix +41 41 | ''' # noqa +42 42 | +43 43 | regex = f'\\\_' +44 |-value = f'\{{1}}' + 44 |+value = rf'\{{1}}' +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' +47 47 | value = f"{f"\{1}"}" + +W605_1.py:45:11: W605 [*] Invalid escape sequence: `\{` + | +43 | regex = f'\\\_' +44 | value = f'\{{1}}' +45 | value = f'\{1}' + | ^^ W605 +46 | value = f'{1:\}' +47 | value = f"{f"\{1}"}" + | + = help: Use a raw string literal + +ℹ Safe fix +42 42 | +43 43 | regex = f'\\\_' +44 44 | value = f'\{{1}}' +45 |-value = f'\{1}' + 45 |+value = rf'\{1}' +46 46 | value = f'{1:\}' +47 47 | value = f"{f"\{1}"}" +48 48 | value = rf"{f"\{1}"}" + +W605_1.py:46:14: W605 [*] Invalid escape sequence: `\}` + | +44 | value = f'\{{1}}' +45 | value = f'\{1}' +46 | value = f'{1:\}' + | ^^ W605 +47 | value = f"{f"\{1}"}" +48 | value = rf"{f"\{1}"}" + | + = help: Use a raw string literal + +ℹ Safe fix +43 43 | regex = f'\\\_' +44 44 | value = f'\{{1}}' +45 45 | value = f'\{1}' +46 |-value = f'{1:\}' + 46 |+value = rf'{1:\}' +47 47 | value = f"{f"\{1}"}" +48 48 | value = rf"{f"\{1}"}" +49 49 | + +W605_1.py:47:14: W605 [*] Invalid escape sequence: `\{` + | +45 | value = f'\{1}' +46 | value = f'{1:\}' +47 | value = f"{f"\{1}"}" + | ^^ W605 +48 | value = rf"{f"\{1}"}" + | + = help: Use a raw string literal + +ℹ Safe fix +44 44 | value = f'\{{1}}' +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' +47 |-value = f"{f"\{1}"}" + 47 |+value = f"{rf"\{1}"}" +48 48 | value = rf"{f"\{1}"}" +49 49 | +50 50 | # Okay + +W605_1.py:48:15: W605 [*] Invalid escape sequence: `\{` + | +46 | value = f'{1:\}' +47 | value = f"{f"\{1}"}" +48 | value = rf"{f"\{1}"}" + | ^^ W605 +49 | +50 | # Okay + | + = help: Use a raw string literal + +ℹ Safe fix +45 45 | value = f'\{1}' +46 46 | value = f'{1:\}' +47 47 | value = f"{f"\{1}"}" +48 |-value = rf"{f"\{1}"}" + 48 |+value = rf"{rf"\{1}"}" +49 49 | +50 50 | # Okay +51 51 | value = rf'\{{1}}'