From 0ed0107e780969778e036e801bbafb0e936cbcbd Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Wed, 26 Jun 2024 08:41:30 +0530 Subject: [PATCH] Consider 2-character EOL before line continuation --- .gitattributes | 1 + .../line_continuation_windows_eol.py | 4 + crates/ruff_python_parser/src/lexer.rs | 48 +++++--- ...ing__line_continuation_windows_eol.py.snap | 105 ++++++++++++++++++ 4 files changed, 141 insertions(+), 17 deletions(-) create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap diff --git a/.gitattributes b/.gitattributes index 8f333acef68b92..9ae06f93d11a96 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8,6 +8,7 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf +crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py text eol=crlf crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py text eol=crlf crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py text eol=cr diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py b/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py new file mode 100644 index 00000000000000..b78f87c6c5c575 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py @@ -0,0 +1,4 @@ +call(a, b, \\\ + +def bar(): + pass \ No newline at end of file diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index cc04e7926476f7..0640bd8349f666 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1393,26 +1393,40 @@ impl<'src> Lexer<'src> { while let Some(ch) = reverse_chars.next() { if is_python_whitespace(ch) { current_position -= ch.text_len(); - } else if matches!(ch, '\n' | '\r') { - current_position -= ch.text_len(); - // Count the number of backslashes before the newline character. - let mut backslash_count = 0; - while reverse_chars.next_if_eq(&'\\').is_some() { - backslash_count += 1; - } - if backslash_count == 0 { - // No escapes: `\n` - newline_position = Some(current_position); - } else { - if backslash_count % 2 == 0 { - // Even number of backslashes i.e., all backslashes cancel each other out - // which means the newline character is not being escaped. - newline_position = Some(current_position); + continue; + } + + match ch { + '\n' => { + current_position -= ch.text_len(); + if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') { + current_position -= carriage_return.text_len(); } - current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count); } + '\r' => { + current_position -= ch.text_len(); + } + _ => break, + } + + debug_assert!(matches!(ch, '\n' | '\r')); + + // Count the number of backslashes before the newline character. + let mut backslash_count = 0; + while reverse_chars.next_if_eq(&'\\').is_some() { + backslash_count += 1; + } + + if backslash_count == 0 { + // No escapes: `\n` + newline_position = Some(current_position); } else { - break; + if backslash_count % 2 == 0 { + // Even number of backslashes i.e., all backslashes cancel each other out + // which means the newline character is not being escaped. + newline_position = Some(current_position); + } + current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count); } } diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap new file mode 100644 index 00000000000000..9d4e05956208d2 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap @@ -0,0 +1,105 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py +--- +## AST + +``` +Module( + ModModule { + range: 0..38, + body: [ + Expr( + StmtExpr { + range: 0..14, + value: Call( + ExprCall { + range: 0..14, + func: Name( + ExprName { + range: 0..4, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 4..14, + args: [ + Name( + ExprName { + range: 5..6, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 8..9, + id: "b", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + }, + ), + FunctionDef( + StmtFunctionDef { + range: 18..38, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 22..25, + }, + type_params: None, + parameters: Parameters { + range: 25..27, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 34..38, + }, + ), + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | call(a, b, \\\ + | ^ Syntax Error: unexpected character after line continuation character +2 | +3 | def bar(): + | + + + | +1 | call(a, b, \\\ + | ^ Syntax Error: unexpected character after line continuation character +2 | +3 | def bar(): + | + + + | +1 | call(a, b, \\\ +2 | / +3 | | def bar(): + | |_^ Syntax Error: Expected ')', found newline +4 | pass + |