Skip to content

Commit

Permalink
Consider 2-character EOL before line continuation (#12035)
Browse files Browse the repository at this point in the history
## Summary

This PR fixes a bug introduced in
#12008, which didn't account for the
two-character newline (`\r\n`) that can follow the line continuation character.

For example, consider the following code, shown with its newline and indentation characters made visible:
```py
call(foo # comment \\r\n
\r\n
def bar():\r\n
....pass\r\n
```
The lexer is at `def` when it runs the re-lexing logic and tries to move
back to a newline character. It encounters `\n` and treats it as not being
escaped (incorrect), because the backslash is directly followed by `\r`
rather than `\n`, so it moves the lexer to the `\n` character, which is the
second half of the `\r\n` pair. This creates an overlap in token ranges,
which causes the panic.

```
Name 0..4
Lpar 4..5
Name 5..8
Comment 9..20
NonLogicalNewline 20..22 <-- overlap between
Newline 21..22           <-- these two tokens
NonLogicalNewline 22..23
Def 23..26
...
```

fixes: #12028 

## Test Plan

Add a test case with a line continuation and Windows-style (`\r\n`) newline
characters.
  • Loading branch information
dhruvmanila authored Jun 26, 2024
1 parent 7cb2619 commit 47c9ed0
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 17 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf
crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf
crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf

crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py text eol=crlf
crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py text eol=crlf
crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py text eol=cr

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
call(a, b, # comment \

def bar():
pass
48 changes: 31 additions & 17 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1393,26 +1393,40 @@ impl<'src> Lexer<'src> {
while let Some(ch) = reverse_chars.next() {
if is_python_whitespace(ch) {
current_position -= ch.text_len();
} else if matches!(ch, '\n' | '\r') {
current_position -= ch.text_len();
// Count the number of backslashes before the newline character.
let mut backslash_count = 0;
while reverse_chars.next_if_eq(&'\\').is_some() {
backslash_count += 1;
}
if backslash_count == 0 {
// No escapes: `\n`
newline_position = Some(current_position);
} else {
if backslash_count % 2 == 0 {
// Even number of backslashes i.e., all backslashes cancel each other out
// which means the newline character is not being escaped.
newline_position = Some(current_position);
continue;
}

match ch {
'\n' => {
current_position -= ch.text_len();
if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') {
current_position -= carriage_return.text_len();
}
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
}
'\r' => {
current_position -= ch.text_len();
}
_ => break,
}

debug_assert!(matches!(ch, '\n' | '\r'));

// Count the number of backslashes before the newline character.
let mut backslash_count = 0;
while reverse_chars.next_if_eq(&'\\').is_some() {
backslash_count += 1;
}

if backslash_count == 0 {
// No escapes: `\n`
newline_position = Some(current_position);
} else {
break;
if backslash_count % 2 == 0 {
// Even number of backslashes i.e., all backslashes cancel each other out
// which means the newline character is not being escaped.
newline_position = Some(current_position);
}
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_windows_eol.py
---
## AST

```
Module(
ModModule {
range: 0..46,
body: [
Expr(
StmtExpr {
range: 0..10,
value: Call(
ExprCall {
range: 0..10,
func: Name(
ExprName {
range: 0..4,
id: "call",
ctx: Load,
},
),
arguments: Arguments {
range: 4..10,
args: [
Name(
ExprName {
range: 5..6,
id: "a",
ctx: Load,
},
),
Name(
ExprName {
range: 8..9,
id: "b",
ctx: Load,
},
),
],
keywords: [],
},
},
),
},
),
FunctionDef(
StmtFunctionDef {
range: 26..46,
is_async: false,
decorator_list: [],
name: Identifier {
id: "bar",
range: 30..33,
},
type_params: None,
parameters: Parameters {
range: 33..35,
posonlyargs: [],
args: [],
vararg: None,
kwonlyargs: [],
kwarg: None,
},
returns: None,
body: [
Pass(
StmtPass {
range: 42..46,
},
),
],
},
),
],
},
)
```
## Errors

|
1 | call(a, b, # comment \
2 | /
3 | | def bar():
| |_^ Syntax Error: Expected ')', found newline
4 | pass
|

0 comments on commit 47c9ed0

Please sign in to comment.