Skip to content

Commit

Permalink
Fix unicode handling in PLE2515
Browse files Browse the repository at this point in the history
Previously, we used byte indices where we should have used char indices, causing crashes when there were non-ASCII characters before our replacements

Fixes #3716
  • Loading branch information
konstin committed Apr 6, 2023
1 parent 34e9786 commit de7e379
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 2 deletions.
Binary file modified crates/ruff/resources/test/fixtures/pylint/invalid_characters.py
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,9 @@ pub fn invalid_string_characters(
let text = locator.slice(Range::new(start, end));

for (row, line) in UniversalNewlineIterator::from(text).enumerate() {
for (column, match_) in line.match_indices(&['\x08', '\x1A', '\x1B', '\0', '\u{200b}']) {
for (column_bytes, match_) in
line.match_indices(&['\x08', '\x1A', '\x1B', '\0', '\u{200b}'])
{
let (replacement, rule): (&str, DiagnosticKind) = match match_.chars().next().unwrap() {
'\x08' => ("\\b", InvalidCharacterBackspace.into()),
'\x1A' => ("\\x1A", InvalidCharacterSub.into()),
Expand All @@ -193,7 +195,8 @@ pub fn invalid_string_characters(
'\u{200b}' => ("\\u200b", InvalidCharacterZeroWidthSpace.into()),
_ => unreachable!(),
};
let location = helpers::to_absolute(Location::new(row + 1, column), start);
let column_chars = line[..column_bytes].chars().count();
let location = helpers::to_absolute(Location::new(row + 1, column_chars), start);
let end_location = Location::new(location.row(), location.column() + 1);
let mut diagnostic = Diagnostic::new(rule, Range::new(location, end_location));
if autofix {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,67 @@ expression: diagnostics
row: 34
column: 13
parent: ~
- kind:
name: InvalidCharacterZeroWidthSpace
body: "Invalid unescaped character zero-width-space, use \"\\u200B\" instead"
suggestion: Replace with escape sequence
fixable: true
location:
row: 38
column: 35
end_location:
row: 38
column: 36
fix:
edits:
- content: "\\u200b"
location:
row: 38
column: 35
end_location:
row: 38
column: 36
parent: ~
- kind:
name: InvalidCharacterZeroWidthSpace
body: "Invalid unescaped character zero-width-space, use \"\\u200B\" instead"
suggestion: Replace with escape sequence
fixable: true
location:
row: 39
column: 59
end_location:
row: 39
column: 60
fix:
edits:
- content: "\\u200b"
location:
row: 39
column: 59
end_location:
row: 39
column: 60
parent: ~
- kind:
name: InvalidCharacterZeroWidthSpace
body: "Invalid unescaped character zero-width-space, use \"\\u200B\" instead"
suggestion: Replace with escape sequence
fixable: true
location:
row: 39
column: 60
end_location:
row: 39
column: 61
fix:
edits:
- content: "\\u200b"
location:
row: 39
column: 60
end_location:
row: 39
column: 61
parent: ~

0 comments on commit de7e379

Please sign in to comment.