From de7e379cae0cf4910bd771fd495092472c6c5740 Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 6 Apr 2023 12:58:36 +0200 Subject: [PATCH] Fix unicode handling in PLE2515 Previously, we used byte indices when we should have used char indices, causing crashes when there were non-ASCII characters before our replacements Fixes #3716 --- .../fixtures/pylint/invalid_characters.py | Bin 972 -> 1106 bytes .../pylint/rules/invalid_string_characters.rs | 7 +- ..._tests__PLE2515_invalid_characters.py.snap | 63 ++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/crates/ruff/resources/test/fixtures/pylint/invalid_characters.py b/crates/ruff/resources/test/fixtures/pylint/invalid_characters.py index ecc681b112288e93baa23e2f27c688d3d7fb907d..6233ed9353308d93ea948e16755fa23d395c9552 100644 GIT binary patch delta 143 zcmX@Zeu-nl8RmMfs`BE3_{6l5)S~#@(wvgaq{@=i_~eYlqQqpNn1Zc>(t}N_A2oC< zVW@;Ficc>}EXYXB1?tNwEk@{iu(|iarVS4^?R&6k1(?0#!KVEWHthhjr#{%+rSOQo Ip&M*A0OWf{9RL6T delta 7 Ocmcb_afW@v8D; ("\\b", InvalidCharacterBackspace.into()), '\x1A' => ("\\x1A", InvalidCharacterSub.into()), @@ -193,7 +195,8 @@ pub fn invalid_string_characters( '\u{200b}' => ("\\u200b", InvalidCharacterZeroWidthSpace.into()), _ => unreachable!(), }; - let location = helpers::to_absolute(Location::new(row + 1, column), start); + let column_chars = line[..column_bytes].chars().count(); + let location = helpers::to_absolute(Location::new(row + 1, column_chars), start); let end_location = Location::new(location.row(), location.column() + 1); let mut diagnostic = Diagnostic::new(rule, Range::new(location, end_location)); if autofix { diff --git a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE2515_invalid_characters.py.snap b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE2515_invalid_characters.py.snap index 4bc299394025f..272ae53441add 100644 --- 
a/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE2515_invalid_characters.py.snap +++ b/crates/ruff/src/rules/pylint/snapshots/ruff__rules__pylint__tests__PLE2515_invalid_characters.py.snap @@ -23,4 +23,67 @@ expression: diagnostics row: 34 column: 13 parent: ~ +- kind: + name: InvalidCharacterZeroWidthSpace + body: "Invalid unescaped character zero-width-space, use \"\\u200B\" instead" + suggestion: Replace with escape sequence + fixable: true + location: + row: 38 + column: 35 + end_location: + row: 38 + column: 36 + fix: + edits: + - content: "\\u200b" + location: + row: 38 + column: 35 + end_location: + row: 38 + column: 36 + parent: ~ +- kind: + name: InvalidCharacterZeroWidthSpace + body: "Invalid unescaped character zero-width-space, use \"\\u200B\" instead" + suggestion: Replace with escape sequence + fixable: true + location: + row: 39 + column: 59 + end_location: + row: 39 + column: 60 + fix: + edits: + - content: "\\u200b" + location: + row: 39 + column: 59 + end_location: + row: 39 + column: 60 + parent: ~ +- kind: + name: InvalidCharacterZeroWidthSpace + body: "Invalid unescaped character zero-width-space, use \"\\u200B\" instead" + suggestion: Replace with escape sequence + fixable: true + location: + row: 39 + column: 60 + end_location: + row: 39 + column: 61 + fix: + edits: + - content: "\\u200b" + location: + row: 39 + column: 60 + end_location: + row: 39 + column: 61 + parent: ~