From 7c82846a6b6e8059a8d95f76514c2de9eafc457f Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Sat, 25 Mar 2023 11:55:39 +0100 Subject: [PATCH] Avoid using Regex captures --- .../src/rules/pycodestyle/logical_lines.rs | 13 ++-- .../rules/extraneous_whitespace.rs | 9 ++- .../ruff/src/rules/pycodestyle/rules/mod.rs | 51 ++++++++++++++++ .../rules/space_around_operator.rs | 38 +++++++----- .../rules/whitespace_around_keywords.rs | 33 ++++++---- ...ules__pycodestyle__tests__E221_E22.py.snap | 26 ++++++++ ...ules__pycodestyle__tests__E272_E27.py.snap | 26 ++++++++ ...ules__pycodestyle__tests__E274_E27.py.snap | 60 +++++++++++++++++-- 8 files changed, 216 insertions(+), 40 deletions(-) diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs index 40ba65d1bac02..c8b41c6cd06d0 100644 --- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs +++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs @@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> { assert!(u32::try_from(tokens.len()).is_ok()); let single_token = tokens.len() == 1; - let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len()); + let mut builder = + LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len()); let mut parens: u32 = 0; for (start, token, end) in tokens.iter().flatten() { @@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> { } impl<'a> LogicalLinesBuilder<'a> { - fn with_token_capacity(capacity: usize) -> Self { + fn with_capacity(tokens: usize, string: usize) -> Self { Self { - tokens: Vec::with_capacity(capacity), - mappings: Mappings::with_capacity(capacity + 1), + tokens: Vec::with_capacity(tokens), + mappings: Mappings::with_capacity(tokens + 1), + text: String::with_capacity(string), ..Self::default() } } @@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> { // TODO(charlie): "Mute" strings. let text = if let Tok::String { value, .. 
} = token { + // Replace the content of strings with a non-whitespace sequence because some lints + // search for whitespace in the document and whitespace inside of the string + // would complicate the search. Cow::Owned(format!("\"{}\"", "x".repeat(value.width()))) } else { Cow::Borrowed(locator.slice(Range { diff --git a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs index 6cf1557457750..a7d8fba96d457 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs @@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation { // TODO(charlie): Pycodestyle has a negative lookahead on the end. static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap()); + Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap()); /// E201, E202, E203 #[cfg(feature = "logical_lines")] pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) { - let match_ = line_match.get(1).unwrap(); - let text = match_.as_str(); + for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) { + let text = &line[line_match.range()]; let char = text.trim(); - let found = match_.start(); + let found = line_match.start(); if text.chars().last().unwrap().is_ascii_whitespace() { diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into())); } else if line.chars().nth(found - 1).map_or(false, |c| c != ',') { diff --git a/crates/ruff/src/rules/pycodestyle/rules/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/mod.rs index f36347fa4c832..adafe3eaf855c 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/mod.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/mod.rs @@ -86,3 +86,54 @@ mod whitespace_around_keywords; mod whitespace_around_named_parameter_equals; mod 
whitespace_before_comment; mod whitespace_before_parameters; + +enum Whitespace { + None, + Single, + Many, + Tab, +} + +impl Whitespace { + fn leading(content: &str) -> Self { + let mut count = 0u32; + + for c in content.chars() { + if c == '\t' { + return Self::Tab; + } else if c.is_whitespace() { + count += 1; + } else { + break; + } + } + + match count { + 0 => Self::None, + 1 => Self::Single, + _ => Self::Many, + } + } + + fn trailing(content: &str) -> (Self, usize) { + let mut count = 0u32; + let mut offset = 0; + + for c in content.chars().rev() { + if c == '\t' { + return (Self::Tab, offset + 1); + } else if c.is_whitespace() { + count += 1; + offset += c.len_utf8(); + } else { + break; + } + } + + match count { + 0 => (Self::None, 0), + 1 => (Self::Single, offset), + _ => (Self::Many, offset), + } + } +} diff --git a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs index fedc6b69e2384..5dd8fabad4da6 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs @@ -2,10 +2,15 @@ use once_cell::sync::Lazy; use regex::Regex; +use rustpython_parser::ast::Location; +use rustpython_parser::Tok; +use crate::rules::pycodestyle::helpers::is_op_token; +use crate::rules::pycodestyle::rules::Whitespace; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::source_code::Locator; /// ## What it does /// Checks for extraneous tabs before an operator. 
@@ -123,27 +128,32 @@ impl Violation for MultipleSpacesAfterOperator { } } -static OPERATOR_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap()); +static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap()); /// E221, E222, E223, E224 #[cfg(feature = "logical_lines")] pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in OPERATOR_REGEX.captures_iter(line) { - let before = line_match.get(1).unwrap(); - let after = line_match.get(2).unwrap(); - - if before.as_str().contains('\t') { - diagnostics.push((before.start(), TabBeforeOperator.into())); - } else if before.as_str().len() > 1 { - diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into())); + for line_match in OPERATOR_REGEX.find_iter(line) { + let before = &line[..line_match.start()]; + match Whitespace::trailing(before) { + (Whitespace::Tab, offset) => { + diagnostics.push((line_match.start() - offset, TabBeforeOperator.into())) + } + (Whitespace::Many, offset) => diagnostics.push(( + line_match.start() - offset, + MultipleSpacesBeforeOperator.into(), + )), + _ => {} } - if after.as_str().contains('\t') { - diagnostics.push((after.start(), TabAfterOperator.into())); - } else if after.as_str().len() > 1 { - diagnostics.push((after.start(), MultipleSpacesAfterOperator.into())); + let after = &line[line_match.end()..]; + match Whitespace::leading(after) { + Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())), + Whitespace::Many => { + diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into())) + } + _ => {} } } diagnostics diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs index f7c056b917a6e..283f3460ab79d 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs +++ 
b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs @@ -3,6 +3,7 @@ use once_cell::sync::Lazy; use regex::Regex; +use crate::rules::pycodestyle::rules::Whitespace; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; @@ -111,27 +112,33 @@ impl Violation for TabBeforeKeyword { } static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| { - Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap() + Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap() }); /// E271, E272, E273, E274 #[cfg(feature = "logical_lines")] pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in KEYWORD_REGEX.captures_iter(line) { - let before = line_match.get(1).unwrap(); - let after = line_match.get(2).unwrap(); - - if before.as_str().contains('\t') { - diagnostics.push((before.start(), TabBeforeKeyword.into())); - } else if before.as_str().len() > 1 { - diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into())); + for line_match in KEYWORD_REGEX.find_iter(line) { + let before = &line[..line_match.start()]; + match Whitespace::trailing(before) { + (Whitespace::Tab, offset) => { + diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into())) + } + (Whitespace::Many, offset) => diagnostics.push(( + line_match.start() - offset, + MultipleSpacesBeforeKeyword.into(), + )), + _ => {} } - if after.as_str().contains('\t') { - diagnostics.push((after.start(), TabAfterKeyword.into())); - } else if after.as_str().len() > 1 { - diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into())); + 
let after = &line[line_match.end()..]; + match Whitespace::leading(after) { + Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())), + Whitespace::Many => { + diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into())) + } + _ => {} } } diagnostics diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap index 1c2446595da00..80fad4d696e1f 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap @@ -106,4 +106,30 @@ expression: diagnostics column: 13 fix: ~ parent: ~ +- kind: + name: MultipleSpacesBeforeOperator + body: Multiple spaces before operator + suggestion: ~ + fixable: false + location: + row: 31 + column: 3 + end_location: + row: 31 + column: 3 + fix: ~ + parent: ~ +- kind: + name: MultipleSpacesBeforeOperator + body: Multiple spaces before operator + suggestion: ~ + fixable: false + location: + row: 32 + column: 3 + end_location: + row: 32 + column: 3 + fix: ~ + parent: ~ diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap index 6c5592bd4803c..b4ae3418e2b50 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap @@ -2,6 +2,32 @@ source: crates/ruff/src/rules/pycodestyle/mod.rs expression: diagnostics --- +- kind: + name: MultipleSpacesBeforeKeyword + body: Multiple spaces before keyword + suggestion: ~ + fixable: false + location: + row: 4 + column: 8 + end_location: + row: 4 + column: 8 + fix: ~ + parent: ~ +- kind: + name: 
MultipleSpacesBeforeKeyword + body: Multiple spaces before keyword + suggestion: ~ + fixable: false + location: + row: 6 + column: 4 + end_location: + row: 6 + column: 4 + fix: ~ + parent: ~ - kind: name: MultipleSpacesBeforeKeyword body: Multiple spaces before keyword diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap index b080c2919b0ca..52dcc5cf9a935 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap @@ -2,6 +2,45 @@ source: crates/ruff/src/rules/pycodestyle/mod.rs expression: diagnostics --- +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 10 + column: 9 + end_location: + row: 10 + column: 9 + fix: ~ + parent: ~ +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 12 + column: 5 + end_location: + row: 12 + column: 5 + fix: ~ + parent: ~ +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 12 + column: 9 + end_location: + row: 12 + column: 9 + fix: ~ + parent: ~ - kind: name: TabBeforeKeyword body: Tab before keyword @@ -9,10 +48,23 @@ expression: diagnostics fixable: false location: row: 28 - column: 1 + column: 2 end_location: row: 28 - column: 1 + column: 2 + fix: ~ + parent: ~ +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 30 + column: 5 + end_location: + row: 30 + column: 5 fix: ~ parent: ~ - kind: @@ -22,10 +74,10 @@ expression: diagnostics fixable: false location: row: 30 - column: 4 + column: 9 end_location: row: 30 - column: 4 + column: 9 fix: ~ parent: ~