From 59e7f102a4d64f4d0e4c562d419e4071c17bcf87 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Sat, 25 Mar 2023 11:55:39 +0100 Subject: [PATCH] Avoid using Regex captures --- .../src/rules/pycodestyle/logical_lines.rs | 13 ++-- .../rules/extraneous_whitespace.rs | 9 ++- .../ruff/src/rules/pycodestyle/rules/mod.rs | 54 ++++++++++++++++ .../rules/space_around_operator.rs | 38 +++++++---- .../rules/whitespace_around_keywords.rs | 33 ++++++---- ...ules__pycodestyle__tests__E221_E22.py.snap | 28 ++++++++ ...ules__pycodestyle__tests__E272_E27.py.snap | 28 ++++++++ ...ules__pycodestyle__tests__E274_E27.py.snap | 64 +++++++++++++++++-- 8 files changed, 227 insertions(+), 40 deletions(-) diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs index 97b36ac9f42212..3c1e55da3b18e3 100644 --- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs +++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs @@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> { assert!(u32::try_from(tokens.len()).is_ok()); let single_token = tokens.len() == 1; - let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len()); + let mut builder = + LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len()); let mut parens: u32 = 0; for (start, token, end) in tokens.iter().flatten() { @@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> { } impl<'a> LogicalLinesBuilder<'a> { - fn with_token_capacity(capacity: usize) -> Self { + fn with_capacity(tokens: usize, string: usize) -> Self { Self { - tokens: Vec::with_capacity(capacity), - mappings: Mappings::with_capacity(capacity + 1), + tokens: Vec::with_capacity(tokens), + mappings: Mappings::with_capacity(tokens + 1), + text: String::with_capacity(string), ..Self::default() } } @@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> { // TODO(charlie): "Mute" strings. let text = if let Tok::String { value, .. 
} = token { + // Replace the content of strings with a non-whitespace sequence because some lints + // search for whitespace in the document and whitespace inside of the string + // would complicate the search. Cow::Owned(format!("\"{}\"", "x".repeat(value.width()))) } else { Cow::Borrowed(locator.slice(Range { diff --git a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs index 6cf15574577509..a7d8fba96d457c 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs @@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation { // TODO(charlie): Pycodestyle has a negative lookahead on the end. static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap()); + Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap()); /// E201, E202, E203 #[cfg(feature = "logical_lines")] pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) { - let match_ = line_match.get(1).unwrap(); - let text = match_.as_str(); + for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) { + let text = &line[line_match.range()]; let char = text.trim(); - let found = match_.start(); + let found = line_match.start(); if text.chars().last().unwrap().is_ascii_whitespace() { diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into())); } else if line.chars().nth(found - 1).map_or(false, |c| c != ',') { diff --git a/crates/ruff/src/rules/pycodestyle/rules/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/mod.rs index f36347fa4c8327..2e79852166483e 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/mod.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/mod.rs @@ -86,3 +86,57 @@ mod whitespace_around_keywords; mod whitespace_around_named_parameter_equals; 
mod whitespace_before_comment; mod whitespace_before_parameters; + +#[allow(unused)] +enum Whitespace { + None, + Single, + Many, + Tab, +} + +impl Whitespace { + #[allow(dead_code)] + fn leading(content: &str) -> Self { + let mut count = 0u32; + + for c in content.chars() { + if c == '\t' { + return Self::Tab; + } else if c.is_whitespace() { + count += 1; + } else { + break; + } + } + + match count { + 0 => Self::None, + 1 => Self::Single, + _ => Self::Many, + } + } + + #[allow(dead_code)] + fn trailing(content: &str) -> (Self, usize) { + let mut count = 0u32; + let mut offset = 0; + + for c in content.chars().rev() { + if c == '\t' { + return (Self::Tab, offset + 1); + } else if c.is_whitespace() { + count += 1; + offset += c.len_utf8(); + } else { + break; + } + } + + match count { + 0 => (Self::None, 0), + 1 => (Self::Single, offset), + _ => (Self::Many, offset), + } + } +} diff --git a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs index fedc6b69e2384d..08fff10c2217f3 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs @@ -2,10 +2,15 @@ use once_cell::sync::Lazy; use regex::Regex; +use rustpython_parser::ast::Location; +use rustpython_parser::Tok; +use crate::rules::pycodestyle::helpers::is_op_token; +use crate::rules::pycodestyle::rules::Whitespace; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::source_code::Locator; /// ## What it does /// Checks for extraneous tabs before an operator. 
@@ -123,27 +128,32 @@ impl Violation for MultipleSpacesAfterOperator { } } -static OPERATOR_REGEX: Lazy<Regex> = - Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap()); +static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap()); /// E221, E222, E223, E224 #[cfg(feature = "logical_lines")] pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in OPERATOR_REGEX.captures_iter(line) { - let before = line_match.get(1).unwrap(); - let after = line_match.get(2).unwrap(); - - if before.as_str().contains('\t') { - diagnostics.push((before.start(), TabBeforeOperator.into())); - } else if before.as_str().len() > 1 { - diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into())); + for line_match in OPERATOR_REGEX.find_iter(line) { + let before = &line[..line_match.start()]; + match Whitespace::trailing(before) { + (Whitespace::Tab, offset) => { + diagnostics.push((line_match.start() - offset, TabBeforeOperator.into())); + } + (Whitespace::Many, offset) => diagnostics.push(( + line_match.start() - offset, + MultipleSpacesBeforeOperator.into(), + )), + _ => {} } - if after.as_str().contains('\t') { - diagnostics.push((after.start(), TabAfterOperator.into())); - } else if after.as_str().len() > 1 { - diagnostics.push((after.start(), MultipleSpacesAfterOperator.into())); + let after = &line[line_match.end()..]; + match Whitespace::leading(after) { + Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())), + Whitespace::Many => { + diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into())); + } + _ => {} } } diagnostics diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs index f7c056b917a6ef..1d3d16d11fb653 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs +++ 
b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs @@ -3,6 +3,7 @@ use once_cell::sync::Lazy; use regex::Regex; +use crate::rules::pycodestyle::rules::Whitespace; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; @@ -111,27 +112,33 @@ impl Violation for TabBeforeKeyword { } static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| { - Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap() + Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap() }); /// E271, E272, E273, E274 #[cfg(feature = "logical_lines")] pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in KEYWORD_REGEX.captures_iter(line) { - let before = line_match.get(1).unwrap(); - let after = line_match.get(2).unwrap(); - - if before.as_str().contains('\t') { - diagnostics.push((before.start(), TabBeforeKeyword.into())); - } else if before.as_str().len() > 1 { - diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into())); + for line_match in KEYWORD_REGEX.find_iter(line) { + let before = &line[..line_match.start()]; + match Whitespace::trailing(before) { + (Whitespace::Tab, offset) => { + diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into())); + } + (Whitespace::Many, offset) => diagnostics.push(( + line_match.start() - offset, + MultipleSpacesBeforeKeyword.into(), + )), + _ => {} } - if after.as_str().contains('\t') { - diagnostics.push((after.start(), TabAfterKeyword.into())); - } else if after.as_str().len() > 1 { - diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into())); 
+ let after = &line[line_match.end()..]; + match Whitespace::leading(after) { + Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())), + Whitespace::Many => { + diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into())); + } + _ => {} } } diagnostics diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap index a980f134779b6c..9992378d62e7c9 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E221_E22.py.snap @@ -114,4 +114,32 @@ expression: diagnostics fix: edits: [] parent: ~ +- kind: + name: MultipleSpacesBeforeOperator + body: Multiple spaces before operator + suggestion: ~ + fixable: false + location: + row: 31 + column: 3 + end_location: + row: 31 + column: 3 + fix: + edits: [] + parent: ~ +- kind: + name: MultipleSpacesBeforeOperator + body: Multiple spaces before operator + suggestion: ~ + fixable: false + location: + row: 32 + column: 3 + end_location: + row: 32 + column: 3 + fix: + edits: [] + parent: ~ diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap index 7b082431d1ffee..f9cb57dfff6c0b 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E272_E27.py.snap @@ -2,6 +2,34 @@ source: crates/ruff/src/rules/pycodestyle/mod.rs expression: diagnostics --- +- kind: + name: MultipleSpacesBeforeKeyword + body: Multiple spaces before keyword + suggestion: ~ + fixable: false + location: + row: 4 + column: 8 + end_location: + row: 4 + column: 8 + fix: + edits: [] + 
parent: ~ +- kind: + name: MultipleSpacesBeforeKeyword + body: Multiple spaces before keyword + suggestion: ~ + fixable: false + location: + row: 6 + column: 4 + end_location: + row: 6 + column: 4 + fix: + edits: [] + parent: ~ - kind: name: MultipleSpacesBeforeKeyword body: Multiple spaces before keyword diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap index 511063725035f3..73711d250914a8 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap @@ -2,6 +2,48 @@ source: crates/ruff/src/rules/pycodestyle/mod.rs expression: diagnostics --- +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 10 + column: 9 + end_location: + row: 10 + column: 9 + fix: + edits: [] + parent: ~ +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 12 + column: 5 + end_location: + row: 12 + column: 5 + fix: + edits: [] + parent: ~ +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 12 + column: 9 + end_location: + row: 12 + column: 9 + fix: + edits: [] + parent: ~ - kind: name: TabBeforeKeyword body: Tab before keyword @@ -9,10 +51,24 @@ expression: diagnostics fixable: false location: row: 28 - column: 1 + column: 2 end_location: row: 28 - column: 1 + column: 2 + fix: + edits: [] + parent: ~ +- kind: + name: TabBeforeKeyword + body: Tab before keyword + suggestion: ~ + fixable: false + location: + row: 30 + column: 5 + end_location: + row: 30 + column: 5 fix: edits: [] parent: ~ @@ -23,10 +79,10 @@ expression: diagnostics fixable: false location: row: 30 - column: 4 + column: 9 end_location: row: 30 - column: 4 + 
column: 9 fix: edits: [] parent: ~