diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs index 106e056f605e4..198205e73f5d0 100644 --- a/crates/ruff/src/checkers/logical_lines.rs +++ b/crates/ruff/src/checkers/logical_lines.rs @@ -43,6 +43,20 @@ pub fn check_logical_lines( ) -> Vec { let mut diagnostics = vec![]; + #[cfg(feature = "logical_lines")] + let should_fix_missing_whitespace = + autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace); + + #[cfg(not(feature = "logical_lines"))] + let should_fix_missing_whitespace = false; + + #[cfg(feature = "logical_lines")] + let should_fix_whitespace_before_parameters = + autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters); + + #[cfg(not(feature = "logical_lines"))] + let should_fix_whitespace_before_parameters = false; + let indent_char = stylist.indentation().as_char(); let mut prev_line = None; let mut prev_indent_level = None; @@ -152,15 +166,12 @@ pub fn check_logical_lines( } } - #[cfg(feature = "logical_lines")] - let should_fix = autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace); - - #[cfg(not(feature = "logical_lines"))] - let should_fix = false; - - for diagnostic in - missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level) - { + for diagnostic in missing_whitespace( + line.text(), + start_loc.row(), + should_fix_missing_whitespace, + indent_level, + ) { if settings.rules.enabled(diagnostic.kind.rule()) { diagnostics.push(diagnostic); } @@ -168,14 +179,9 @@ pub fn check_logical_lines( } if line.flags().contains(TokenFlags::BRACKET) { - #[cfg(feature = "logical_lines")] - let should_fix = - autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters); - - #[cfg(not(feature = "logical_lines"))] - let should_fix = false; - - for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) { + for diagnostic in + whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters) + { if 
settings.rules.enabled(diagnostic.kind.rule()) { diagnostics.push(diagnostic); } diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs index 97b36ac9f4221..d5b26e5676b82 100644 --- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs +++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs @@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> { assert!(u32::try_from(tokens.len()).is_ok()); let single_token = tokens.len() == 1; - let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len()); + let mut builder = + LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len()); let mut parens: u32 = 0; for (start, token, end) in tokens.iter().flatten() { @@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> { } impl<'a> LogicalLinesBuilder<'a> { - fn with_token_capacity(capacity: usize) -> Self { + fn with_capacity(tokens: usize, string: usize) -> Self { Self { - tokens: Vec::with_capacity(capacity), - mappings: Mappings::with_capacity(capacity + 1), + tokens: Vec::with_capacity(tokens), + mappings: Mappings::with_capacity(tokens + 1), + text: String::with_capacity(string), ..Self::default() } } @@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> { // TODO(charlie): "Mute" strings. let text = if let Tok::String { value, .. } = token { + // Replace the content of strings with a non-whs sequence because some lints + // search for whitespace in the document and whitespace inside of the string + // would complicate the search. 
/// Classification of a contiguous run of whitespace next to a token.
// NOTE(review): the `allow`s are presumably needed because the visible callers are
// compiled only with the `logical_lines` feature — confirm before removing them.
#[allow(unused)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Whitespace {
    /// The run is empty.
    None,
    /// Exactly one whitespace character, and it is not a tab.
    Single,
    /// Two or more whitespace characters, none of which is a tab.
    Many,
    /// The run contains at least one tab.
    Tab,
}

impl Whitespace {
    /// Classifies the whitespace at the start of `content`.
    ///
    /// Returns the byte length of the leading whitespace run together with its kind.
    /// A tab anywhere in the run makes the whole run [`Whitespace::Tab`].
    #[allow(dead_code)]
    fn leading(content: &str) -> (usize, Self) {
        let mut offset = 0;
        let mut kind = Self::None;

        for c in content.chars() {
            if c == '\t' {
                kind = Self::Tab;
                offset += c.len_utf8();
            } else if c.is_whitespace() {
                kind = match kind {
                    Self::None => Self::Single,
                    Self::Single | Self::Many => Self::Many,
                    // Once a tab has been seen the run stays classified as `Tab`.
                    Self::Tab => Self::Tab,
                };
                offset += c.len_utf8();
            } else {
                break;
            }
        }

        (offset, kind)
    }

    /// Classifies the whitespace at the end of `content`.
    ///
    /// Returns the kind of the trailing whitespace run together with its byte length,
    /// so that `content.len() - length` is the byte offset at which the run starts.
    ///
    /// The whole run is always measured, mirroring [`Whitespace::leading`]. Returning
    /// as soon as a tab is found would report only the part of the run to the right
    /// of that tab, and callers that subtract the length would then point into the
    /// middle of the run instead of at its start.
    // NOTE(review): snapshots of the "tab before ..." diagnostics that were recorded
    // against the early-return behaviour need to be regenerated.
    #[allow(dead_code)]
    fn trailing(content: &str) -> (Self, usize) {
        let mut count = 0u32;
        let mut offset = 0;
        let mut has_tab = false;

        for c in content.chars().rev() {
            if c == '\t' {
                has_tab = true;
                offset += c.len_utf8();
            } else if c.is_whitespace() {
                count += 1;
                offset += c.len_utf8();
            } else {
                break;
            }
        }

        if has_tab {
            return (Self::Tab, offset);
        }

        match count {
            0 => (Self::None, 0),
            1 => (Self::Single, offset),
            _ => (Self::Many, offset),
        }
    }
}
@@ -123,28 +128,41 @@ impl Violation for MultipleSpacesAfterOperator { } } -static OPERATOR_REGEX: Lazy = - Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap()); +static OPERATOR_REGEX: Lazy = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap()); /// E221, E222, E223, E224 #[cfg(feature = "logical_lines")] pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in OPERATOR_REGEX.captures_iter(line) { - let before = line_match.get(1).unwrap(); - let after = line_match.get(2).unwrap(); - - if before.as_str().contains('\t') { - diagnostics.push((before.start(), TabBeforeOperator.into())); - } else if before.as_str().len() > 1 { - diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into())); + let mut last_end = None; + + for line_match in OPERATOR_REGEX.find_iter(line) { + if last_end != Some(line_match.start()) { + let before = &line[..line_match.start()]; + + match Whitespace::trailing(before) { + (Whitespace::Tab, offset) => { + diagnostics.push((line_match.start() - offset, TabBeforeOperator.into())); + } + (Whitespace::Many, offset) => diagnostics.push(( + line_match.start() - offset, + MultipleSpacesBeforeOperator.into(), + )), + _ => {} + } } - if after.as_str().contains('\t') { - diagnostics.push((after.start(), TabAfterOperator.into())); - } else if after.as_str().len() > 1 { - diagnostics.push((after.start(), MultipleSpacesAfterOperator.into())); + let after = &line[line_match.end()..]; + let (leading_offset, leading_kind) = Whitespace::leading(after); + match leading_kind { + Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())), + Whitespace::Many => { + diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into())); + } + _ => {} } + + last_end = Some(line_match.end() + leading_offset); } diagnostics } diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs 
b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs index f7c056b917a6e..a6eb880b6df3b 100644 --- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs +++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs @@ -3,6 +3,7 @@ use once_cell::sync::Lazy; use regex::Regex; +use crate::rules::pycodestyle::rules::Whitespace; use ruff_diagnostics::DiagnosticKind; use ruff_diagnostics::Violation; use ruff_macros::{derive_message_formats, violation}; @@ -111,28 +112,41 @@ impl Violation for TabBeforeKeyword { } static KEYWORD_REGEX: Lazy = Lazy::new(|| { - Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap() + Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap() }); /// E271, E272, E273, E274 #[cfg(feature = "logical_lines")] pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> { let mut diagnostics = vec![]; - for line_match in KEYWORD_REGEX.captures_iter(line) { - let before = line_match.get(1).unwrap(); - let after = line_match.get(2).unwrap(); - - if before.as_str().contains('\t') { - diagnostics.push((before.start(), TabBeforeKeyword.into())); - } else if before.as_str().len() > 1 { - diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into())); + let mut last_end = None; + + for line_match in KEYWORD_REGEX.find_iter(line) { + if last_end != Some(line_match.start()) { + let before = &line[..line_match.start()]; + match Whitespace::trailing(before) { + (Whitespace::Tab, offset) => { + diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into())); + } + (Whitespace::Many, offset) => diagnostics.push(( + line_match.start() - 
offset, + MultipleSpacesBeforeKeyword.into(), + )), + _ => {} + } } - if after.as_str().contains('\t') { - diagnostics.push((after.start(), TabAfterKeyword.into())); - } else if after.as_str().len() > 1 { - diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into())); + let after = &line[line_match.end()..]; + let (leading_offset, leading_kind) = Whitespace::leading(after); + match leading_kind { + Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())), + Whitespace::Many => { + diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into())); + } + _ => {} } + + last_end = Some(line_match.end() + leading_offset); } diagnostics } diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap index 511063725035f..62caae28b8f1f 100644 --- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap +++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap @@ -9,10 +9,10 @@ expression: diagnostics fixable: false location: row: 28 - column: 1 + column: 2 end_location: row: 28 - column: 1 + column: 2 fix: edits: [] parent: ~ @@ -23,10 +23,10 @@ expression: diagnostics fixable: false location: row: 30 - column: 4 + column: 5 end_location: row: 30 - column: 4 + column: 5 fix: edits: [] parent: ~