astral-sh · MichaReiser · Mar 28, 2023 · Mar 25, 2023 · MichaReiser · Mar 27, 2023
diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs
@@ -43,6 +43,20 @@ pub fn check_logical_lines(
 ) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
 
+    #[cfg(feature = "logical_lines")]
+    let should_fix_missing_whitespace =
+        autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
+
+    #[cfg(not(feature = "logical_lines"))]
+    let should_fix_missing_whitespace = false;
+
+    #[cfg(feature = "logical_lines")]
+    let should_fix_whitespace_before_parameters =
+        autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
+
+    #[cfg(not(feature = "logical_lines"))]
+    let should_fix_whitespace_before_parameters = false;
+
     let indent_char = stylist.indentation().as_char();
     let mut prev_line = None;
     let mut prev_indent_level = None;
@@ -152,30 +166,22 @@ pub fn check_logical_lines(
                 }
             }
 
-            #[cfg(feature = "logical_lines")]
-            let should_fix = autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);
-
-            #[cfg(not(feature = "logical_lines"))]
-            let should_fix = false;
-
-            for diagnostic in
-                missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
-            {
+            for diagnostic in missing_whitespace(
+                line.text(),
+                start_loc.row(),
+                should_fix_missing_whitespace,
+                indent_level,
+            ) {
                 if settings.rules.enabled(diagnostic.kind.rule()) {
                     diagnostics.push(diagnostic);
                 }
             }
         }
 
         if line.flags().contains(TokenFlags::BRACKET) {
-            #[cfg(feature = "logical_lines")]
-            let should_fix =
-                autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
-
-            #[cfg(not(feature = "logical_lines"))]
-            let should_fix = false;
-
-            for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
+            for diagnostic in
+                whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters)
+            {
                 if settings.rules.enabled(diagnostic.kind.rule()) {
                     diagnostics.push(diagnostic);
                 }

diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs
@@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> {
         assert!(u32::try_from(tokens.len()).is_ok());
 
         let single_token = tokens.len() == 1;
-        let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
+        let mut builder =
+            LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
         let mut parens: u32 = 0;
 
         for (start, token, end) in tokens.iter().flatten() {
@@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> {
 }
 
 impl<'a> LogicalLinesBuilder<'a> {
-    fn with_token_capacity(capacity: usize) -> Self {
+    fn with_capacity(tokens: usize, string: usize) -> Self {
         Self {
-            tokens: Vec::with_capacity(capacity),
-            mappings: Mappings::with_capacity(capacity + 1),
+            tokens: Vec::with_capacity(tokens),
+            mappings: Mappings::with_capacity(tokens + 1),
+            text: String::with_capacity(string),
             ..Self::default()
         }
     }
@@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> {
 
         // TODO(charlie): "Mute" strings.
         let text = if let Tok::String { value, .. } = token {
+            // Replace the content of strings with a non-whs sequence because some lints
+            // search for whitespace in the document and whitespace inside of the string
+            // would complicate the search.
             Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
         } else {
             Cow::Borrowed(locator.slice(Range {

diff --git a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
@@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation {
 
 // TODO(charlie): Pycodestyle has a negative lookahead on the end.
 static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());
+    Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());
 
 /// E201, E202, E203
 #[cfg(feature = "logical_lines")]
 pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) {
-        let match_ = line_match.get(1).unwrap();
-        let text = match_.as_str();
+    for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
+        let text = &line[line_match.range()];
         let char = text.trim();
-        let found = match_.start();
+        let found = line_match.start();
         if text.chars().last().unwrap().is_ascii_whitespace() {
             diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
         } else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {

diff --git a/crates/ruff/src/rules/pycodestyle/rules/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/mod.rs
@@ -86,3 +86,60 @@ mod whitespace_around_keywords;
 mod whitespace_around_named_parameter_equals;
 mod whitespace_before_comment;
 mod whitespace_before_parameters;
+
+#[allow(unused)]
+enum Whitespace {
+    None,
+    Single,
+    Many,
+    Tab,
+}
+
+impl Whitespace {
+    #[allow(dead_code)]
+    fn leading(content: &str) -> (usize, Self) {
+        let mut offset = 0;
+        let mut kind = Self::None;
+
+        for c in content.chars() {
+            if c == '\t' {
+                kind = Self::Tab;
+                offset += 1;
+            } else if c.is_whitespace() {
+                kind = match kind {
+                    Whitespace::None => Whitespace::Single,
+                    Whitespace::Single | Whitespace::Many => Whitespace::Many,
+                    Whitespace::Tab => Whitespace::Tab,
+                };
+                offset += c.len_utf8();
+            } else {
+                break;
+            }
+        }
+
+        (offset, kind)
+    }
+
+    #[allow(dead_code)]
+    fn trailing(content: &str) -> (Self, usize) {
+        let mut count = 0u32;
+        let mut offset = 0;
+
+        for c in content.chars().rev() {
+            if c == '\t' {
+                return (Self::Tab, offset + 1);
+            } else if c.is_whitespace() {
+                count += 1;
+                offset += c.len_utf8();
+            } else {
+                break;
+            }
+        }
+
+        match count {
+            0 => (Self::None, 0),
+            1 => (Self::Single, offset),
+            _ => (Self::Many, offset),
+        }
+    }
+}
diff --git a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
@@ -2,10 +2,15 @@
 
 use once_cell::sync::Lazy;
 use regex::Regex;
+use rustpython_parser::ast::Location;
+use rustpython_parser::Tok;
 
+use crate::rules::pycodestyle::helpers::is_op_token;
+use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::source_code::Locator;
 
 /// ## What it does
 /// Checks for extraneous tabs before an operator.
@@ -123,28 +128,41 @@ impl Violation for MultipleSpacesAfterOperator {
     }
 }
 
-static OPERATOR_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
+static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());
 
 /// E221, E222, E223, E224
 #[cfg(feature = "logical_lines")]
 pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    for line_match in OPERATOR_REGEX.captures_iter(line) {
-        let before = line_match.get(1).unwrap();
-        let after = line_match.get(2).unwrap();
-
-        if before.as_str().contains('\t') {
-            diagnostics.push((before.start(), TabBeforeOperator.into()));
-        } else if before.as_str().len() > 1 {
-            diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into()));
+    let mut last_end = None;
+
+    for line_match in OPERATOR_REGEX.find_iter(line) {
+        if last_end != Some(line_match.start()) {
+            let before = &line[..line_match.start()];
+
+            match Whitespace::trailing(before) {
+                (Whitespace::Tab, offset) => {
+                    diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
+                }
+                (Whitespace::Many, offset) => diagnostics.push((
+                    line_match.start() - offset,
+                    MultipleSpacesBeforeOperator.into(),
+                )),
+                _ => {}
+            }
         }
 
-        if after.as_str().contains('\t') {
-            diagnostics.push((after.start(), TabAfterOperator.into()));
-        } else if after.as_str().len() > 1 {
-            diagnostics.push((after.start(), MultipleSpacesAfterOperator.into()));
+        let after = &line[line_match.end()..];
+        let (leading_offset, leading_kind) = Whitespace::leading(after);
+        match leading_kind {
+            Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
+            Whitespace::Many => {
+                diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
+            }
+            _ => {}
         }
+
+        last_end = Some(line_match.end() + leading_offset);
     }
     diagnostics
 }

diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
@@ -3,6 +3,7 @@
 use once_cell::sync::Lazy;
 use regex::Regex;
 
+use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
@@ -111,28 +112,41 @@ impl Violation for TabBeforeKeyword {
 }
 
 static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
-    Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap()
+    Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
 });
 
 /// E271, E272, E273, E274
 #[cfg(feature = "logical_lines")]
 pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    for line_match in KEYWORD_REGEX.captures_iter(line) {
-        let before = line_match.get(1).unwrap();
-        let after = line_match.get(2).unwrap();
-
-        if before.as_str().contains('\t') {
-            diagnostics.push((before.start(), TabBeforeKeyword.into()));
-        } else if before.as_str().len() > 1 {
-            diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into()));
+    let mut last_end = None;
+
+    for line_match in KEYWORD_REGEX.find_iter(line) {
+        if last_end != Some(line_match.start()) {
+            let before = &line[..line_match.start()];
+            match Whitespace::trailing(before) {
+                (Whitespace::Tab, offset) => {
+                    diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
+                }
+                (Whitespace::Many, offset) => diagnostics.push((
+                    line_match.start() - offset,
+                    MultipleSpacesBeforeKeyword.into(),
+                )),
+                _ => {}
+            }
         }
 
-        if after.as_str().contains('\t') {
-            diagnostics.push((after.start(), TabAfterKeyword.into()));
-        } else if after.as_str().len() > 1 {
-            diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into()));
+        let after = &line[line_match.end()..];
+        let (leading_offset, leading_kind) = Whitespace::leading(after);
+        match leading_kind {
+            Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
+            Whitespace::Many => {
+                diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
+            }
+            _ => {}
         }
+
+        last_end = Some(line_match.end() + leading_offset);
     }
     diagnostics
 }

diff --git a/...es/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap b/...es/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E274_E27.py.snap
@@ -9,10 +9,10 @@ expression: diagnostics
     fixable: false
   location:
     row: 28
-    column: 1
+    column: 2
   end_location:
     row: 28
-    column: 1
+    column: 2
   fix:
     edits: []
   parent: ~
@@ -23,10 +23,10 @@ expression: diagnostics
     fixable: false
   location:
     row: 30
-    column: 4
+    column: 5
   end_location:
     row: 30
-    column: 4
+    column: 5
   fix:
     edits: []
   parent: ~