Skip to content

Commit

Permalink
Avoid using Regex captures
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Mar 26, 2023
1 parent f86cb60 commit 7c82846
Show file tree
Hide file tree
Showing 8 changed files with 216 additions and 40 deletions.
13 changes: 9 additions & 4 deletions crates/ruff/src/rules/pycodestyle/logical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> {
assert!(u32::try_from(tokens.len()).is_ok());

let single_token = tokens.len() == 1;
let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
let mut builder =
LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
let mut parens: u32 = 0;

for (start, token, end) in tokens.iter().flatten() {
Expand Down Expand Up @@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> {
}

impl<'a> LogicalLinesBuilder<'a> {
fn with_token_capacity(capacity: usize) -> Self {
fn with_capacity(tokens: usize, string: usize) -> Self {
Self {
tokens: Vec::with_capacity(capacity),
mappings: Mappings::with_capacity(capacity + 1),
tokens: Vec::with_capacity(tokens),
mappings: Mappings::with_capacity(tokens + 1),
text: String::with_capacity(string),
..Self::default()
}
}
Expand Down Expand Up @@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> {

// TODO(charlie): "Mute" strings.
let text = if let Tok::String { value, .. } = token {
// Replace the content of strings with a non-whitespace sequence because some lints
// search for whitespace in the document, and whitespace inside of the string
// would complicate the search.
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else {
Cow::Borrowed(locator.slice(Range {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation {

// TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());
Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());

/// E201, E202, E203
#[cfg(feature = "logical_lines")]
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) {
let match_ = line_match.get(1).unwrap();
let text = match_.as_str();
for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
let text = &line[line_match.range()];
let char = text.trim();
let found = match_.start();
let found = line_match.start();
if text.chars().last().unwrap().is_ascii_whitespace() {
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
Expand Down
51 changes: 51 additions & 0 deletions crates/ruff/src/rules/pycodestyle/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,54 @@ mod whitespace_around_keywords;
mod whitespace_around_named_parameter_equals;
mod whitespace_before_comment;
mod whitespace_before_parameters;

/// Classification of the run of whitespace found directly next to a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Whitespace {
    /// The run is empty.
    None,
    /// The run is exactly one whitespace character and contains no tab.
    Single,
    /// The run is two or more whitespace characters and contains no tab.
    Many,
    /// The run contains at least one tab.
    Tab,
}

impl Whitespace {
    /// Classifies the whitespace run at the very start of `content`.
    ///
    /// Scanning stops at the first non-whitespace character. A tab anywhere in
    /// the run takes precedence over the `Single`/`Many` classification.
    fn leading(content: &str) -> Self {
        let mut count = 0usize;

        // `'\t'` is itself whitespace, so `take_while` keeps it in the run.
        for c in content.chars().take_while(|c| c.is_whitespace()) {
            if c == '\t' {
                return Self::Tab;
            }
            count += 1;
        }

        match count {
            0 => Self::None,
            1 => Self::Single,
            _ => Self::Many,
        }
    }

    /// Classifies the whitespace run at the very end of `content`.
    ///
    /// Returns the classification together with the length of the run in
    /// bytes, so `content.len() - offset` is the byte index at which the run
    /// starts. The offset always spans the *whole* run, including when the run
    /// contains a tab; the scan does not stop at the first tab it meets.
    fn trailing(content: &str) -> (Self, usize) {
        let mut count = 0usize;
        let mut offset = 0usize;
        let mut has_tab = false;

        for c in content.chars().rev().take_while(|c| c.is_whitespace()) {
            has_tab |= c == '\t';
            count += 1;
            // Byte length, not char count: callers subtract this from a byte index.
            offset += c.len_utf8();
        }

        if has_tab {
            return (Self::Tab, offset);
        }

        match count {
            0 => (Self::None, 0),
            1 => (Self::Single, offset),
            _ => (Self::Many, offset),
        }
    }
}
38 changes: 24 additions & 14 deletions crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;

use crate::rules::pycodestyle::helpers::is_op_token;
use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;

/// ## What it does
/// Checks for extraneous tabs before an operator.
Expand Down Expand Up @@ -123,27 +128,32 @@ impl Violation for MultipleSpacesAfterOperator {
}
}

static OPERATOR_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());

/// E221, E222, E223, E224
#[cfg(feature = "logical_lines")]
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in OPERATOR_REGEX.captures_iter(line) {
let before = line_match.get(1).unwrap();
let after = line_match.get(2).unwrap();

if before.as_str().contains('\t') {
diagnostics.push((before.start(), TabBeforeOperator.into()));
} else if before.as_str().len() > 1 {
diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into()));
for line_match in OPERATOR_REGEX.find_iter(line) {
let before = &line[..line_match.start()];
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()))
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeOperator.into(),
)),
_ => {}
}

if after.as_str().contains('\t') {
diagnostics.push((after.start(), TabAfterOperator.into()));
} else if after.as_str().len() > 1 {
diagnostics.push((after.start(), MultipleSpacesAfterOperator.into()));
let after = &line[line_match.end()..];
match Whitespace::leading(after) {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()))
}
_ => {}
}
}
diagnostics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use once_cell::sync::Lazy;
use regex::Regex;

use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
Expand Down Expand Up @@ -111,27 +112,33 @@ impl Violation for TabBeforeKeyword {
}

static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap()
Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
});

/// E271, E272, E273, E274
#[cfg(feature = "logical_lines")]
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in KEYWORD_REGEX.captures_iter(line) {
let before = line_match.get(1).unwrap();
let after = line_match.get(2).unwrap();

if before.as_str().contains('\t') {
diagnostics.push((before.start(), TabBeforeKeyword.into()));
} else if before.as_str().len() > 1 {
diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into()));
for line_match in KEYWORD_REGEX.find_iter(line) {
let before = &line[..line_match.start()];
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()))
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeKeyword.into(),
)),
_ => {}
}

if after.as_str().contains('\t') {
diagnostics.push((after.start(), TabAfterKeyword.into()));
} else if after.as_str().len() > 1 {
diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into()));
let after = &line[line_match.end()..];
match Whitespace::leading(after) {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()))
}
_ => {}
}
}
diagnostics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,30 @@ expression: diagnostics
column: 13
fix: ~
parent: ~
- kind:
name: MultipleSpacesBeforeOperator
body: Multiple spaces before operator
suggestion: ~
fixable: false
location:
row: 31
column: 3
end_location:
row: 31
column: 3
fix: ~
parent: ~
- kind:
name: MultipleSpacesBeforeOperator
body: Multiple spaces before operator
suggestion: ~
fixable: false
location:
row: 32
column: 3
end_location:
row: 32
column: 3
fix: ~
parent: ~

Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,32 @@
source: crates/ruff/src/rules/pycodestyle/mod.rs
expression: diagnostics
---
- kind:
name: MultipleSpacesBeforeKeyword
body: Multiple spaces before keyword
suggestion: ~
fixable: false
location:
row: 4
column: 8
end_location:
row: 4
column: 8
fix: ~
parent: ~
- kind:
name: MultipleSpacesBeforeKeyword
body: Multiple spaces before keyword
suggestion: ~
fixable: false
location:
row: 6
column: 4
end_location:
row: 6
column: 4
fix: ~
parent: ~
- kind:
name: MultipleSpacesBeforeKeyword
body: Multiple spaces before keyword
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,69 @@
source: crates/ruff/src/rules/pycodestyle/mod.rs
expression: diagnostics
---
- kind:
name: TabBeforeKeyword
body: Tab before keyword
suggestion: ~
fixable: false
location:
row: 10
column: 9
end_location:
row: 10
column: 9
fix: ~
parent: ~
- kind:
name: TabBeforeKeyword
body: Tab before keyword
suggestion: ~
fixable: false
location:
row: 12
column: 5
end_location:
row: 12
column: 5
fix: ~
parent: ~
- kind:
name: TabBeforeKeyword
body: Tab before keyword
suggestion: ~
fixable: false
location:
row: 12
column: 9
end_location:
row: 12
column: 9
fix: ~
parent: ~
- kind:
name: TabBeforeKeyword
body: Tab before keyword
suggestion: ~
fixable: false
location:
row: 28
column: 1
column: 2
end_location:
row: 28
column: 1
column: 2
fix: ~
parent: ~
- kind:
name: TabBeforeKeyword
body: Tab before keyword
suggestion: ~
fixable: false
location:
row: 30
column: 5
end_location:
row: 30
column: 5
fix: ~
parent: ~
- kind:
Expand All @@ -22,10 +74,10 @@ expression: diagnostics
fixable: false
location:
row: 30
column: 4
column: 9
end_location:
row: 30
column: 4
column: 9
fix: ~
parent: ~

0 comments on commit 7c82846

Please sign in to comment.