Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(pycodestyle): Remove regex captures #3735

Merged
merged 1 commit into from
Mar 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions crates/ruff/src/checkers/logical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,20 @@ pub fn check_logical_lines(
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];

#[cfg(feature = "logical_lines")]
let should_fix_missing_whitespace =
autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);

#[cfg(not(feature = "logical_lines"))]
let should_fix_missing_whitespace = false;

#[cfg(feature = "logical_lines")]
let should_fix_whitespace_before_parameters =
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);

#[cfg(not(feature = "logical_lines"))]
let should_fix_whitespace_before_parameters = false;

let indent_char = stylist.indentation().as_char();
let mut prev_line = None;
let mut prev_indent_level = None;
Expand Down Expand Up @@ -152,30 +166,22 @@ pub fn check_logical_lines(
}
}

#[cfg(feature = "logical_lines")]
let should_fix = autofix.into() && settings.rules.should_fix(Rule::MissingWhitespace);

#[cfg(not(feature = "logical_lines"))]
let should_fix = false;

for diagnostic in
missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
{
for diagnostic in missing_whitespace(
line.text(),
start_loc.row(),
should_fix_missing_whitespace,
indent_level,
) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
}
}

if line.flags().contains(TokenFlags::BRACKET) {
#[cfg(feature = "logical_lines")]
let should_fix =
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);

#[cfg(not(feature = "logical_lines"))]
let should_fix = false;

for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
for diagnostic in
whitespace_before_parameters(line.tokens(), should_fix_whitespace_before_parameters)
{
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
Expand Down
13 changes: 9 additions & 4 deletions crates/ruff/src/rules/pycodestyle/logical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ impl<'a> LogicalLines<'a> {
assert!(u32::try_from(tokens.len()).is_ok());

let single_token = tokens.len() == 1;
let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
let mut builder =
LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
let mut parens: u32 = 0;

for (start, token, end) in tokens.iter().flatten() {
Expand Down Expand Up @@ -280,10 +281,11 @@ pub struct LogicalLinesBuilder<'a> {
}

impl<'a> LogicalLinesBuilder<'a> {
fn with_token_capacity(capacity: usize) -> Self {
fn with_capacity(tokens: usize, string: usize) -> Self {
Self {
tokens: Vec::with_capacity(capacity),
mappings: Mappings::with_capacity(capacity + 1),
tokens: Vec::with_capacity(tokens),
mappings: Mappings::with_capacity(tokens + 1),
text: String::with_capacity(string),
..Self::default()
}
}
Expand Down Expand Up @@ -340,6 +342,9 @@ impl<'a> LogicalLinesBuilder<'a> {

// TODO(charlie): "Mute" strings.
let text = if let Tok::String { value, .. } = token {
// Replace the content of strings with a non-whs sequence because some lints
// search for whitespace in the document and whitespace inside of the string
// would complicate the search.
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else {
Cow::Borrowed(locator.slice(Range {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,17 +103,16 @@ impl Violation for WhitespaceBeforePunctuation {

// TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());
Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());

/// E201, E202, E203
#[cfg(feature = "logical_lines")]
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) {
let match_ = line_match.get(1).unwrap();
let text = match_.as_str();
for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
let text = &line[line_match.range()];
let char = text.trim();
let found = match_.start();
let found = line_match.start();
if text.chars().last().unwrap().is_ascii_whitespace() {
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
Expand Down
57 changes: 57 additions & 0 deletions crates/ruff/src/rules/pycodestyle/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,60 @@ mod whitespace_around_keywords;
mod whitespace_around_named_parameter_equals;
mod whitespace_before_comment;
mod whitespace_before_parameters;

#[allow(unused)]
enum Whitespace {
MichaReiser marked this conversation as resolved.
Show resolved Hide resolved
None,
Single,
Many,
Tab,
}

impl Whitespace {
#[allow(dead_code)]
fn leading(content: &str) -> (usize, Self) {
let mut offset = 0;
let mut kind = Self::None;

for c in content.chars() {
if c == '\t' {
kind = Self::Tab;
offset += 1;
} else if c.is_whitespace() {
kind = match kind {
Whitespace::None => Whitespace::Single,
Whitespace::Single | Whitespace::Many => Whitespace::Many,
Whitespace::Tab => Whitespace::Tab,
};
offset += c.len_utf8();
} else {
break;
}
}

(offset, kind)
}

#[allow(dead_code)]
fn trailing(content: &str) -> (Self, usize) {
let mut count = 0u32;
let mut offset = 0;

for c in content.chars().rev() {
if c == '\t' {
return (Self::Tab, offset + 1);
} else if c.is_whitespace() {
count += 1;
offset += c.len_utf8();
} else {
break;
}
}

match count {
0 => (Self::None, 0),
1 => (Self::Single, offset),
_ => (Self::Many, offset),
}
}
}
46 changes: 32 additions & 14 deletions crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;

use crate::rules::pycodestyle::helpers::is_op_token;
use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;

/// ## What it does
/// Checks for extraneous tabs before an operator.
Expand Down Expand Up @@ -123,28 +128,41 @@ impl Violation for MultipleSpacesAfterOperator {
}
}

static OPERATOR_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());

/// E221, E222, E223, E224
#[cfg(feature = "logical_lines")]
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in OPERATOR_REGEX.captures_iter(line) {
let before = line_match.get(1).unwrap();
let after = line_match.get(2).unwrap();

if before.as_str().contains('\t') {
diagnostics.push((before.start(), TabBeforeOperator.into()));
} else if before.as_str().len() > 1 {
diagnostics.push((before.start(), MultipleSpacesBeforeOperator.into()));
let mut last_end = None;

for line_match in OPERATOR_REGEX.find_iter(line) {
if last_end != Some(line_match.start()) {
let before = &line[..line_match.start()];

match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()));
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeOperator.into(),
)),
_ => {}
}
}

if after.as_str().contains('\t') {
diagnostics.push((after.start(), TabAfterOperator.into()));
} else if after.as_str().len() > 1 {
diagnostics.push((after.start(), MultipleSpacesAfterOperator.into()));
let after = &line[line_match.end()..];
let (leading_offset, leading_kind) = Whitespace::leading(after);
match leading_kind {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()));
}
_ => {}
}

last_end = Some(line_match.end() + leading_offset);
}
diagnostics
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use once_cell::sync::Lazy;
use regex::Regex;

use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
Expand Down Expand Up @@ -111,28 +112,41 @@ impl Violation for TabBeforeKeyword {
}

static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(\s*)\b(?:False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b(\s*)").unwrap()
Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
});

/// E271, E272, E273, E274
#[cfg(feature = "logical_lines")]
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in KEYWORD_REGEX.captures_iter(line) {
let before = line_match.get(1).unwrap();
let after = line_match.get(2).unwrap();

if before.as_str().contains('\t') {
diagnostics.push((before.start(), TabBeforeKeyword.into()));
} else if before.as_str().len() > 1 {
diagnostics.push((before.start(), MultipleSpacesBeforeKeyword.into()));
let mut last_end = None;

for line_match in KEYWORD_REGEX.find_iter(line) {
if last_end != Some(line_match.start()) {
let before = &line[..line_match.start()];
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()));
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeKeyword.into(),
)),
_ => {}
}
}

if after.as_str().contains('\t') {
diagnostics.push((after.start(), TabAfterKeyword.into()));
} else if after.as_str().len() > 1 {
diagnostics.push((after.start(), MultipleSpacesAfterKeyword.into()));
let after = &line[line_match.end()..];
let (leading_offset, leading_kind) = Whitespace::leading(after);
match leading_kind {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()));
}
_ => {}
}

last_end = Some(line_match.end() + leading_offset);
}
diagnostics
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ expression: diagnostics
fixable: false
location:
row: 28
column: 1
column: 2
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked these numbers and they match pycodestyle

crates/ruff/resources/test/fixtures/pycodestyle/E27.py:28:2: E274 tab before keyword
crates/ruff/resources/test/fixtures/pycodestyle/E27.py:30:5: E274 tab before keyword

The other tests are fixed (very hacky solution but the next PR replaces it anyway)

end_location:
row: 28
column: 1
column: 2
fix:
edits: []
parent: ~
Expand All @@ -23,10 +23,10 @@ expression: diagnostics
fixable: false
location:
row: 30
column: 4
column: 5
end_location:
row: 30
column: 4
column: 5
fix:
edits: []
parent: ~
Expand Down