Skip to content

Commit

Permalink
No regex
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Mar 26, 2023
1 parent 6ca18cc commit 2d1be26
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 81 deletions.
16 changes: 4 additions & 12 deletions crates/ruff/src/checkers/logical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,8 @@ pub fn check_logical_lines(
let indent_size = 4;

if line.flags().contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(line.text()) {
for (location, kind) in space_around_operator(line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
Expand All @@ -72,10 +70,8 @@ pub fn check_logical_lines(
.flags()
.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
{
for (index, kind) in extraneous_whitespace(line.text()) {
for (location, kind) in extraneous_whitespace(line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
Expand All @@ -87,10 +83,8 @@ pub fn check_logical_lines(
}
}
if line.flags().contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(line.text()) {
for (location, kind) in whitespace_around_keywords(line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
Expand Down Expand Up @@ -127,9 +121,7 @@ pub fn check_logical_lines(
}
}
if line.flags().contains(TokenFlags::OPERATOR) {
for (location, kind) in
whitespace_around_named_parameter_equals(line.tokens(), line.text())
{
for (location, kind) in whitespace_around_named_parameter_equals(line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
Expand Down
63 changes: 47 additions & 16 deletions crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;

use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;

/// ## What it does
/// Checks for the use of extraneous whitespace after "(".
Expand Down Expand Up @@ -101,28 +105,55 @@ impl Violation for WhitespaceBeforePunctuation {
}
}

// TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[\[({][ \t]|[ \t][]}),;:]").unwrap());

/// E201, E202, E203
#[cfg(feature = "logical_lines")]
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
pub fn extraneous_whitespace(
tokens: &[(Location, &Tok, Location)],
locator: &Locator,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
let text = &line[line_match.range()];
let char = text.trim();
let found = line_match.start();
if text.chars().last().unwrap().is_ascii_whitespace() {
diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
} else if line.chars().nth(found - 1).map_or(false, |c| c != ',') {
if char == "}" || char == "]" || char == ")" {
diagnostics.push((found, WhitespaceBeforeCloseBracket.into()));
} else {
diagnostics.push((found, WhitespaceBeforePunctuation.into()));
let mut last_token: Option<&Tok> = None;

for (start, token, end) in tokens {
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
let after = &locator.contents()[locator.offset(*end)..];

if !matches!(Whitespace::leading(after), Whitespace::None) {
diagnostics.push((
Location::new(end.row(), end.column()),
WhitespaceAfterOpenBracket.into(),
));
}
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => {
let before = &locator.contents()[..locator.offset(*start)];

let diagnostic_kind = if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
DiagnosticKind::from(WhitespaceBeforePunctuation)
} else {
DiagnosticKind::from(WhitespaceBeforeCloseBracket)
};

match Whitespace::trailing(before) {
(Whitespace::None, _) => {}
(_, offset) => {
if !matches!(last_token, Some(Tok::Comma)) {
diagnostics.push((
Location::new(start.row(), start.column() - offset),
diagnostic_kind,
));
}
}
}
}

_ => {}
}

last_token = Some(token);
}

diagnostics
}

Expand Down
15 changes: 9 additions & 6 deletions crates/ruff/src/rules/pycodestyle/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ impl Whitespace {
for c in content.chars() {
if c == '\t' {
return Self::Tab;
} else if matches!(c, '\n' | '\r') {
break;
} else if c.is_whitespace() {
count += 1;
} else {
Expand All @@ -116,24 +118,25 @@ impl Whitespace {
}

fn trailing(content: &str) -> (Self, usize) {
let mut count = 0u32;
let mut offset = 0;
let mut count = 0;

for c in content.chars().rev() {
if c == '\t' {
return (Self::Tab, offset + 1);
return (Self::Tab, count + 1);
} else if matches!(c, '\n' | '\r') {
// Indent
return (Self::None, 0);
} else if c.is_whitespace() {
count += 1;
offset += c.len_utf8();
} else {
break;
}
}

match count {
0 => (Self::None, 0),
1 => (Self::Single, offset),
_ => (Self::Many, offset),
1 => (Self::Single, count),
_ => (Self::Many, count),
}
}
}
87 changes: 66 additions & 21 deletions crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;

use crate::rules::pycodestyle::helpers::is_op_token;
use crate::rules::pycodestyle::helpers::{is_op_token, is_ws_needed_token};
use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;

/// ## What it does
/// Checks for extraneous tabs before an operator.
Expand Down Expand Up @@ -128,37 +129,81 @@ impl Violation for MultipleSpacesAfterOperator {
}
}

static OPERATOR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-+*/|!<=>%&^]+|:=").unwrap());

/// E221, E222, E223, E224
#[cfg(feature = "logical_lines")]
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
pub fn space_around_operator(
tokens: &[(Location, &Tok, Location)],
locator: &Locator,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in OPERATOR_REGEX.find_iter(line) {
let before = &line[..line_match.start()];
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeOperator.into()))

for (start, token, end) in tokens {
if is_operator_token(token) {
let start_offset = locator.offset(*start);
let before = &locator.contents()[..start_offset];

match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
TabBeforeOperator.into(),
)),
(Whitespace::Many, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
MultipleSpacesBeforeOperator.into(),
)),
_ => {}
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeOperator.into(),
)),
_ => {}
}

let after = &line[line_match.end()..];
match Whitespace::leading(after) {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterOperator.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterOperator.into()))
let end_offset = locator.offset(*end);
let after = &locator.contents()[end_offset..];
match Whitespace::leading(after) {
Whitespace::Tab => diagnostics.push((*end, TabAfterOperator.into())),
Whitespace::Many => diagnostics.push((*end, MultipleSpacesAfterOperator.into())),
_ => {}
}
_ => {}
}
}

diagnostics
}

/// Returns `true` if `token` is an operator whose surrounding whitespace is
/// checked by `space_around_operator` (E221, E222, E223, E224).
///
/// The set covers arithmetic, bitwise, shift and comparison operators, plain
/// and augmented assignment, and the walrus operator (`:=`).
///
/// `Tok::EqEqual` (`==`) is listed explicitly: it is a distinct token from
/// `Tok::Equal` (`=`), so without it whitespace around `==` would silently go
/// unchecked while every other comparison operator is covered.
const fn is_operator_token(token: &Tok) -> bool {
    matches!(
        token,
        Tok::Plus
            | Tok::Minus
            | Tok::Star
            | Tok::Slash
            | Tok::Vbar
            | Tok::Amper
            | Tok::Less
            | Tok::Greater
            | Tok::Equal
            | Tok::Percent
            | Tok::NotEqual
            | Tok::EqEqual
            | Tok::LessEqual
            | Tok::GreaterEqual
            | Tok::CircumFlex
            | Tok::LeftShift
            | Tok::RightShift
            | Tok::DoubleStar
            | Tok::PlusEqual
            | Tok::MinusEqual
            | Tok::StarEqual
            | Tok::SlashEqual
            | Tok::PercentEqual
            | Tok::AmperEqual
            | Tok::VbarEqual
            | Tok::CircumflexEqual
            | Tok::LeftShiftEqual
            | Tok::RightShiftEqual
            | Tok::DoubleStarEqual
            | Tok::DoubleSlash
            | Tok::DoubleSlashEqual
            | Tok::ColonEqual
    )
}

#[cfg(not(feature = "logical_lines"))]
pub fn space_around_operator(_line: &str) -> Vec<(usize, DiagnosticKind)> {
vec![]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::Location;
use rustpython_parser::Tok;

use crate::rules::pycodestyle::helpers::is_keyword_token;
use crate::rules::pycodestyle::rules::Whitespace;
use ruff_diagnostics::DiagnosticKind;
use ruff_diagnostics::Violation;
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::Locator;

/// ## What it does
/// Checks for extraneous whitespace after keywords.
Expand Down Expand Up @@ -111,36 +115,41 @@ impl Violation for TabBeforeKeyword {
}
}

static KEYWORD_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"\b(False|None|True|and|as|assert|async|await|break|class|continue|def|del|elif|else|except|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|raise|return|try|while|with|yield)\b").unwrap()
});

/// E271, E272, E273, E274
#[cfg(feature = "logical_lines")]
pub fn whitespace_around_keywords(line: &str) -> Vec<(usize, DiagnosticKind)> {
pub fn whitespace_around_keywords(
tokens: &[(Location, &Tok, Location)],
locator: &Locator,
) -> Vec<(Location, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in KEYWORD_REGEX.find_iter(line) {
let before = &line[..line_match.start()];
match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => {
diagnostics.push((line_match.start() - offset, TabBeforeKeyword.into()))

for (start, token, end) in tokens {
if is_keyword_token(token) {
let start_offset = locator.offset(*start);
let before = &locator.contents()[..start_offset];

match Whitespace::trailing(before) {
(Whitespace::Tab, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
TabBeforeKeyword.into(),
)),
(Whitespace::Many, offset) => diagnostics.push((
Location::new(start.row(), start.column() - offset),
MultipleSpacesBeforeKeyword.into(),
)),
_ => {}
}
(Whitespace::Many, offset) => diagnostics.push((
line_match.start() - offset,
MultipleSpacesBeforeKeyword.into(),
)),
_ => {}
}

let after = &line[line_match.end()..];
match Whitespace::leading(after) {
Whitespace::Tab => diagnostics.push((line_match.end(), TabAfterKeyword.into())),
Whitespace::Many => {
diagnostics.push((line_match.end(), MultipleSpacesAfterKeyword.into()))
let end_offset = locator.offset(*end);
let after = &locator.contents()[end_offset..];
match Whitespace::leading(after) {
Whitespace::Tab => diagnostics.push((*end, TabAfterKeyword.into())),
Whitespace::Many => diagnostics.push((*end, MultipleSpacesAfterKeyword.into())),
_ => {}
}
_ => {}
}
}

diagnostics
}

Expand Down
Loading

0 comments on commit 2d1be26

Please sign in to comment.