diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs
index fa6a9d06b4c8ac..0c671e5a5e42b2 100644
--- a/crates/ruff/src/checkers/logical_lines.rs
+++ b/crates/ruff/src/checkers/logical_lines.rs
@@ -43,16 +43,11 @@ pub fn check_logical_lines(
 ) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
 
-    let indent_char = stylist.indentation().as_char();
     let mut prev_line = None;
     let mut prev_indent_level = None;
-    for line in &LogicalLines::from_tokens(tokens, locator) {
-        // Extract the indentation level.
-        let Some(start_loc) = line.first_token_location() else { continue; };
-        let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc));
-        let indent_level = expand_indent(start_line);
-        let indent_size = 4;
+    let indent_char = stylist.indentation().as_char();
 
+    for line in &LogicalLines::from_tokens(tokens, locator) {
         if line.flags().contains(TokenFlags::OPERATOR) {
             for (location, kind) in space_around_operator(line.tokens(), locator) {
                 if settings.rules.enabled(kind.rule()) {
@@ -150,9 +145,7 @@ pub fn check_logical_lines(
             #[cfg(not(feature = "logical_lines"))]
             let should_fix = false;
 
-            for diagnostic in
-                missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
-            {
+            for diagnostic in missing_whitespace(line.tokens(), locator, should_fix) {
                 if settings.rules.enabled(diagnostic.kind.rule()) {
                     diagnostics.push(diagnostic);
                 }
@@ -174,7 +167,13 @@ pub fn check_logical_lines(
             }
         }
 
-        for (index, kind) in indentation(
+        // Extract the indentation level.
+        let Some(start_loc) = line.first_token_location() else { continue; };
+        let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc));
+        let indent_level = expand_indent(start_line);
+        let indent_size = 4;
+
+        for (location, kind) in indentation(
             &line,
             prev_line.as_ref(),
             indent_char,
@@ -182,8 +181,6 @@ pub fn check_logical_lines(
             prev_indent_level,
             indent_size,
         ) {
-            let (token_offset, pos) = line.mapping(index);
-            let location = Location::new(pos.row(), pos.column() + index - token_offset);
             if settings.rules.enabled(kind.rule()) {
                 diagnostics.push(Diagnostic {
                     kind,
@@ -195,7 +192,7 @@ pub fn check_logical_lines(
             }
         }
 
-        if !line.is_comment() {
+        if !line.is_comment_only() {
             prev_line = Some(line);
             prev_indent_level = Some(indent_level);
         }
@@ -221,7 +218,7 @@ z = x + 1"#;
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
            .into_iter()
-           .map(|line| line.text().to_string())
+           .map(|line| line.text_trimmed().to_string())
            .collect();
        let expected = vec![
            "x = 1".to_string(),
@@ -242,10 +239,10 @@ z = x + 1"#;
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
            .into_iter()
-           .map(|line| line.text().to_string())
+           .map(|line| line.text_trimmed().to_string())
            .collect();
        let expected = vec![
-           "x = [1, 2, 3, ]".to_string(),
+           "x = [\n 1,\n 2,\n 3,\n]".to_string(),
            "y = 2".to_string(),
            "z = x + 1".to_string(),
        ];
@@ -256,9 +253,9 @@ z = x + 1"#;
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
            .into_iter()
-           .map(|line| line.text().to_string())
+           .map(|line| line.text_trimmed().to_string())
            .collect();
-       let expected = vec!["x = \"xxx\"".to_string()];
+       let expected = vec!["x = 'abc'".to_string()];
        assert_eq!(actual, expected);

        let contents = r#"
@@ -269,7 +266,7 @@ f()"#;
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
            .into_iter()
-           .map(|line| line.text().to_string())
+           .map(|line| line.text_trimmed().to_string())
            .collect();
        let expected = vec!["def f():", "x = 1", "f()"];
        assert_eq!(actual, expected);
@@ -284,9 +281,15 @@ f()"#;
        let locator = Locator::new(contents);
        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
            .into_iter()
-           .map(|line| line.text().to_string())
+           .map(|line| line.text_trimmed().to_string())
            .collect();
-       let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"];
+       let expected = vec![
+           "def f():",
+           "\"\"\"Docstring goes here.\"\"\"",
+           "",
+           "x = 1",
+           "f()",
+       ];
        assert_eq!(actual, expected);
    }
 }
diff --git a/crates/ruff/src/rules/pycodestyle/helpers.rs b/crates/ruff/src/rules/pycodestyle/helpers.rs
index 4b92b5fc97a659..b0f815e408557c 100644
--- a/crates/ruff/src/rules/pycodestyle/helpers.rs
+++ b/crates/ruff/src/rules/pycodestyle/helpers.rs
@@ -5,6 +5,7 @@ use rustpython_parser::Tok;
 
 use ruff_python_ast::helpers::{create_expr, unparse_expr};
 use ruff_python_ast::source_code::Stylist;
+use ruff_python_ast::token_kind::TokenKind;
 
 pub fn is_ambiguous_name(name: &str) -> bool {
     name == "l" || name == "I" || name == "O"
@@ -59,168 +60,37 @@ pub fn is_overlong(
 }
 
 pub const fn is_keyword_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::False
-            | Tok::True
-            | Tok::None
-            | Tok::And
-            | Tok::As
-            | Tok::Assert
-            | Tok::Await
-            | Tok::Break
-            | Tok::Class
-            | Tok::Continue
-            | Tok::Def
-            | Tok::Del
-            | Tok::Elif
-            | Tok::Else
-            | Tok::Except
-            | Tok::Finally
-            | Tok::For
-            | Tok::From
-            | Tok::Global
-            | Tok::If
-            | Tok::Import
-            | Tok::In
-            | Tok::Is
-            | Tok::Lambda
-            | Tok::Nonlocal
-            | Tok::Not
-            | Tok::Or
-            | Tok::Pass
-            | Tok::Raise
-            | Tok::Return
-            | Tok::Try
-            | Tok::While
-            | Tok::With
-            | Tok::Yield
-    )
+    TokenKind::from_token(token).is_keyword()
 }
 
 pub const fn is_singleton_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::False { .. } | Tok::True { .. } | Tok::None { .. },
-    )
+    TokenKind::from_token(token).is_singleton()
 }
 
 pub const fn is_op_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::Lpar
-            | Tok::Rpar
-            | Tok::Lsqb
-            | Tok::Rsqb
-            | Tok::Comma
-            | Tok::Semi
-            | Tok::Plus
-            | Tok::Minus
-            | Tok::Star
-            | Tok::Slash
-            | Tok::Vbar
-            | Tok::Amper
-            | Tok::Less
-            | Tok::Greater
-            | Tok::Equal
-            | Tok::Dot
-            | Tok::Percent
-            | Tok::Lbrace
-            | Tok::Rbrace
-            | Tok::NotEqual
-            | Tok::LessEqual
-            | Tok::GreaterEqual
-            | Tok::Tilde
-            | Tok::CircumFlex
-            | Tok::LeftShift
-            | Tok::RightShift
-            | Tok::DoubleStar
-            | Tok::PlusEqual
-            | Tok::MinusEqual
-            | Tok::StarEqual
-            | Tok::SlashEqual
-            | Tok::PercentEqual
-            | Tok::AmperEqual
-            | Tok::VbarEqual
-            | Tok::CircumflexEqual
-            | Tok::LeftShiftEqual
-            | Tok::RightShiftEqual
-            | Tok::DoubleStarEqual
-            | Tok::DoubleSlash
-            | Tok::DoubleSlashEqual
-            | Tok::At
-            | Tok::AtEqual
-            | Tok::Rarrow
-            | Tok::Ellipsis
-            | Tok::ColonEqual
-            | Tok::Colon
-    )
+    TokenKind::from_token(token).is_operator()
 }
 
 pub const fn is_skip_comment_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::Newline | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline | Tok::Comment { .. }
-    )
+    TokenKind::from_token(token).is_skip_comment()
 }
 
 pub const fn is_soft_keyword_token(token: &Tok) -> bool {
-    matches!(token, Tok::Match | Tok::Case)
+    TokenKind::from_token(token).is_soft_keyword()
 }
 
 pub const fn is_arithmetic_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::DoubleStar | Tok::Star | Tok::Plus | Tok::Minus | Tok::Slash | Tok::At
-    )
+    TokenKind::from_token(token).is_arithmetic()
 }
 
 pub const fn is_ws_optional_token(token: &Tok) -> bool {
-    is_arithmetic_token(token)
-        || matches!(
-            token,
-            Tok::CircumFlex
-                | Tok::Amper
-                | Tok::Vbar
-                | Tok::LeftShift
-                | Tok::RightShift
-                | Tok::Percent
-        )
+    TokenKind::from_token(token).is_whitespace_optional()
 }
 
 pub const fn is_ws_needed_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::DoubleStarEqual
-            | Tok::StarEqual
-            | Tok::SlashEqual
-            | Tok::DoubleSlashEqual
-            | Tok::PlusEqual
-            | Tok::MinusEqual
-            | Tok::NotEqual
-            | Tok::Less
-            | Tok::Greater
-            | Tok::PercentEqual
-            | Tok::CircumflexEqual
-            | Tok::AmperEqual
-            | Tok::VbarEqual
-            | Tok::EqEqual
-            | Tok::LessEqual
-            | Tok::GreaterEqual
-            | Tok::LeftShiftEqual
-            | Tok::RightShiftEqual
-            | Tok::Equal
-            | Tok::And
-            | Tok::Or
-            | Tok::In
-            | Tok::Is
-            | Tok::Rarrow
-    )
+    TokenKind::from_token(token).is_whitespace_needed()
 }
 
 pub const fn is_unary_token(token: &Tok) -> bool {
-    matches!(
-        token,
-        Tok::Plus | Tok::Minus | Tok::Star | Tok::DoubleStar | Tok::RightShift
-    )
+    TokenKind::from_token(token).is_unary()
 }
diff --git a/crates/ruff/src/rules/pycodestyle/logical_lines.rs b/crates/ruff/src/rules/pycodestyle/logical_lines.rs
index c8b41c6cd06d02..f12f8f281c0bd1 100644
--- a/crates/ruff/src/rules/pycodestyle/logical_lines.rs
+++ b/crates/ruff/src/rules/pycodestyle/logical_lines.rs
@@ -2,16 +2,13 @@ use bitflags::bitflags;
 use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
-use std::borrow::Cow;
 use std::fmt::{Debug, Formatter};
 use std::iter::FusedIterator;
-use unicode_width::UnicodeWidthStr;
 
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
 
-use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
-
 bitflags! {
     #[derive(Default)]
     pub struct TokenFlags: u8 {
@@ -30,37 +27,34 @@ bitflags! {
 #[derive(Clone)]
 pub struct LogicalLines<'a> {
-    text: String,
-
-    /// start position, token, end position
-    tokens: Vec<(Location, &'a Tok, Location)>,
-
-    mappings: Mappings,
-
+    tokens: Tokens,
     lines: Vec<Line>,
+    locator: &'a Locator<'a>,
 }
 
 impl<'a> LogicalLines<'a> {
-    pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self {
+    pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
         assert!(u32::try_from(tokens.len()).is_ok());
 
         let single_token = tokens.len() == 1;
-        let mut builder =
-            LogicalLinesBuilder::with_capacity(tokens.len(), locator.contents().len());
+        let mut builder = LogicalLinesBuilder::with_capacity(tokens.len());
         let mut parens: u32 = 0;
 
         for (start, token, end) in tokens.iter().flatten() {
-            builder.push_token(*start, token, *end, locator);
+            let token_kind = TokenKind::from_token(token);
+            builder.push_token(*start, token, *end);
 
-            match token {
-                Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
+            match token_kind {
+                TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
                     parens += 1;
                 }
-                Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
+                TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
                     parens -= 1;
                 }
-                Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => {
-                    if matches!(token, Tok::Newline) {
+                TokenKind::Newline | TokenKind::NonLogicalNewline | TokenKind::Comment
+                    if parens == 0 =>
+                {
+                    if token_kind == TokenKind::Newline {
                         builder.finish_line();
                     }
                     // Comment only file or non logical new line?
@@ -74,7 +68,7 @@ impl<'a> LogicalLines<'a> {
             }
         }
 
-        builder.finish()
+        builder.finish(locator)
     }
 }
@@ -98,20 +92,6 @@ impl<'a> IntoIterator for &'a LogicalLines<'a> {
     }
 }
 
-#[derive(Debug, Clone)]
-struct Line {
-    flags: TokenFlags,
-    /// Byte offset of the start of the text of this line.
-    text_start: u32,
-
-    /// Byte offset of the end of the text of this line.
-    text_end: u32,
-    mappings_start: u32,
-    mappings_end: u32,
-    tokens_start: u32,
-    tokens_end: u32,
-}
-
 #[derive(Debug)]
 pub struct LogicalLine<'a> {
     lines: &'a LogicalLines<'a>,
@@ -120,54 +100,51 @@ pub struct LogicalLine<'a> {
 
 impl<'a> LogicalLine<'a> {
     /// Returns true if this is a comment only line
-    pub fn is_comment(&self) -> bool {
-        self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT)
+    pub fn is_comment_only(&self) -> bool {
+        self.flags() == TokenFlags::COMMENT && self.tokens().trimmed().is_empty()
     }
 
-    /// Returns the text of this line
+    /// Returns the logical line's text, including comments, indents, dedents, and trailing newlines.
     pub fn text(&self) -> &'a str {
-        &self.lines.text[self.line.text_start as usize..self.line.text_end as usize]
-    }
+        let tokens = self.tokens();
 
-    /// Returns the tokens of the line
-    pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] {
-        &self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
-    }
-
-    /// Returns the [`Location`] of the first token on the line or [`None`].
-    pub fn first_token_location(&self) -> Option<&Location> {
-        self.token_locations().first()
+        match (tokens.first(), tokens.last()) {
+            (Some(first), Some(last)) => {
+                let locator = self.lines.locator;
+                locator.slice(Range::new(first.start(), last.end()))
+            }
+            _ => "",
+        }
     }
 
-    fn token_offsets(&self) -> &[u32] {
-        &self.lines.mappings.logical_line_offsets
-            [self.line.mappings_start as usize..self.line.mappings_end as usize]
-    }
+    /// Returns the line's text without any leading or trailing newline, comment, indent, or dedent.
+    pub fn text_trimmed(&self) -> &'a str {
+        let trimmed = self.tokens().trimmed();
 
-    fn token_locations(&self) -> &[Location] {
-        &self.lines.mappings.locations
-            [self.line.mappings_start as usize..self.line.mappings_end as usize]
+        match (trimmed.first(), trimmed.last()) {
+            (Some(first), Some(last)) => {
+                let locator = self.lines.locator;
+                locator.slice(Range::new(first.start(), last.end()))
+            }
+            _ => "",
+        }
     }
 
-    /// Returns the mapping for an offset in the logical line.
-    ///
-    /// The offset of the closest token and its corresponding location.
-    pub fn mapping(&self, offset: usize) -> (usize, Location) {
-        let index = self
-            .token_offsets()
-            .binary_search(&(self.line.text_start + u32::try_from(offset).unwrap()))
-            .unwrap_or_default();
-
-        (
-            (self.token_offsets()[index] - self.line.text_start) as usize,
-            self.token_locations()[index],
-        )
+    /// Returns all tokens of the line, including comments and trailing newlines.
+    pub fn tokens(&self) -> LogicalLineTokens<'a> {
+        LogicalLineTokens {
+            tokens: &self.lines.tokens,
+            front: self.line.tokens_start,
+            back: self.line.tokens_end,
+        }
     }
 
-    pub fn is_empty(&self) -> bool {
-        self.lines.mappings.is_empty()
+    /// Returns the [`Location`] of the first token on the line or [`None`].
+    pub fn first_token_location(&self) -> Option<Location> {
+        self.tokens().first().map(|t| t.start())
     }
 
+    /// Returns the line's flags
     pub const fn flags(&self) -> TokenFlags {
         self.line.flags
     }
@@ -223,160 +200,259 @@ impl ExactSizeIterator for LogicalLinesIter<'_> {}
 
 impl FusedIterator for LogicalLinesIter<'_> {}
 
-/// Source map that maps byte positions in the logical line text to the [`Location`] in the
-/// original document.
-#[derive(Debug, Default, Clone)]
-struct Mappings {
-    /// byte offsets of the logical lines at which tokens start/end.
-    logical_line_offsets: Vec<u32>,
+/// The tokens of a logical line
+pub struct LogicalLineTokens<'a> {
+    tokens: &'a Tokens,
+    front: u32,
+    back: u32,
+}
+
+impl<'a> LogicalLineTokens<'a> {
+    pub fn iter(&self) -> LogicalLineTokensIter<'a> {
+        LogicalLineTokensIter {
+            tokens: self.tokens,
+            front: self.front,
+            back: self.back,
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        (self.back - self.front) as usize
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    pub fn trimmed(&self) -> LogicalLineTokens<'a> {
+        let mut front = self.front;
+        let mut back = self.back;
+
+        while front < back {
+            let kind = self.tokens.kinds[front as usize];
+
+            if !matches!(
+                kind,
+                TokenKind::Newline
+                    | TokenKind::NonLogicalNewline
+                    | TokenKind::Indent
+                    | TokenKind::Dedent
+                    | TokenKind::Comment
+            ) {
+                break;
+            }
+
+            front += 1;
+        }
+
+        while front < back {
+            let kind = self.tokens.kinds[back as usize - 1];
+
+            if !matches!(
+                kind,
+                TokenKind::Newline
+                    | TokenKind::NonLogicalNewline
+                    | TokenKind::Indent
+                    | TokenKind::Dedent
+                    | TokenKind::Comment
+            ) {
+                break;
+            }
+            back -= 1;
+        }
+
+        LogicalLineTokens {
+            tokens: self.tokens,
+            front,
+            back,
+        }
+    }
+
+    pub fn first(&self) -> Option<LogicalLineToken<'a>> {
+        self.iter().next()
+    }
 
-    /// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document.
-    locations: Vec<Location>,
+    pub fn last(&self) -> Option<LogicalLineToken<'a>> {
+        self.iter().next_back()
+    }
 }
 
-impl Mappings {
-    fn with_capacity(capacity: usize) -> Self {
-        Self {
-            logical_line_offsets: Vec::with_capacity(capacity),
-            locations: Vec::with_capacity(capacity),
+impl<'a> IntoIterator for LogicalLineTokens<'a> {
+    type Item = LogicalLineToken<'a>;
+    type IntoIter = LogicalLineTokensIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl<'a> IntoIterator for &LogicalLineTokens<'a> {
+    type Item = LogicalLineToken<'a>;
+    type IntoIter = LogicalLineTokensIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl Debug for LogicalLineTokens<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_list().entries(self.iter()).finish()
+    }
+}
+
+pub struct LogicalLineTokensIter<'a> {
+    tokens: &'a Tokens,
+    front: u32,
+    back: u32,
+}
+
+impl<'a> Iterator for LogicalLineTokensIter<'a> {
+    type Item = LogicalLineToken<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.front < self.back {
+            let result = Some(LogicalLineToken {
+                tokens: self.tokens,
+                position: self.front,
+            });
+
+            self.front += 1;
+            result
+        } else {
+            None
         }
     }
 
-    fn len(&self) -> usize {
-        self.logical_line_offsets.len()
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = (self.back - self.front) as usize;
+        (len, Some(len))
     }
+}
 
-    fn is_empty(&self) -> bool {
-        self.logical_line_offsets.is_empty()
+impl ExactSizeIterator for LogicalLineTokensIter<'_> {}
+
+impl FusedIterator for LogicalLineTokensIter<'_> {}
+
+impl DoubleEndedIterator for LogicalLineTokensIter<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        if self.front < self.back {
+            self.back -= 1;
+            Some(LogicalLineToken {
+                position: self.back,
+                tokens: self.tokens,
+            })
+        } else {
+            None
+        }
     }
+}
 
-    fn truncate(&mut self, len: usize) {
-        self.locations.truncate(len);
-        self.logical_line_offsets.truncate(len);
+/// A token of a logical line
+#[derive(Clone)]
+pub struct LogicalLineToken<'a> {
+    tokens: &'a Tokens,
+    position: u32,
+}
+
+impl<'a> LogicalLineToken<'a> {
+    /// Returns the token's kind
+    pub fn kind(&self) -> TokenKind {
+        #[allow(unsafe_code)]
+        unsafe {
+            *self.tokens.kinds.get_unchecked(self.position as usize)
+        }
     }
 
-    #[allow(clippy::cast_possible_truncation)]
-    fn push(&mut self, offset: usize, location: Location) {
-        self.logical_line_offsets.push(offset as u32);
-        self.locations.push(location);
+    /// Returns the token's start location
+    pub fn start(&self) -> Location {
+        self.range().0
+    }
+
+    /// Returns the token's end location
+    pub fn end(&self) -> Location {
+        self.range().1
+    }
+
+    /// Returns a tuple with the token's `(start, end)` locations
+    pub fn range(&self) -> (Location, Location) {
+        #[allow(unsafe_code)]
+        let &(start, end) = unsafe { self.tokens.locations.get_unchecked(self.position as usize) };
+
+        (start, end)
+    }
+}
+
+impl Debug for LogicalLineToken<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("LogicalLineToken")
+            .field("kind", &self.kind())
+            .field("range", &self.range())
+            .finish()
     }
 }
 
 #[derive(Debug, Default)]
 struct CurrentLine {
     flags: TokenFlags,
-    text_start: u32,
-    mappings_start: u32,
     tokens_start: u32,
-    previous_token: Option<Location>,
 }
 
 #[derive(Debug, Default)]
-pub struct LogicalLinesBuilder<'a> {
-    text: String,
-    tokens: Vec<(Location, &'a Tok, Location)>,
-    mappings: Mappings,
+pub struct LogicalLinesBuilder {
+    tokens: Tokens,
     lines: Vec<Line>,
     current_line: Option<CurrentLine>,
 }
 
-impl<'a> LogicalLinesBuilder<'a> {
-    fn with_capacity(tokens: usize, string: usize) -> Self {
+impl LogicalLinesBuilder {
+    fn with_capacity(tokens: usize) -> Self {
         Self {
-            tokens: Vec::with_capacity(tokens),
-            mappings: Mappings::with_capacity(tokens + 1),
-            text: String::with_capacity(string),
+            tokens: Tokens::with_capacity(tokens),
             ..Self::default()
         }
     }
 
     // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each token is at least one character long
     #[allow(clippy::cast_possible_truncation)]
-    fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) {
+    fn push_token(&mut self, start: Location, token: &Tok, end: Location) {
         let tokens_start = self.tokens.len();
-        self.tokens.push((start, token, end));
-
-        let mut line = self.current_line.get_or_insert_with(|| {
-            let mappings_start = self.mappings.len();
-            self.mappings.push(self.text.len(), start);
-
-            CurrentLine {
-                flags: TokenFlags::empty(),
-                text_start: self.text.len() as u32,
-                mappings_start: mappings_start as u32,
-                tokens_start: tokens_start as u32,
-                previous_token: None,
-            }
-        });
+        let token_kind = TokenKind::from_token(token);
 
-        if matches!(
-            token,
-            Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
-        ) {
-            return;
-        }
+        let line = self.current_line.get_or_insert_with(|| CurrentLine {
+            flags: TokenFlags::empty(),
+            tokens_start: tokens_start as u32,
+        });
 
-        if matches!(token, Tok::Comment(..)) {
+        if matches!(token_kind, TokenKind::Comment) {
             line.flags.insert(TokenFlags::COMMENT);
-            return;
-        }
-
-        if is_op_token(token) {
+        } else if token_kind.is_operator() {
             line.flags.insert(TokenFlags::OPERATOR);
+
+            line.flags.set(
+                TokenFlags::BRACKET,
+                matches!(
+                    token_kind,
+                    TokenKind::Lpar
+                        | TokenKind::Lsqb
+                        | TokenKind::Lbrace
+                        | TokenKind::Rpar
+                        | TokenKind::Rsqb
+                        | TokenKind::Rbrace
+                ),
+            );
         }
 
         if matches!(
-            token,
-            Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
+            token_kind,
+            TokenKind::Comma | TokenKind::Semi | TokenKind::Colon
         ) {
-            line.flags.insert(TokenFlags::BRACKET);
-        }
-
-        if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
             line.flags.insert(TokenFlags::PUNCTUATION);
-        }
-
-        if is_keyword_token(token) {
+        } else if token_kind.is_keyword() {
             line.flags.insert(TokenFlags::KEYWORD);
         }
 
-        // TODO(charlie): "Mute" strings.
-        let text = if let Tok::String { value, .. } = token {
-            // Replace the content of strings with a non-whs sequence because some lints
-            // search for whitespace in the document and whitespace inside of the string
-            // would complicate the search.
-            Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
-        } else {
-            Cow::Borrowed(locator.slice(Range {
-                location: start,
-                end_location: end,
-            }))
-        };
-
-        if let Some(prev) = line.previous_token.take() {
-            if prev.row() != start.row() {
-                let prev_text = locator.slice(Range {
-                    location: Location::new(prev.row(), prev.column() - 1),
-                    end_location: Location::new(prev.row(), prev.column()),
-                });
-                if prev_text == ","
-                    || ((prev_text != "{" && prev_text != "[" && prev_text != "(")
-                        && (text != "}" && text != "]" && text != ")"))
-                {
-                    self.text.push(' ');
-                }
-            } else if prev.column() != start.column() {
-                let prev_text = locator.slice(Range {
-                    location: prev,
-                    end_location: start,
-                });
-                self.text.push_str(prev_text);
-            }
-        }
-
-        line.previous_token = Some(end);
-        self.text.push_str(&text);
-        self.mappings.push(self.text.len(), end);
+        self.tokens.push(token_kind, start, end);
     }
 
     // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each token is at least one character long
    #[allow(clippy::cast_possible_truncation)]
@@ -385,10 +461,6 @@ impl<'a> LogicalLinesBuilder<'a> {
        if let Some(current) = self.current_line.take() {
            self.lines.push(Line {
                flags: current.flags,
-               text_start: current.text_start,
-               text_end: self.text.len() as u32,
-               mappings_start: current.mappings_start,
-               mappings_end: self.mappings.len() as u32,
                tokens_start: current.tokens_start,
                tokens_end: self.tokens.len() as u32,
            });
@@ -397,20 +469,55 @@ impl<'a> LogicalLinesBuilder<'a> {
 
     fn discard_line(&mut self) {
         if let Some(current) = self.current_line.take() {
-            self.text.truncate(current.text_start as usize);
             self.tokens.truncate(current.tokens_start as usize);
-            self.mappings.truncate(current.mappings_start as usize);
         }
     }
 
-    fn finish(mut self) -> LogicalLines<'a> {
+    fn finish<'a>(mut self, locator: &'a Locator<'a>) -> LogicalLines<'a> {
         self.finish_line();
 
         LogicalLines {
-            text: self.text,
             tokens: self.tokens,
-            mappings: self.mappings,
             lines: self.lines,
+            locator,
         }
     }
 }
+
+#[derive(Debug, Clone)]
+struct Line {
+    flags: TokenFlags,
+    tokens_start: u32,
+    tokens_end: u32,
+}
+
+#[derive(Debug, Clone, Default)]
+struct Tokens {
+    /// Stores the kinds in a separate vec because most checkers first scan for a specific kind.
+    /// This speeds up scanning because it avoids loading the start and end locations into the L1 cache.
+    kinds: Vec<TokenKind>,
+    locations: Vec<(Location, Location)>,
+}
+
+impl Tokens {
+    fn with_capacity(capacity: usize) -> Self {
+        Self {
+            kinds: Vec::with_capacity(capacity),
+            locations: Vec::with_capacity(capacity),
+        }
+    }
+
+    fn len(&self) -> usize {
+        self.kinds.len()
+    }
+
+    fn truncate(&mut self, len: usize) {
+        self.kinds.truncate(len);
+        self.locations.truncate(len);
+    }
+
+    fn push(&mut self, kind: TokenKind, start: Location, end: Location) {
+        self.kinds.push(kind);
+        self.locations.push((start, end));
+    }
+}
diff --git a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
index 8f4556b80459c3..f7e019b1524b2b 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/extraneous_whitespace.rs
@@ -5,11 +5,13 @@ use regex::Regex;
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
 
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 
 /// ## What it does
 /// Checks for the use of extraneous whitespace after "(".
@@ -108,16 +110,18 @@ impl Violation for WhitespaceBeforePunctuation {
 /// E201, E202, E203
 #[cfg(feature = "logical_lines")]
 pub fn extraneous_whitespace(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
     locator: &Locator,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
-    let mut last_token: Option<&Tok> = None;
+    let mut last_token: Option<TokenKind> = None;
 
-    for (start, token, end) in tokens {
-        match token {
-            Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
-                let after = &locator.contents()[locator.offset(*end)..];
+    for token in tokens {
+        let kind = token.kind();
+        match kind {
+            TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
+                let end = token.end();
+                let after = &locator.contents()[locator.offset(end)..];
 
                 if !matches!(Whitespace::leading(after), Whitespace::None) {
                     diagnostics.push((
@@ -126,19 +130,26 @@ pub fn extraneous_whitespace(
                     ));
                 }
             }
-            Tok::Rbrace | Tok::Rpar | Tok::Rsqb | Tok::Comma | Tok::Semi | Tok::Colon => {
-                let before = &locator.contents()[..locator.offset(*start)];
-
-                let diagnostic_kind = if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
-                    DiagnosticKind::from(WhitespaceBeforePunctuation)
-                } else {
-                    DiagnosticKind::from(WhitespaceBeforeCloseBracket)
-                };
+            TokenKind::Rbrace
+            | TokenKind::Rpar
+            | TokenKind::Rsqb
+            | TokenKind::Comma
+            | TokenKind::Semi
+            | TokenKind::Colon => {
+                let start = token.start();
+                let before = &locator.contents()[..locator.offset(start)];
+
+                let diagnostic_kind =
+                    if matches!(kind, TokenKind::Comma | TokenKind::Semi | TokenKind::Colon) {
+                        DiagnosticKind::from(WhitespaceBeforePunctuation)
+                    } else {
+                        DiagnosticKind::from(WhitespaceBeforeCloseBracket)
+                    };
 
                 match Whitespace::trailing(before) {
                     (Whitespace::None, _) => {}
                     (_, offset) => {
-                        if !matches!(last_token, Some(Tok::Comma)) {
+                        if !matches!(last_token, Some(TokenKind::Comma)) {
                             diagnostics.push((
                                 Location::new(start.row(), start.column() - offset),
                                 diagnostic_kind,
@@ -151,13 +162,16 @@ pub fn extraneous_whitespace(
             _ => {}
         }
 
-        last_token = Some(token);
+        last_token = Some(kind);
     }
 
     diagnostics
 }
 
 #[cfg(not(feature = "logical_lines"))]
-pub fn extraneous_whitespace(_line: &str) -> Vec<(usize, DiagnosticKind)> {
+pub fn extraneous_whitespace(
+    _tokens: LogicalLineTokens,
+    _locator: &Locator,
+) -> Vec<(usize, DiagnosticKind)> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/indentation.rs b/crates/ruff/src/rules/pycodestyle/rules/indentation.rs
index 9d7d7b3c93d558..d4f37512ede09c 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/indentation.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/indentation.rs
@@ -3,6 +3,8 @@
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
+use rustpython_parser::ast::Location;
 
 use crate::rules::pycodestyle::logical_lines::LogicalLine;
 
@@ -238,25 +240,29 @@ pub fn indentation(
     indent_level: usize,
     prev_indent_level: Option<usize>,
     indent_size: usize,
-) -> Vec<(usize, DiagnosticKind)> {
+) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
+
+    let location = logical_line.first_token_location().unwrap();
+
     if indent_level % indent_size != 0 {
         diagnostics.push((
-            0,
-            if logical_line.is_comment() {
+            location,
+            if logical_line.is_comment_only() {
                 IndentationWithInvalidMultipleComment { indent_size }.into()
             } else {
                 IndentationWithInvalidMultiple { indent_size }.into()
             },
         ));
     }
-    let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| {
-        prev_logical_line.text().ends_with(':')
-    });
+    let indent_expect = prev_logical_line
+        .and_then(|prev_logical_line| prev_logical_line.tokens().trimmed().last())
+        .map_or(false, |t| t.kind() == TokenKind::Colon);
+
     if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
         diagnostics.push((
-            0,
-            if logical_line.is_comment() {
+            location,
+            if logical_line.is_comment_only() {
                 NoIndentedBlockComment.into()
             } else {
                 NoIndentedBlock.into()
@@ -266,8 +272,8 @@ pub fn indentation(
         && prev_indent_level.map_or(false, |prev_indent_level| indent_level > prev_indent_level)
     {
         diagnostics.push((
-            0,
-            if logical_line.is_comment() {
+            location,
+            if logical_line.is_comment_only() {
                 UnexpectedIndentationComment.into()
             } else {
                 UnexpectedIndentation.into()
@@ -278,9 +284,10 @@ pub fn indentation(
         let expected_indent_amount = if indent_char == '\t' { 8 } else { 4 };
         let expected_indent_level = prev_indent_level.unwrap_or(0) + expected_indent_amount;
         if indent_level > expected_indent_level {
-            diagnostics.push((0, OverIndented.into()));
+            diagnostics.push((location, OverIndented.into()));
         }
     }
+
     diagnostics
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs
index 8e413739bac81f..8e0c41f8417ebb 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs
@@ -3,10 +3,14 @@
 use itertools::Itertools;
 use rustpython_parser::ast::Location;
 
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
+use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::Edit;
 use ruff_diagnostics::Violation;
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
 
 #[violation]
@@ -30,10 +34,9 @@ impl AlwaysAutofixableViolation for MissingWhitespace {
 /// E231
 #[cfg(feature = "logical_lines")]
 pub fn missing_whitespace(
-    line: &str,
-    row: usize,
+    tokens: LogicalLineTokens,
+    locator: &Locator,
     autofix: bool,
-    indent_level: usize,
 ) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
 
@@ -41,49 +44,58 @@ pub fn missing_whitespace(
     let mut num_rsqb = 0u32;
     let mut prev_lsqb = None;
     let mut prev_lbrace = None;
-    for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() {
-        match char {
-            '[' => {
+
+    for (token, next_token) in tokens.iter().tuple_windows() {
+        let kind = token.kind();
+        match kind {
+            TokenKind::Lsqb => {
                 num_lsqb += 1;
-                prev_lsqb = Some(idx);
+                prev_lsqb = Some(token.start());
             }
-            ']' => {
+            TokenKind::Rsqb => {
                 num_rsqb += 1;
             }
-            '{' => {
-                prev_lbrace = Some(idx);
+            TokenKind::Lbrace => {
+                prev_lbrace = Some(token.start());
             }
-            ',' | ';' | ':' if !next_char.is_whitespace() => {
-                if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
-                    continue; // Slice syntax, no space required
-                }
-                if char == ',' && matches!(next_char, ')' | ']') {
-                    continue; // Allow tuple with only one element: (3,)
-                }
-                if char == ':' && next_char == '=' {
-                    continue; // Allow assignment expression
-                }
+            TokenKind::Comma | TokenKind::Semi | TokenKind::Colon => {
+                let (start, end) = token.range();
+
+                let end_offset = locator.offset(end);
+                let after = &locator.contents()[end_offset..];
+
+                if !after.chars().next().map_or(false, |c| c.is_whitespace()) {
+                    match (kind, next_token.kind()) {
+                        (TokenKind::Colon, _) if num_lsqb > num_rsqb && prev_lsqb > prev_lbrace => {
+                            continue; // Slice syntax, no space required
+                        }
+                        (TokenKind::Comma, TokenKind::Rpar | TokenKind::Rsqb) => {
+                            continue; // Allow tuple with only one element: (3,)
+                        }
+                        (TokenKind::Colon, TokenKind::Equal) => {
+                            continue; // Allow assignment expression
+                        }
+                        _ => {}
+                    }
 
-                let kind = MissingWhitespace {
-                    token: char.to_string(),
-                };
+                    let kind = MissingWhitespace {
+                        token: match kind {
+                            TokenKind::Comma => ",",
+                            TokenKind::Semi => ";",
+                            TokenKind::Colon => ":",
+                            _ => unreachable!(),
+                        }
+                        .to_string(),
+                    };
 
-                let mut diagnostic = Diagnostic::new(
-                    kind,
-                    Range::new(
-                        Location::new(row, indent_level + idx),
-                        Location::new(row, indent_level + idx),
-                    ),
-                );
+                    let mut diagnostic = Diagnostic::new(kind, Range::new(start, start));
 
-                if autofix {
-                    diagnostic.amend(Edit::insertion(
-                        " ".to_string(),
-                        Location::new(row, indent_level + idx + 1),
-                    ));
+                    if autofix {
+                        diagnostic.amend(Edit::insertion(" ".to_string(), end));
+                    }
+                    diagnostics.push(diagnostic);
                 }
-                diagnostics.push(diagnostic);
             }
             _ => {}
         }
@@ -93,10 +105,9 @@ pub fn missing_whitespace(
 
 #[cfg(not(feature = "logical_lines"))]
 pub fn missing_whitespace(
-    _line: &str,
-    _row: usize,
+    _tokens: LogicalLineTokens,
+    _locator: &Locator,
     _autofix: bool,
-    indent_level: usize,
 ) -> Vec<Diagnostic> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs
index 7b20f573135fb2..f670a7cae737a4 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_after_keyword.rs
@@ -1,13 +1,16 @@
 #![allow(dead_code, unused_imports, unused_variables)]
 
+use itertools::Itertools;
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
 
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 
 use crate::rules::pycodestyle::helpers::{is_keyword_token, is_singleton_token};
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 
 #[violation]
 pub struct MissingWhitespaceAfterKeyword;
@@ -22,22 +25,23 @@ impl Violation for MissingWhitespaceAfterKeyword {
 /// E275
 #[cfg(feature = "logical_lines")]
 pub fn missing_whitespace_after_keyword(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
 
-    for (tok0, tok1) in tokens.iter().zip(&tokens[1..]) {
-        if tok0.2 == tok1.0
-            && is_keyword_token(tok0.1)
-            && !is_singleton_token(tok0.1)
-            && *tok0.1 != Tok::Async
-            && *tok0.1 != Tok::Await
-            && !(*tok0.1 == Tok::Except && *tok1.1 == Tok::Star)
-            && !(*tok0.1 == Tok::Yield && *tok1.1 == Tok::Rpar)
-            && *tok1.1 != Tok::Colon
-            && *tok1.1 != Tok::Newline
+    for (tok0, tok1) in tokens.iter().zip(tokens.iter().skip(1)) {
+        let tok0_kind = tok0.kind();
+        let tok1_kind = tok1.kind();
+
+        if tok0_kind.is_keyword()
+            && !tok0_kind.is_singleton()
+            && !matches!(tok0_kind, TokenKind::Async | TokenKind::Await)
+            && !(tok0_kind == TokenKind::Except && tok1_kind == TokenKind::Star)
+            && !(tok0_kind == TokenKind::Yield && tok1_kind == TokenKind::Rpar)
+            && !matches!(tok1_kind, TokenKind::Colon | TokenKind::Newline)
+            && tok0.end() == tok1.start()
         {
-            diagnostics.push((tok0.2, MissingWhitespaceAfterKeyword.into()));
+            diagnostics.push((tok0.end(), MissingWhitespaceAfterKeyword.into()));
         }
     }
     diagnostics
@@ -45,7 +49,7 @@ pub fn missing_whitespace_after_keyword(
 
 #[cfg(not(feature = "logical_lines"))]
 pub fn missing_whitespace_after_keyword(
-    _tokens: &[(Location, &Tok, Location)],
+    _tokens: LogicalLineTokens,
 ) -> Vec<(Location, DiagnosticKind)> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs
index d80b20a2e9f595..f9419dd187aed9 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs
@@ -6,11 +6,13 @@ use rustpython_parser::Tok;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 
 use crate::rules::pycodestyle::helpers::{
     is_arithmetic_token, is_keyword_token, is_op_token, is_singleton_token, is_skip_comment_token,
     is_soft_keyword_token, is_unary_token, is_ws_needed_token, is_ws_optional_token,
 };
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 
 // E225
 #[violation]
@@ -60,50 +62,53 @@ impl Violation for MissingWhitespaceAroundModuloOperator {
 #[cfg(feature = "logical_lines")]
 #[allow(clippy::if_same_then_else)]
 pub fn missing_whitespace_around_operator(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
 
     let mut needs_space_main: Option<bool> = Some(false);
     let mut needs_space_aux: Option<bool> = None;
-    let mut prev_end_aux: Option<&Location> = None;
+    let mut prev_end_aux: Option<Location> = None;
     let mut parens = 0u32;
-    let mut prev_type: Option<&Tok> = None;
-    let mut prev_end: Option<&Location> = None;
+    let mut prev_type: Option<TokenKind> = None;
+    let mut prev_end: Option<Location> = None;
 
-    for (start, token, end) in tokens {
-        if is_skip_comment_token(token) {
+    for token in tokens {
+        let kind = token.kind();
+
+        if kind.is_skip_comment() {
             continue;
         }
 
-        if **token == Tok::Lpar || **token == Tok::Lambda {
-            parens += 1;
-        } else if **token == Tok::Rpar {
-            parens -= 1;
-        }
+        match kind {
+            TokenKind::Lpar | TokenKind::Lambda => parens += 1,
+            TokenKind::Rpar => parens -= 1,
+            _ => {}
+        };
+
         let needs_space = (needs_space_main.is_some() && needs_space_main.unwrap())
             || needs_space_aux.is_some()
             || prev_end_aux.is_some();
         if needs_space {
-            if Some(start) != prev_end {
+            if Some(token.start()) != prev_end {
                 if !(needs_space_main.is_some() && needs_space_main.unwrap())
                     && (needs_space_aux.is_none() || !needs_space_aux.unwrap())
                 {
                     diagnostics.push((
-                        *(prev_end_aux.unwrap()),
+                        prev_end_aux.unwrap(),
                         MissingWhitespaceAroundOperator.into(),
                     ));
                 }
                 needs_space_main = Some(false);
                 needs_space_aux = None;
                 prev_end_aux = None;
-            } else if **token == Tok::Greater
-                && (prev_type == Some(&Tok::Less) || prev_type == Some(&Tok::Minus))
+            } else if kind == TokenKind::Greater
+                && matches!(prev_type, Some(TokenKind::Less | TokenKind::Minus))
             {
                 // Tolerate the "<>" operator, even if running Python 3
                 // Deal with Python 3's annotated return value "->"
-            } else if prev_type == Some(&Tok::Slash)
-                && (**token == Tok::Comma || **token == Tok::Rpar || **token == Tok::Colon)
-                || (prev_type == Some(&Tok::Rpar) && **token == Tok::Colon)
+            } else if prev_type == Some(TokenKind::Slash)
+                && matches!(kind, TokenKind::Comma | TokenKind::Rpar | TokenKind::Colon)
+                || (prev_type == Some(TokenKind::Rpar) && kind == TokenKind::Colon)
             {
                 // Tolerate the "/" operator in function definition
                 // For more info see PEP570
@@ -111,22 +116,21 @@ pub fn missing_whitespace_around_operator(
                 if (needs_space_main.is_some() && needs_space_main.unwrap())
                     || (needs_space_aux.is_some() && needs_space_aux.unwrap())
                 {
-                    diagnostics
-                        .push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into()));
-                } else if prev_type != Some(&Tok::DoubleStar) {
-                    if prev_type == Some(&Tok::Percent) {
+                    diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
+                } else if prev_type != Some(TokenKind::DoubleStar) {
+                    if prev_type == Some(TokenKind::Percent) {
                         diagnostics.push((
-                            *(prev_end_aux.unwrap()),
+                            prev_end_aux.unwrap(),
                             MissingWhitespaceAroundModuloOperator.into(),
                         ));
-                    } else if !is_arithmetic_token(prev_type.unwrap()) {
+                    } else if !prev_type.unwrap().is_arithmetic() {
                         diagnostics.push((
-                            *(prev_end_aux.unwrap()),
+                            prev_end_aux.unwrap(),
                             MissingWhitespaceAroundBitwiseOrShiftOperator.into(),
                         ));
                     } else {
                         diagnostics.push((
-                            *(prev_end_aux.unwrap()),
+                            prev_end_aux.unwrap(),
                             MissingWhitespaceAroundArithmeticOperator.into(),
                         ));
                     }
@@ -135,30 +139,30 @@ pub fn missing_whitespace_around_operator(
                 needs_space_aux = None;
                 prev_end_aux = None;
             }
-        } else if (is_op_token(token) || matches!(token, Tok::Name { .. })) && prev_end.is_some() {
-            if **token == Tok::Equal && parens > 0 {
+        } else if (kind.is_operator() || matches!(kind, TokenKind::Name)) && prev_end.is_some() {
+            if kind == TokenKind::Equal && parens > 0 {
                 // Allow keyword args or defaults: foo(bar=None).
-            } else if is_ws_needed_token(token) {
+            } else if kind.is_whitespace_needed() {
                 needs_space_main = Some(true);
                 needs_space_aux = None;
                 prev_end_aux = None;
-            } else if is_unary_token(token) {
+            } else if kind.is_unary() {
                 // Check if the operator is used as a binary operator
                 // Allow unary operators: -123, -x, +1.
                 // Allow argument unpacking: foo(*args, **kwargs)
-                if (prev_type.is_some()
-                    && is_op_token(prev_type.unwrap())
-                    && (prev_type == Some(&Tok::Rpar)
-                        || prev_type == Some(&Tok::Rsqb)
-                        || prev_type == Some(&Tok::Rbrace)))
-                    || (!is_op_token(prev_type.unwrap()) && !is_keyword_token(prev_type.unwrap()))
-                        && (!is_soft_keyword_token(prev_type.unwrap()))
-                {
-                    needs_space_main = None;
-                    needs_space_aux = None;
-                    prev_end_aux = None;
+                if let Some(prev_type) = prev_type {
+                    if (matches!(
+                        prev_type,
+                        TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
+                    )) || (!prev_type.is_operator() && !prev_type.is_keyword())
+                        && (!prev_type.is_soft_keyword())
+                    {
+                        needs_space_main = None;
+                        needs_space_aux = None;
+                        prev_end_aux = None;
+                    }
                 }
-            } else if is_ws_optional_token(token) {
+            } else if kind.is_whitespace_optional() {
                 needs_space_main = None;
                 needs_space_aux = None;
                 prev_end_aux = None;
@@ -169,20 +173,20 @@ pub fn missing_whitespace_around_operator(
                 // trailing space matches opening space
                 needs_space_main = None;
                 prev_end_aux = prev_end;
-                needs_space_aux = Some(Some(start) != prev_end_aux);
+                needs_space_aux = Some(Some(token.start()) != prev_end_aux);
             } else if needs_space_main.is_some()
                 && needs_space_main.unwrap()
-                && Some(start) == prev_end_aux
+                && Some(token.start()) == prev_end_aux
             {
                 // A needed opening space was not found
-                diagnostics.push((*(prev_end.unwrap()), MissingWhitespaceAroundOperator.into()));
+                diagnostics.push((prev_end.unwrap(), MissingWhitespaceAroundOperator.into()));
                 needs_space_main = Some(false);
                 needs_space_aux = None;
                 prev_end_aux = None;
             }
         }
-        prev_type = Some(*token);
-        prev_end = Some(end);
+        prev_type = Some(kind);
+        prev_end = Some(token.end());
     }
 
     diagnostics
@@ -190,7 +194,7 @@ pub fn missing_whitespace_around_operator(
 
 #[cfg(not(feature = "logical_lines"))]
 pub fn missing_whitespace_around_operator(
-    _tokens: &[(Location, &Tok, Location)],
+    _tokens: LogicalLineTokens,
 ) -> Vec<(Location, DiagnosticKind)> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/mod.rs b/crates/ruff/src/rules/pycodestyle/rules/mod.rs
index c35145bce356ca..46b6ffd198dd8b 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/mod.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/mod.rs
@@ -87,6 +87,7 @@ mod whitespace_around_named_parameter_equals;
 mod whitespace_before_comment;
 mod whitespace_before_parameters;
 
+#[derive(Copy, Clone, Eq, PartialEq)]
 enum Whitespace {
     None,
     Single,
diff --git a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
index 6c7abe0e7e59be..711de2a64ee8da 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/space_around_operator.rs
@@ -6,11 +6,13 @@ use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
 
 use crate::rules::pycodestyle::helpers::{is_op_token, is_ws_needed_token};
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
 
 /// ## What it does
@@ -132,14 +134,15 @@ impl Violation for MultipleSpacesAfterOperator {
 /// E221, E222, E223, E224
 #[cfg(feature = "logical_lines")]
 pub fn space_around_operator(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
     locator: &Locator,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
 
-    for (start, token, end) in tokens {
-        if is_operator_token(token) {
-            let start_offset = locator.offset(*start);
+    for token in tokens {
+        if is_operator_token(token.kind()) {
+            let (start, end) = token.range();
+            let start_offset = locator.offset(start);
             let before = &locator.contents()[..start_offset];
 
             match Whitespace::trailing(before) {
@@ -154,11 +157,11 @@ pub fn space_around_operator(
                 _ => {}
             }
 
-            let end_offset = locator.offset(*end);
+            let end_offset = locator.offset(end);
             let after = &locator.contents()[end_offset..];
             match Whitespace::leading(after) {
-                Whitespace::Tab => diagnostics.push((*end, TabAfterOperator.into())),
-                Whitespace::Many => diagnostics.push((*end, MultipleSpacesAfterOperator.into())),
+                Whitespace::Tab => diagnostics.push((end, TabAfterOperator.into())),
+                Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterOperator.into())),
                 _ => {}
             }
         }
@@ -167,44 +170,47 @@ pub fn space_around_operator(
     diagnostics
 }
 
-const fn is_operator_token(token: &Tok) -> bool {
+const fn is_operator_token(token: TokenKind) -> bool {
     matches!(
         token,
-        Tok::Plus
-            | Tok::Minus
-            | Tok::Star
-            | Tok::Slash
-            | Tok::Vbar
-            | Tok::Amper
-            | Tok::Less
-            | Tok::Greater
-            | Tok::Equal
-            | Tok::Percent
-            | Tok::NotEqual
-            | Tok::LessEqual
-            | Tok::GreaterEqual
-            | Tok::CircumFlex
-            | Tok::LeftShift
-            | Tok::RightShift
-            | Tok::DoubleStar
-            | Tok::PlusEqual
-            | Tok::MinusEqual
-            | Tok::StarEqual
-            | Tok::SlashEqual
-            | Tok::PercentEqual
-            | Tok::AmperEqual
-            | Tok::VbarEqual
-            | Tok::CircumflexEqual
-            | Tok::LeftShiftEqual
-            | Tok::RightShiftEqual
-            | Tok::DoubleStarEqual
-            | Tok::DoubleSlash
-            | Tok::DoubleSlashEqual
-            | Tok::ColonEqual
+        TokenKind::Plus
+            | TokenKind::Minus
+            | TokenKind::Star
+            | TokenKind::Slash
+            | TokenKind::Vbar
+            | TokenKind::Amper
+            | TokenKind::Less
+            | TokenKind::Greater
+            | TokenKind::Equal
+            | TokenKind::Percent
+            | TokenKind::NotEqual
+            | TokenKind::LessEqual
+            | TokenKind::GreaterEqual
+            | TokenKind::CircumFlex
+            | TokenKind::LeftShift
+            | TokenKind::RightShift
+            | TokenKind::DoubleStar
+            | TokenKind::PlusEqual
+            | TokenKind::MinusEqual
+            | TokenKind::StarEqual
+            | TokenKind::SlashEqual
+            | TokenKind::PercentEqual
+            | TokenKind::AmperEqual
+            | TokenKind::VbarEqual
+            | TokenKind::CircumflexEqual
+            | TokenKind::LeftShiftEqual
+            | TokenKind::RightShiftEqual
+            | TokenKind::DoubleStarEqual
+            | TokenKind::DoubleSlash
+            | TokenKind::DoubleSlashEqual
+            | TokenKind::ColonEqual
     )
 }
 
 #[cfg(not(feature = "logical_lines"))]
-pub fn space_around_operator(_line: &str) -> Vec<(usize, DiagnosticKind)> {
+pub fn space_around_operator(
+    _tokens: LogicalLineTokens,
+    _locator: &Locator,
+) -> Vec<(usize, DiagnosticKind)> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
index 86257bd6f48248..1a12b65cd22d5a 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs
@@ -6,11 +6,13 @@ use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
 
 use crate::rules::pycodestyle::helpers::is_keyword_token;
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 use crate::rules::pycodestyle::rules::Whitespace;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 
 /// ## What it does
 /// Checks for extraneous whitespace after keywords.
@@ -118,14 +120,15 @@ impl Violation for TabBeforeKeyword {
 /// E271, E272, E273, E274
 #[cfg(feature = "logical_lines")]
 pub fn whitespace_around_keywords(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
     locator: &Locator,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
 
-    for (start, token, end) in tokens {
-        if is_keyword_token(token) {
-            let start_offset = locator.offset(*start);
+    for token in tokens {
+        if token.kind().is_keyword() {
+            let (start, end) = token.range();
+            let start_offset = locator.offset(start);
             let before = &locator.contents()[..start_offset];
 
             match Whitespace::trailing(before) {
@@ -140,11 +143,11 @@ pub fn whitespace_around_keywords(
                 _ => {}
             }
 
-            let end_offset = locator.offset(*end);
+            let end_offset = locator.offset(end);
             let after = &locator.contents()[end_offset..];
             match Whitespace::leading(after) {
-                Whitespace::Tab => diagnostics.push((*end, TabAfterKeyword.into())),
-                Whitespace::Many => diagnostics.push((*end, MultipleSpacesAfterKeyword.into())),
+                Whitespace::Tab => diagnostics.push((end, TabAfterKeyword.into())),
+                Whitespace::Many => diagnostics.push((end, MultipleSpacesAfterKeyword.into())),
                 _ => {}
             }
         }
@@ -154,6 +157,9 @@ pub fn whitespace_around_keywords(
 }
 
 #[cfg(not(feature = "logical_lines"))]
-pub fn whitespace_around_keywords(_line: &str) -> Vec<(usize, DiagnosticKind)> {
+pub fn whitespace_around_keywords(
+    _tokens: LogicalLineTokens,
+    _locator: &Locator,
+) -> Vec<(usize, DiagnosticKind)> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs
index 6c729df73eb2ea..127b80523f1c3e 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_named_parameter_equals.rs
@@ -8,9 +8,11 @@ use rustpython_parser::Tok;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 
 #[cfg(feature = "logical_lines")]
 use crate::rules::pycodestyle::helpers::is_op_token;
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 
 #[violation]
 pub struct UnexpectedSpacesAroundKeywordParameterEquals;
@@ -32,11 +34,11 @@ impl Violation for MissingWhitespaceAroundParameterEquals {
     }
 }
 
-fn is_in_def(tokens: &[(Location, &Tok, Location)]) -> bool {
-    for (_, tok, _) in tokens {
-        match tok {
-            Tok::Async | Tok::Indent | Tok::Dedent => continue,
-            Tok::Def => return true,
+fn is_in_def(tokens: &LogicalLineTokens) -> bool {
+    for token in tokens {
+        match token.kind() {
+            TokenKind::Async | TokenKind::Indent | TokenKind::Dedent => continue,
+            TokenKind::Def => return true,
             _ => return false,
         }
     }
@@ -47,74 +49,87 @@ fn is_in_def(tokens: &[(Location, &Tok, Location)]) -> bool {
 /// E251, E252
 #[cfg(feature = "logical_lines")]
 pub fn whitespace_around_named_parameter_equals(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
 ) -> Vec<(Location, DiagnosticKind)> {
     let mut diagnostics = vec![];
     let mut parens = 0;
     let mut require_space = false;
     let mut no_space = false;
     let mut annotated_func_arg = false;
-    let mut prev_end: Option<&Location> = None;
+    let mut prev_end: Option<Location> = None;
 
-    let in_def = is_in_def(tokens);
+    let in_def = is_in_def(&tokens);
 
-    for (start, token, end) in tokens {
-        if **token == Tok::NonLogicalNewline {
+    for token in tokens {
+        let kind = token.kind();
+
+        if kind == TokenKind::NonLogicalNewline {
             continue;
         }
+
         if no_space {
             no_space = false;
-            if Some(start) != prev_end {
+            if Some(token.start()) != prev_end {
                 diagnostics.push((
-                    *(prev_end.unwrap()),
+                    prev_end.unwrap(),
                     UnexpectedSpacesAroundKeywordParameterEquals.into(),
                 ));
             }
         }
         if require_space {
             require_space = false;
+            let start = token.start();
             if Some(start) == prev_end {
-                diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into()));
+                diagnostics.push((start, MissingWhitespaceAroundParameterEquals.into()));
             }
         }
-        if is_op_token(token) {
-            if **token == Tok::Lpar || **token == Tok::Lsqb {
-                parens += 1;
-            } else if **token == Tok::Rpar || **token == Tok::Rsqb {
-                parens -= 1;
-            } else if in_def && **token == Tok::Colon && parens == 1 {
-                annotated_func_arg = true;
-            } else if parens == 1 && **token == Tok::Comma {
-                annotated_func_arg = false;
-            } else if parens > 0 && **token == Tok::Equal {
-                if annotated_func_arg && parens == 1 {
-                    require_space = true;
-                    if Some(start) == prev_end {
-                        diagnostics.push((*start, MissingWhitespaceAroundParameterEquals.into()));
-                    }
-                } else {
-                    no_space = true;
-                    if Some(start) != prev_end {
-                        diagnostics.push((
-                            *(prev_end.unwrap()),
-                            UnexpectedSpacesAroundKeywordParameterEquals.into(),
-                        ));
+        if kind.is_operator() {
+            match kind {
+                TokenKind::Lpar | TokenKind::Lsqb => {
+                    parens += 1;
+                }
+                TokenKind::Rpar | TokenKind::Rsqb => {
+                    parens -= 1;
+                }
+
+                TokenKind::Colon if parens == 1 && in_def => {
+                    annotated_func_arg = true;
+                }
+                TokenKind::Comma if parens == 1 => {
+                    annotated_func_arg = false;
+                }
+                TokenKind::Equal if parens > 0 => {
+                    if annotated_func_arg && parens == 1 {
+                        require_space = true;
+                        let start = token.start();
+                        if Some(start) == prev_end {
+                            diagnostics
+                                .push((start, MissingWhitespaceAroundParameterEquals.into()));
+                        }
+                    } else {
+                        no_space = true;
+                        if Some(token.start()) != prev_end {
+                            diagnostics.push((
+                                prev_end.unwrap(),
+                                UnexpectedSpacesAroundKeywordParameterEquals.into(),
+                            ));
+                        }
                     }
                 }
+                _ => {}
             }
 
             if parens < 1 {
                 annotated_func_arg = false;
             }
         }
-        prev_end = Some(end);
+        prev_end = Some(token.end());
     }
     diagnostics
 }
 
 #[cfg(not(feature = "logical_lines"))]
 pub fn whitespace_around_named_parameter_equals(
-    _tokens: &[(Location, &Tok, Location)],
+    _tokens: LogicalLineTokens,
     _line: &str,
 ) -> Vec<(Location, DiagnosticKind)> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs
index 293ebcd8e57a4c..a250149cfcd445 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_comment.rs
@@ -3,10 +3,12 @@
 use rustpython_parser::ast::Location;
 use rustpython_parser::Tok;
 
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 use ruff_diagnostics::DiagnosticKind;
 use ruff_diagnostics::Violation;
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::source_code::Locator;
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
 
 /// ## What it does
@@ -141,23 +143,28 @@ impl Violation for MultipleLeadingHashesForBlockComment {
 /// E261, E262, E265, E266
 #[cfg(feature = "logical_lines")]
 pub fn whitespace_before_comment(
-    tokens: &[(Location, &Tok, Location)],
+    tokens: LogicalLineTokens,
     locator: &Locator,
 ) -> Vec<(Range, DiagnosticKind)> {
     let mut diagnostics = vec![];
     let mut prev_end = Location::new(0, 0);
-    for (start, tok, end) in tokens {
-        if let Tok::Comment(text) = tok {
+    for token in tokens {
+        let kind = token.kind();
+
+        if let TokenKind::Comment = kind {
+            let (start, end) = token.range();
             let line = locator.slice(Range::new(
                 Location::new(start.row(), 0),
                 Location::new(start.row(), start.column()),
             ));
+            let text = locator.slice(Range::new(start, end));
+
             let is_inline_comment = !line.trim().is_empty();
             if is_inline_comment {
                 if prev_end.row() == start.row() && start.column() < prev_end.column() + 2 {
                     diagnostics.push((
-                        Range::new(prev_end, *start),
+                        Range::new(prev_end, start),
                         TooFewSpacesBeforeInlineComment.into(),
                     ));
                 }
@@ -177,23 +184,22 @@ pub fn whitespace_before_comment(
             if is_inline_comment {
                 if bad_prefix.is_some() || comment.chars().next().map_or(false, char::is_whitespace)
                 {
-                    diagnostics.push((Range::new(*start, *end), NoSpaceAfterInlineComment.into()));
+                    diagnostics.push((Range::new(start, end), NoSpaceAfterInlineComment.into()));
                 }
             } else if let Some(bad_prefix) = bad_prefix {
                 if bad_prefix != '!' || start.row() > 1 {
                     if bad_prefix != '#' {
-                        diagnostics
-                            .push((Range::new(*start, *end), NoSpaceAfterBlockComment.into()));
+                        diagnostics.push((Range::new(start, end), NoSpaceAfterBlockComment.into()));
                     } else if !comment.is_empty() {
                         diagnostics.push((
-                            Range::new(*start, *end),
+                            Range::new(start, end),
                             MultipleLeadingHashesForBlockComment.into(),
                         ));
                     }
                 }
             }
-        } else if !matches!(tok, Tok::NonLogicalNewline) {
-            prev_end = *end;
+        } else if !matches!(kind, TokenKind::NonLogicalNewline) {
+            prev_end = token.end();
         }
     }
     diagnostics
@@ -201,7 +207,7 @@ pub fn whitespace_before_comment(
 
 #[cfg(not(feature = "logical_lines"))]
 pub fn whitespace_before_comment(
-    _tokens: &[(Location, &Tok, Location)],
+    _tokens: LogicalLineTokens,
     _locator: &Locator,
 ) -> Vec<(Range, DiagnosticKind)> {
     vec![]
}
diff --git a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs b/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs
index 542da9343cfc2f..41ab9993025741 100644
--- a/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/whitespace_before_parameters.rs
@@ -5,10 +5,12 @@ use rustpython_parser::Tok;
 
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit};
 use ruff_macros::{derive_message_formats, violation};
+use ruff_python_ast::token_kind::TokenKind;
 use ruff_python_ast::types::Range;
 
 use crate::registry::AsRule;
 use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token, is_soft_keyword_token};
+use crate::rules::pycodestyle::logical_lines::LogicalLineTokens;
 
 #[violation]
 pub struct WhitespaceBeforeParameters {
@@ -30,27 +32,36 @@ impl AlwaysAutofixableViolation for WhitespaceBeforeParameters {
 
 /// E211
 #[cfg(feature = "logical_lines")]
-pub fn whitespace_before_parameters(
-    tokens: &[(Location, &Tok, Location)],
-    autofix: bool,
-) -> Vec<Diagnostic> {
+pub fn whitespace_before_parameters(tokens: LogicalLineTokens, autofix: bool) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
-    let (_, mut prev_token, mut prev_end) = tokens.first().unwrap();
 
-    for (idx, (start, tok, end)) in tokens.iter().enumerate() {
-        if is_op_token(tok)
-            && (**tok == Tok::Lpar || **tok == Tok::Lsqb)
-            && *start != prev_end
-            && (matches!(prev_token, Tok::Name { .. })
-                || matches!(prev_token, Tok::Rpar | Tok::Rsqb | Tok::Rbrace))
-            && (idx < 2 || *(tokens[idx - 2].1) != Tok::Class)
-            && !is_keyword_token(tok)
-            && !is_soft_keyword_token(tok)
+    let previous = tokens.first().unwrap();
+
+    let mut pre_pre_kind: Option<TokenKind> = None;
+    let mut prev_token = previous.kind();
+    let mut prev_end = previous.end();
+
+    for token in tokens.iter() {
+        let kind = token.kind();
+
+        if (kind == TokenKind::Lpar || kind == TokenKind::Lsqb)
+            && token.start() != prev_end
+            && matches!(
+                prev_token,
+                TokenKind::Name | TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
+            )
+            && (pre_pre_kind != Some(TokenKind::Class))
         {
             let start = Location::new(prev_end.row(), prev_end.column());
+            let end = token.end();
             let end = Location::new(end.row(), end.column() - 1);
 
             let kind: WhitespaceBeforeParameters = WhitespaceBeforeParameters {
-                bracket: tok.to_string(),
+                bracket: if kind == TokenKind::Lpar {
+                    "'('"
+                } else {
+                    "'['"
+                }
+                .to_string(),
             };
 
             let mut diagnostic = Diagnostic::new(kind, Range::new(start, end));
@@ -60,16 +71,14 @@ pub fn whitespace_before_parameters(
             }
             diagnostics.push(diagnostic);
         }
-        prev_token = *tok;
-        prev_end = *end;
+        pre_pre_kind = Some(prev_token);
+        prev_token = kind;
+        prev_end = token.end();
     }
     diagnostics
 }
 
 #[cfg(not(feature = "logical_lines"))]
-pub fn whitespace_before_parameters(
-    _tokens: &[(Location, &Tok, Location)],
-    _autofix: bool,
-) -> Vec<Diagnostic> {
+pub fn whitespace_before_parameters(_tokens: LogicalLineTokens, _autofix: bool) -> Vec<Diagnostic> {
     vec![]
 }
diff --git a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E222_E22.py.snap b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E222_E22.py.snap
index 27129520fa4059..4caf44678ae38c 100644
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E222_E22.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruff__rules__pycodestyle__tests__E222_E22.py.snap
@@ -2,6 +2,58 @@
 source: crates/ruff/src/rules/pycodestyle/mod.rs
 expression: diagnostics
 ---
+- kind:
+    name: MultipleSpacesAfterOperator
+    body: Multiple spaces after operator
+    suggestion: ~
+    fixable: false
+  location:
+    row: 9
+    column: 4
+  end_location:
+    row: 9
+    column: 4
+  fix: ~
+  parent: ~
+- kind:
+    name: MultipleSpacesAfterOperator
+    body: Multiple spaces after operator
+    suggestion: ~
+    fixable: false
+  location:
+    row: 10
+    column: 4
+  end_location:
+    row: 10
+    column: 4
+  fix: ~
+  parent: ~
+- kind:
+    name: MultipleSpacesAfterOperator
+    body: Multiple spaces after operator
+    suggestion: ~
+    fixable: false
+  location:
+    row: 13
+    column: 8
+  end_location:
+    row: 13
+    column: 8
+  fix: ~
+  parent: ~
+- kind:
+    name: MultipleSpacesAfterOperator
+    body: Multiple spaces after operator
+    suggestion: ~
+    fixable: false
+  location:
+    row: 15
+    column: 8
+  end_location:
+    row: 15
+    column: 8
+  fix: ~
+  parent: ~
 - kind:
     name: MultipleSpacesAfterOperator
     body: Multiple spaces after operator
@@ -67,4 +119,17 @@ expression: diagnostics
     column: 6
   fix: ~
   parent: ~
+- kind:
+    name: MultipleSpacesAfterOperator
+    body: Multiple spaces after operator
+    suggestion: ~
+    fixable: false
+  location:
+    row: 168
+    column: 17
+  end_location:
+    row: 168
+    column: 17
+  fix: ~
+  parent: ~
 
diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs
index afb3a925586f4c..872547620fba29 100644
--- a/crates/ruff_python_ast/src/lib.rs
+++ b/crates/ruff_python_ast/src/lib.rs
@@ -12,6 +12,7 @@ pub mod relocate;
 pub mod scope;
 pub mod source_code;
 pub mod str;
+pub mod token_kind;
 pub mod types;
 pub mod typing;
 pub mod visibility;
diff --git a/crates/ruff_python_ast/src/token_kind.rs b/crates/ruff_python_ast/src/token_kind.rs
new file mode 100644
index 00000000000000..74758dd2027c20
--- /dev/null
+++ b/crates/ruff_python_ast/src/token_kind.rs
@@ -0,0 +1,455 @@
+use rustpython_parser::Tok;
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+pub enum TokenKind {
+    /// Token value for a name, commonly known as an identifier.
+    Name,
+    /// Token value for an integer.
+    Int,
+    /// Token value for a floating point number.
+    Float,
+    /// Token value for a complex number.
+    Complex,
+    /// Token value for a string.
+    String,
+    /// Token value for a comment. These are filtered out of the token stream prior to parsing.
+    Comment,
+    /// Token value for a newline.
+    Newline,
+    /// Token value for a newline that is not a logical line break. These are filtered out of
+    /// the token stream prior to parsing.
+    NonLogicalNewline,
+    /// Token value for an indent.
+    Indent,
+    /// Token value for a dedent.
+    Dedent,
+    EndOfFile,
+    /// Token value for a left parenthesis `(`.
+    Lpar,
+    /// Token value for a right parenthesis `)`.
+    Rpar,
+    /// Token value for a left square bracket `[`.
+    Lsqb,
+    /// Token value for a right square bracket `]`.
+    Rsqb,
+    /// Token value for a colon `:`.
+    Colon,
+    /// Token value for a comma `,`.
+    Comma,
+    /// Token value for a semicolon `;`.
+    Semi,
+    /// Token value for plus `+`.
+    Plus,
+    /// Token value for minus `-`.
+    Minus,
+    /// Token value for star `*`.
+    Star,
+    /// Token value for slash `/`.
+    Slash,
+    /// Token value for vertical bar `|`.
+    Vbar,
+    /// Token value for ampersand `&`.
+    Amper,
+    /// Token value for less than `<`.
+    Less,
+    /// Token value for greater than `>`.
+    Greater,
+    /// Token value for equal `=`.
+    Equal,
+    /// Token value for dot `.`.
+    Dot,
+    /// Token value for percent `%`.
+    Percent,
+    /// Token value for left bracket `{`.
+    Lbrace,
+    /// Token value for right bracket `}`.
+    Rbrace,
+    /// Token value for double equal `==`.
+    EqEqual,
+    /// Token value for not equal `!=`.
+    NotEqual,
+    /// Token value for less than or equal `<=`.
+    LessEqual,
+    /// Token value for greater than or equal `>=`.
+    GreaterEqual,
+    /// Token value for tilde `~`.
+    Tilde,
+    /// Token value for caret `^`.
+    CircumFlex,
+    /// Token value for left shift `<<`.
+    LeftShift,
+    /// Token value for right shift `>>`.
+    RightShift,
+    /// Token value for double star `**`.
+    DoubleStar,
+    /// Token value for double star equal `**=`.
+    DoubleStarEqual,
+    /// Token value for plus equal `+=`.
+    PlusEqual,
+    /// Token value for minus equal `-=`.
+    MinusEqual,
+    /// Token value for star equal `*=`.
+    StarEqual,
+    /// Token value for slash equal `/=`.
+    SlashEqual,
+    /// Token value for percent equal `%=`.
+    PercentEqual,
+    /// Token value for ampersand equal `&=`.
+    AmperEqual,
+    /// Token value for vertical bar equal `|=`.
+    VbarEqual,
+    /// Token value for caret equal `^=`.
+    CircumflexEqual,
+    /// Token value for left shift equal `<<=`.
+    LeftShiftEqual,
+    /// Token value for right shift equal `>>=`.
+    RightShiftEqual,
+    /// Token value for double slash `//`.
+    DoubleSlash,
+    /// Token value for double slash equal `//=`.
+    DoubleSlashEqual,
+    /// Token value for colon equal `:=`.
+    ColonEqual,
+    /// Token value for at `@`.
+    At,
+    /// Token value for at equal `@=`.
+    AtEqual,
+    /// Token value for arrow `->`.
+    Rarrow,
+    /// Token value for ellipsis `...`.
+    Ellipsis,
+
+    // Self documenting.
+    // Keywords (alphabetically):
+    False,
+    None,
+    True,
+
+    And,
+    As,
+    Assert,
+    Async,
+    Await,
+    Break,
+    Class,
+    Continue,
+    Def,
+    Del,
+    Elif,
+    Else,
+    Except,
+    Finally,
+    For,
+    From,
+    Global,
+    If,
+    Import,
+    In,
+    Is,
+    Lambda,
+    Nonlocal,
+    Not,
+    Or,
+    Pass,
+    Raise,
+    Return,
+    Try,
+    While,
+    Match,
+    Case,
+    With,
+    Yield,
+
+    // RustPython specific.
+    StartModule,
+    StartInteractive,
+    StartExpression,
+}
+
+impl TokenKind {
+    pub const fn is_whitespace_needed(&self) -> bool {
+        matches!(
+            self,
+            TokenKind::DoubleStarEqual
+                | TokenKind::StarEqual
+                | TokenKind::SlashEqual
+                | TokenKind::DoubleSlashEqual
+                | TokenKind::PlusEqual
+                | TokenKind::MinusEqual
+                | TokenKind::NotEqual
+                | TokenKind::Less
+                | TokenKind::Greater
+                | TokenKind::PercentEqual
+                | TokenKind::CircumflexEqual
+                | TokenKind::AmperEqual
+                | TokenKind::VbarEqual
+                | TokenKind::EqEqual
+                | TokenKind::LessEqual
+                | TokenKind::GreaterEqual
+                | TokenKind::LeftShiftEqual
+                | TokenKind::RightShiftEqual
+                | TokenKind::Equal
+                | TokenKind::And
+                | TokenKind::Or
+                | TokenKind::In
+                | TokenKind::Is
+                | TokenKind::Rarrow
+        )
+    }
+
+    pub const fn is_whitespace_optional(&self) -> bool {
+        self.is_arithmetic()
+            || matches!(
+                self,
+                TokenKind::CircumFlex
+                    | TokenKind::Amper
+                    | TokenKind::Vbar
+                    | TokenKind::LeftShift
+                    | TokenKind::RightShift
+                    | TokenKind::Percent
+            )
+    }
+
+    pub const fn is_unary(&self) -> bool {
+        matches!(
+            self,
+            TokenKind::Plus
+                | TokenKind::Minus
+                | TokenKind::Star
+                | TokenKind::DoubleStar
+                | TokenKind::RightShift
+        )
+    }
+
+    pub const fn is_keyword(&self) -> bool {
+        matches!(
+            self,
+            TokenKind::False
+                | TokenKind::True
+                | TokenKind::None
+                | TokenKind::And
+                | TokenKind::As
+                | TokenKind::Assert
+                | TokenKind::Await
+                | TokenKind::Break
+                | TokenKind::Class
+                | TokenKind::Continue
+                | TokenKind::Def
+                | TokenKind::Del
+                | TokenKind::Elif
+                | TokenKind::Else
+                | TokenKind::Except
+                | TokenKind::Finally
+                | TokenKind::For
+                | TokenKind::From
+                | TokenKind::Global
+                | TokenKind::If
+                | TokenKind::Import
+                | TokenKind::In
+                | TokenKind::Is
+                | TokenKind::Lambda
+                | TokenKind::Nonlocal
+                | TokenKind::Not
+                | TokenKind::Or
+                | TokenKind::Pass
+                | TokenKind::Raise
+                | TokenKind::Return
+                | TokenKind::Try
+                | TokenKind::While
+                | TokenKind::With
+                | TokenKind::Yield
+        )
+    }
+
+    pub const fn is_operator(&self) -> bool {
+        matches!(
+            self,
+            TokenKind::Lpar
+                | TokenKind::Rpar
+                | TokenKind::Lsqb
+                | TokenKind::Rsqb
+                | TokenKind::Comma
+                | TokenKind::Semi
+                | TokenKind::Plus
+                | TokenKind::Minus
+                | TokenKind::Star
+                | TokenKind::Slash
+                | TokenKind::Vbar
+                | TokenKind::Amper
+                | TokenKind::Less
+                | TokenKind::Greater
+                | TokenKind::Equal
+                | TokenKind::Dot
+                | TokenKind::Percent
+                | TokenKind::Lbrace
+                | TokenKind::Rbrace
+                | TokenKind::NotEqual
+                | TokenKind::LessEqual
+                | TokenKind::GreaterEqual
+                | TokenKind::Tilde
+                | TokenKind::CircumFlex
+                | TokenKind::LeftShift
+                | TokenKind::RightShift
+                | TokenKind::DoubleStar
+                | TokenKind::PlusEqual
+                | TokenKind::MinusEqual
+                | TokenKind::StarEqual
+                | TokenKind::SlashEqual
+                | TokenKind::PercentEqual
+                | TokenKind::AmperEqual
+                | TokenKind::VbarEqual
+                | TokenKind::CircumflexEqual
+                | TokenKind::LeftShiftEqual
+                | TokenKind::RightShiftEqual
+                | TokenKind::DoubleStarEqual
+                | TokenKind::DoubleSlash
+                | TokenKind::DoubleSlashEqual
+                | TokenKind::At
+                | TokenKind::AtEqual
+                | TokenKind::Rarrow
+                | TokenKind::Ellipsis
+                | TokenKind::ColonEqual
+                | TokenKind::Colon
+        )
+    }
+
+    pub const fn is_singleton(&self) -> bool {
+        matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
+    }
+
+    pub const fn is_skip_comment(&self) -> bool {
+        matches!(
+            self,
+            TokenKind::Newline
+                | TokenKind::Indent
+                | TokenKind::Dedent
+                | TokenKind::NonLogicalNewline
+                | TokenKind::Comment
+        )
+    }
+
+    pub const fn is_arithmetic(&self) -> bool {
+        matches!(
+            self,
+            TokenKind::DoubleStar
+                | TokenKind::Star
+                | TokenKind::Plus
+                | TokenKind::Minus
+                | TokenKind::Slash
+                | TokenKind::At
+        )
+    }
+
+    pub const fn is_soft_keyword(&self) -> bool {
+        matches!(self, TokenKind::Match | TokenKind::Case)
+    }
+
+    pub const fn from_token(token: &Tok) -> Self {
+        match token {
+            Tok::Name { .. } => TokenKind::Name,
+            Tok::Int { .. } => TokenKind::Int,
+            Tok::Float { .. } => TokenKind::Float,
+            Tok::Complex { .. } => TokenKind::Complex,
+            Tok::String { .. } => TokenKind::String,
+            Tok::Comment(_) => TokenKind::Comment,
+            Tok::Newline => TokenKind::Newline,
+            Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
+            Tok::Indent => TokenKind::Indent,
+            Tok::Dedent => TokenKind::Dedent,
+            Tok::EndOfFile => TokenKind::EndOfFile,
+            Tok::Lpar => TokenKind::Lpar,
+            Tok::Rpar => TokenKind::Rpar,
+            Tok::Lsqb => TokenKind::Lsqb,
+            Tok::Rsqb => TokenKind::Rsqb,
+            Tok::Colon => TokenKind::Colon,
+            Tok::Comma => TokenKind::Comma,
+            Tok::Semi => TokenKind::Semi,
+            Tok::Plus => TokenKind::Plus,
+            Tok::Minus => TokenKind::Minus,
+            Tok::Star => TokenKind::Star,
+            Tok::Slash => TokenKind::Slash,
+            Tok::Vbar => TokenKind::Vbar,
+            Tok::Amper => TokenKind::Amper,
+            Tok::Less => TokenKind::Less,
+            Tok::Greater => TokenKind::Greater,
+            Tok::Equal => TokenKind::Equal,
+            Tok::Dot => TokenKind::Dot,
+            Tok::Percent => TokenKind::Percent,
+            Tok::Lbrace => TokenKind::Lbrace,
+            Tok::Rbrace => TokenKind::Rbrace,
+            Tok::EqEqual => TokenKind::EqEqual,
+            Tok::NotEqual => TokenKind::NotEqual,
+            Tok::LessEqual => TokenKind::LessEqual,
+            Tok::GreaterEqual => TokenKind::GreaterEqual,
+            Tok::Tilde => TokenKind::Tilde,
+            Tok::CircumFlex => TokenKind::CircumFlex,
+            Tok::LeftShift => TokenKind::LeftShift,
+            Tok::RightShift => TokenKind::RightShift,
+            Tok::DoubleStar => TokenKind::DoubleStar,
+            Tok::DoubleStarEqual => TokenKind::DoubleStarEqual,
+            Tok::PlusEqual => TokenKind::PlusEqual,
+            Tok::MinusEqual => TokenKind::MinusEqual,
+            Tok::StarEqual => TokenKind::StarEqual,
+            Tok::SlashEqual => TokenKind::SlashEqual,
+            Tok::PercentEqual => TokenKind::PercentEqual,
+            Tok::AmperEqual => TokenKind::AmperEqual,
+            Tok::VbarEqual => TokenKind::VbarEqual,
+            Tok::CircumflexEqual => TokenKind::CircumflexEqual,
+            Tok::LeftShiftEqual => TokenKind::LeftShiftEqual,
+            Tok::RightShiftEqual => TokenKind::RightShiftEqual,
+            Tok::DoubleSlash => TokenKind::DoubleSlash,
+            Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual,
+            Tok::ColonEqual => TokenKind::ColonEqual,
+            Tok::At => TokenKind::At,
+            Tok::AtEqual => TokenKind::AtEqual,
+            Tok::Rarrow => TokenKind::Rarrow,
+            Tok::Ellipsis => TokenKind::Ellipsis,
+            Tok::False => TokenKind::False,
+            Tok::None => TokenKind::None,
+            Tok::True => TokenKind::True,
+            Tok::And => TokenKind::And,
+            Tok::As => TokenKind::As,
+            Tok::Assert => TokenKind::Assert,
+            Tok::Async => TokenKind::Async,
+            Tok::Await => TokenKind::Await,
+            Tok::Break => TokenKind::Break,
+            Tok::Class => TokenKind::Class,
+            Tok::Continue => TokenKind::Continue,
+            Tok::Def => TokenKind::Def,
+            Tok::Del => TokenKind::Del,
+            Tok::Elif => TokenKind::Elif,
+            Tok::Else => TokenKind::Else,
+            Tok::Except => TokenKind::Except,
+            Tok::Finally => TokenKind::Finally,
+            Tok::For => TokenKind::For,
+            Tok::From => TokenKind::From,
+            Tok::Global => TokenKind::Global,
+            Tok::If => TokenKind::If,
+            Tok::Import => TokenKind::Import,
+            Tok::In => TokenKind::In,
+            Tok::Is => TokenKind::Is,
+            Tok::Lambda => TokenKind::Lambda,
+            Tok::Nonlocal => TokenKind::Nonlocal,
+            Tok::Not => TokenKind::Not,
+            Tok::Or => TokenKind::Or,
+            Tok::Pass => TokenKind::Pass,
+            Tok::Raise => TokenKind::Raise,
+            Tok::Return => TokenKind::Return,
+            Tok::Try => TokenKind::Try,
+            Tok::While => TokenKind::While,
+            Tok::Match => TokenKind::Match,
+            Tok::Case => TokenKind::Case,
+            Tok::With => TokenKind::With,
+            Tok::Yield => TokenKind::Yield,
+            Tok::StartModule => TokenKind::StartModule,
+            Tok::StartInteractive => TokenKind::StartInteractive,
+            Tok::StartExpression => TokenKind::StartExpression,
+        }
+    }
+}
+
+impl From<&Tok> for TokenKind {
+    fn from(value: &Tok) -> Self {
+        Self::from_token(value)
+    }
+}
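
Taken together, these changes route the pycodestyle logical-line rules through `TokenKind`: a token is classified once, and rules branch on a plain fieldless enum instead of pattern-matching `Tok`'s data-carrying variants. A minimal sketch of the intended call pattern; the `classify` helper below is hypothetical, and only `TokenKind::from_token` and the predicates come from this patch:

use rustpython_parser::Tok;
use ruff_python_ast::token_kind::TokenKind;

// Hypothetical caller: one cheap conversion, then enum comparisons that
// replace the 30-odd-arm `matches!` blocks deleted in this diff.
fn classify(token: &Tok) -> &'static str {
    let kind = TokenKind::from_token(token);
    if kind.is_keyword() {
        "keyword"
    } else if kind.is_operator() {
        "operator"
    } else {
        "other"
    }
}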