From 93bfa3727ee939f11c664a5199e8fcd3bf285b0a Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sun, 10 Mar 2024 12:52:01 +0000 Subject: [PATCH] Reduce duplication -- use an enum of enums --- .../rules/avoidable_escaped_quote.rs | 4 +- crates/ruff_python_parser/src/lexer.rs | 28 +- .../src/string_token_flags.rs | 243 +++++++----------- 3 files changed, 112 insertions(+), 163 deletions(-) diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs index 8c1756fbc23e1e..7dd89e38c0869a 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs @@ -188,7 +188,7 @@ pub(crate) fn avoidable_escaped_quote( let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range); let fixed_contents = format!( "{prefix}{quote}{value}{quote}", - prefix = kind.prefix_str(), + prefix = kind.prefix(), quote = quotes_settings.inline_quotes.opposite().as_char(), value = unescape_string( string_contents, @@ -322,7 +322,7 @@ pub(crate) fn unnecessary_escaped_quote( let mut diagnostic = Diagnostic::new(UnnecessaryEscapedQuote, tok_range); let fixed_contents = format!( "{prefix}{quote}{value}{quote}", - prefix = kind.prefix_str(), + prefix = kind.prefix(), quote = leading.as_char(), value = unescape_string(string_contents, leading.opposite().as_char()) ); diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 10ffc5981847bc..4efb7d34a8ade1 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -33,7 +33,7 @@ use std::{char, cmp::Ordering, str::FromStr}; use unicode_ident::{is_xid_continue, is_xid_start}; -use ruff_python_ast::{Int, IpyEscapeKind}; +use ruff_python_ast::{FStringPrefix, Int, IpyEscapeKind}; use ruff_text_size::{TextLen, TextRange, TextSize}; use 
crate::lexer::cursor::{Cursor, EOF_CHAR}; @@ -174,33 +174,29 @@ impl<'source> Lexer<'source> { match (first, self.cursor.first()) { ('f' | 'F', quote @ ('\'' | '"')) => { self.cursor.bump(); - return Ok(self.lex_fstring_start(quote, StringPrefix::Format)); + return Ok(self.lex_fstring_start(quote, FStringPrefix::Regular)); } ('r', 'f' | 'F') | ('f' | 'F', 'r') if is_quote(self.cursor.second()) => { self.cursor.bump(); let quote = self.cursor.bump().unwrap(); - return Ok( - self.lex_fstring_start(quote, StringPrefix::RawFormat { uppercase_r: false }) - ); + return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: false })); } ('R', 'f' | 'F') | ('f' | 'F', 'R') if is_quote(self.cursor.second()) => { self.cursor.bump(); let quote = self.cursor.bump().unwrap(); - return Ok( - self.lex_fstring_start(quote, StringPrefix::RawFormat { uppercase_r: true }) - ); + return Ok(self.lex_fstring_start(quote, FStringPrefix::Raw { uppercase_r: true })); } (_, quote @ ('\'' | '"')) => { if let Ok(prefix) = StringPrefix::try_from(first) { self.cursor.bump(); - return self.lex_string(Some(prefix), quote); + return self.lex_string(prefix, quote); } } (_, second @ ('r' | 'R' | 'b' | 'B')) if is_quote(self.cursor.second()) => { self.cursor.bump(); if let Ok(prefix) = StringPrefix::try_from([first, second]) { let quote = self.cursor.bump().unwrap(); - return self.lex_string(Some(prefix), quote); + return self.lex_string(prefix, quote); } } _ => {} @@ -544,11 +540,11 @@ impl<'source> Lexer<'source> { } /// Lex a f-string start token. 
- fn lex_fstring_start(&mut self, quote: char, prefix: StringPrefix) -> Tok { + fn lex_fstring_start(&mut self, quote: char, prefix: FStringPrefix) -> Tok { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), quote); - let mut kind = StringKind::from_prefix(Some(prefix)); + let mut kind = StringKind::from_prefix(StringPrefix::Format(prefix)); if quote == '"' { kind = kind.with_double_quotes(); @@ -696,11 +692,7 @@ impl<'source> Lexer<'source> { } /// Lex a string literal. - fn lex_string( - &mut self, - prefix: Option, - quote: char, - ) -> Result { + fn lex_string(&mut self, prefix: StringPrefix, quote: char) -> Result { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), quote); @@ -1074,7 +1066,7 @@ impl<'source> Lexer<'source> { c if is_ascii_identifier_start(c) => self.lex_identifier(c)?, '0'..='9' => self.lex_number(c)?, '#' => return Ok((self.lex_comment(), self.token_range())), - '\'' | '"' => self.lex_string(None, c)?, + '\'' | '"' => self.lex_string(StringPrefix::default(), c)?, '=' => { if self.cursor.eat_char('=') { Tok::EqEqual diff --git a/crates/ruff_python_parser/src/string_token_flags.rs b/crates/ruff_python_parser/src/string_token_flags.rs index b7515a9b02fd05..b7154ab11d24bd 100644 --- a/crates/ruff_python_parser/src/string_token_flags.rs +++ b/crates/ruff_python_parser/src/string_token_flags.rs @@ -68,41 +68,15 @@ bitflags! { /// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals /// [PEP 701]: https://peps.python.org/pep-0701/ #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub(crate) enum StringPrefix { - /// The string has a `u` or `U` prefix. - /// While this prefix is a no-op at runtime, - /// strings with this prefix can have no other prefixes set. - Unicode, - - /// The string has an `r` or `R` prefix, meaning it is a raw string. - /// F-strings and byte-strings can be raw, - /// as can strings with no other prefixes. 
- /// U-strings cannot be raw. - Raw { uppercase: bool }, - - /// The string has a `f` or `F` prefix, meaning it is an f-string. - /// F-strings can also be raw strings, - /// but can have no other prefixes. - Format, - - /// The string has a `b` or `B` prefix. - /// This means that the string is a sequence of `int`s at runtime, - /// rather than a sequence of `str`s. - /// Bytestrings can also be raw strings, - /// but can have no other prefixes. - Bytes, - - /// A string that has has any one of the prefixes - /// `{"rf", "rF", "Rf", "RF", "fr", "fR", "Fr", "FR"}` - /// Semantically, these all have the same meaning: - /// the string is both an f-string and a raw-string - RawFormat { uppercase_r: bool }, - - /// A string that has has any one of the prefixes - /// `{"rb", "rB", "Rb", "RB", "br", "bR", "Br", "BR"}` - /// Semantically, these all have the same meaning: - /// the string is both an bytestring and a raw-string - RawBytes { uppercase_r: bool }, +pub enum StringPrefix { + /// Prefixes that indicate the string is a bytestring + Bytes(ByteStringPrefix), + + /// Prefixes that indicate the string is an f-string + Format(FStringPrefix), + + /// All other prefixes + Regular(StringLiteralPrefix), } impl TryFrom for StringPrefix { @@ -110,11 +84,11 @@ impl TryFrom for StringPrefix { fn try_from(value: char) -> Result { let result = match value { - 'r' => Self::Raw { uppercase: false }, - 'R' => Self::Raw { uppercase: true }, - 'u' | 'U' => Self::Unicode, - 'b' | 'B' => Self::Bytes, - 'f' | 'F' => Self::Format, + 'r' => Self::Regular(StringLiteralPrefix::Raw { uppercase: false }), + 'R' => Self::Regular(StringLiteralPrefix::Raw { uppercase: true }), + 'u' | 'U' => Self::Regular(StringLiteralPrefix::Unicode), + 'b' | 'B' => Self::Bytes(ByteStringPrefix::Regular), + 'f' | 'F' => Self::Format(FStringPrefix::Regular), _ => return Err(format!("Unexpected prefix '{value}'")), }; Ok(result) @@ -126,10 +100,18 @@ impl TryFrom<[char; 2]> for StringPrefix { fn try_from(value: 
[char; 2]) -> Result { let result = match value { - ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => Self::RawFormat { uppercase_r: false }, - ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => Self::RawFormat { uppercase_r: true }, - ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => Self::RawBytes { uppercase_r: false }, - ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => Self::RawBytes { uppercase_r: true }, + ['r', 'f' | 'F'] | ['f' | 'F', 'r'] => { + Self::Format(FStringPrefix::Raw { uppercase_r: false }) + } + ['R', 'f' | 'F'] | ['f' | 'F', 'R'] => { + Self::Format(FStringPrefix::Raw { uppercase_r: true }) + } + ['r', 'b' | 'B'] | ['b' | 'B', 'r'] => { + Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) + } + ['R', 'b' | 'B'] | ['b' | 'B', 'R'] => { + Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) + } _ => return Err(format!("Unexpected prefix '{}{}'", value[0], value[1])), }; Ok(result) @@ -139,51 +121,63 @@ impl TryFrom<[char; 2]> for StringPrefix { impl StringPrefix { const fn as_flags(self) -> StringFlags { match self { - Self::Bytes => StringFlags::B_PREFIX, - Self::Format => StringFlags::F_PREFIX, - Self::Raw { uppercase: true } => StringFlags::R_PREFIX_UPPER, - Self::Raw { uppercase: false } => StringFlags::R_PREFIX_LOWER, - Self::RawBytes { uppercase_r: true } => { - StringFlags::R_PREFIX_UPPER.union(StringFlags::B_PREFIX) + // regular strings + Self::Regular(StringLiteralPrefix::Empty) => StringFlags::empty(), + Self::Regular(StringLiteralPrefix::Unicode) => StringFlags::U_PREFIX, + Self::Regular(StringLiteralPrefix::Raw { uppercase: false }) => { + StringFlags::R_PREFIX_LOWER + } + Self::Regular(StringLiteralPrefix::Raw { uppercase: true }) => { + StringFlags::R_PREFIX_UPPER } - Self::RawBytes { uppercase_r: false } => { - StringFlags::R_PREFIX_LOWER.union(StringFlags::B_PREFIX) + + // bytestrings + Self::Bytes(ByteStringPrefix::Regular) => StringFlags::B_PREFIX, + Self::Bytes(ByteStringPrefix::Raw { uppercase_r: false }) => { + 
StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_LOWER) } - Self::RawFormat { uppercase_r: true } => { - StringFlags::R_PREFIX_UPPER.union(StringFlags::F_PREFIX) + Self::Bytes(ByteStringPrefix::Raw { uppercase_r: true }) => { + StringFlags::B_PREFIX.union(StringFlags::R_PREFIX_UPPER) + } + + // f-strings + Self::Format(FStringPrefix::Regular) => StringFlags::F_PREFIX, + Self::Format(FStringPrefix::Raw { uppercase_r: false }) => { + StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_LOWER) } - Self::RawFormat { uppercase_r: false } => { - StringFlags::R_PREFIX_LOWER.union(StringFlags::F_PREFIX) + Self::Format(FStringPrefix::Raw { uppercase_r: true }) => { + StringFlags::F_PREFIX.union(StringFlags::R_PREFIX_UPPER) } - Self::Unicode => StringFlags::U_PREFIX, } } const fn as_str(self) -> &'static str { match self { - Self::Bytes => "b", - Self::Format => "f", - Self::Unicode => "u", - Self::Raw { uppercase: true } => "R", - Self::Raw { uppercase: false } => "r", - Self::RawBytes { uppercase_r: true } => "Rb", - Self::RawBytes { uppercase_r: false } => "rb", - Self::RawFormat { uppercase_r: true } => "Rf", - Self::RawFormat { uppercase_r: false } => "rf", + Self::Regular(regular_prefix) => regular_prefix.as_str(), + Self::Bytes(bytestring_prefix) => bytestring_prefix.as_str(), + Self::Format(fstring_prefix) => fstring_prefix.as_str(), } } } +impl fmt::Display for StringPrefix { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +impl Default for StringPrefix { + fn default() -> Self { + Self::Regular(StringLiteralPrefix::Empty) + } +} + #[derive(Default, Clone, Copy, PartialEq, Eq, Hash)] pub struct StringKind(StringFlags); impl StringKind { - pub(crate) const fn from_prefix(prefix: Option) -> Self { - if let Some(prefix) = prefix { - Self(prefix.as_flags()) - } else { - Self(StringFlags::empty()) - } + pub(crate) const fn from_prefix(prefix: StringPrefix) -> Self { + Self(prefix.as_flags()) } /// Does the string have 
a `u` or `U` prefix? @@ -238,61 +232,45 @@ impl StringKind { } } - const fn prefix(self) -> Option<StringPrefix> { + pub const fn prefix(self) -> StringPrefix { + // f-strings if self.0.contains(StringFlags::F_PREFIX) { if self.0.contains(StringFlags::R_PREFIX_LOWER) { - return Some(StringPrefix::RawFormat { uppercase_r: false }); + return StringPrefix::Format(FStringPrefix::Raw { uppercase_r: false }); } if self.0.contains(StringFlags::R_PREFIX_UPPER) { - return Some(StringPrefix::RawFormat { uppercase_r: true }); + return StringPrefix::Format(FStringPrefix::Raw { uppercase_r: true }); } - return Some(StringPrefix::Format); + return StringPrefix::Format(FStringPrefix::Regular); } + + // bytestrings if self.0.contains(StringFlags::B_PREFIX) { - if self.0.contains(StringFlags::R_PREFIX_LOWER) { + if self.0.contains(StringFlags::R_PREFIX_UPPER) { - return Some(StringPrefix::RawBytes { uppercase_r: true }); + return StringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: true }); } if self.0.contains(StringFlags::R_PREFIX_LOWER) { - return Some(StringPrefix::RawBytes { uppercase_r: false }); + return StringPrefix::Bytes(ByteStringPrefix::Raw { uppercase_r: false }); } - return Some(StringPrefix::Bytes); + return StringPrefix::Bytes(ByteStringPrefix::Regular); } + + // all other strings if self.0.contains(StringFlags::R_PREFIX_LOWER) { - return Some(StringPrefix::Raw { uppercase: false }); + return StringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: false }); } if self.0.contains(StringFlags::R_PREFIX_UPPER) { - return Some(StringPrefix::Raw { uppercase: true }); + return StringPrefix::Regular(StringLiteralPrefix::Raw { uppercase: true }); } if self.0.contains(StringFlags::U_PREFIX) { - return Some(StringPrefix::Unicode); - } - None - } - - /// A `str` representation of the prefixes used (if any) - /// in the string's opener. The order of the prefixes is normalized, - /// and all casing is normalized to lowercase except for `r` prefixes. 
- /// - /// See - /// for why we track the casing of the `r` prefix, - /// but not for any other prefix. - /// - /// Examples: - /// - `"foo"` -> `""` - /// - `B'foo'` -> `"b"` - /// - `"rf"{bar}"` -> `"rf"` - /// - `BR'{foo}'` -> `"Rb"` - pub const fn prefix_str(self) -> &'static str { - if let Some(prefix) = self.prefix() { - prefix.as_str() - } else { - "" + return StringPrefix::Regular(StringLiteralPrefix::Unicode); } + StringPrefix::Regular(StringLiteralPrefix::Empty) } /// The length of the prefixes used (if any) in the string's opener. pub fn prefix_len(self) -> TextSize { - self.prefix_str().text_len() + self.prefix().as_str().text_len() } /// The length of the quotes used to start and close the string. @@ -323,7 +301,7 @@ impl StringKind { pub fn format_string_contents(self, contents: &str) -> String { format!( "{}{}{}{}", - self.prefix_str(), + self.prefix(), self.quote_str(), contents, self.quote_str() @@ -346,7 +324,7 @@ impl StringKind { impl fmt::Debug for StringKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("StringKind") - .field("prefix", &self.prefix_str()) + .field("prefix", &self.prefix().as_str()) .field("triple_quoted", &self.is_triple_quoted()) .field("quote_style", &self.quote_style()) .finish() @@ -362,22 +340,13 @@ impl From for ruff_python_ast::StringLiteralFlags { if value.is_triple_quoted() { new = new.with_triple_quotes(); } - new.with_prefix(match value.prefix() { - None => StringLiteralPrefix::Empty, - Some(StringPrefix::Unicode) => StringLiteralPrefix::Unicode, - Some(StringPrefix::Raw { uppercase: true }) => { - StringLiteralPrefix::Raw { uppercase: true } - } - Some(StringPrefix::Raw { uppercase: false }) => { - StringLiteralPrefix::Raw { uppercase: false } - } - Some(StringPrefix::Bytes | StringPrefix::RawBytes { .. }) => { - panic!("Attempting to convert a bytestring into a non-bytestring!") - } - Some(StringPrefix::Format | StringPrefix::RawFormat { .. 
}) => { - panic!("Attempting to convert an f-string into a non-fstring!") - } - }) + let StringPrefix::Regular(prefix) = value.prefix() else { + panic!( + "Attempting to convert {} into a regular string", + value.prefix() + ) + }; + new.with_prefix(prefix) } } @@ -390,16 +359,10 @@ impl From for ruff_python_ast::BytesLiteralFlags { if value.is_triple_quoted() { new = new.with_triple_quotes(); } - new.with_prefix(match value.prefix() { - Some(StringPrefix::Bytes) => ByteStringPrefix::Regular, - Some(StringPrefix::RawBytes { uppercase_r: true }) => { - ByteStringPrefix::Raw { uppercase_r: true } - } - Some(StringPrefix::RawBytes { uppercase_r: false }) => { - ByteStringPrefix::Raw { uppercase_r: false } - } - _ => panic!("Attempting to convert a non-bytestring into a bytestring!"), - }) + let StringPrefix::Bytes(bytestring_prefix) = value.prefix() else { + panic!("Attempting to convert {} into a bytestring", value.prefix()) + }; + new.with_prefix(bytestring_prefix) } } @@ -412,15 +375,9 @@ impl From for ruff_python_ast::FStringFlags { if value.is_triple_quoted() { new = new.with_triple_quotes(); } - new.with_prefix(match value.prefix() { - Some(StringPrefix::Format) => FStringPrefix::Regular, - Some(StringPrefix::RawFormat { uppercase_r: false }) => { - FStringPrefix::Raw { uppercase_r: false } - } - Some(StringPrefix::RawFormat { uppercase_r: true }) => { - FStringPrefix::Raw { uppercase_r: true } - } - _ => panic!("Attempting to convert a non-f-string into an f-string!"), - }) + let StringPrefix::Format(fstring_prefix) = value.prefix() else { + panic!("Attempting to convert {} into an f-string", value.prefix()) + }; + new.with_prefix(fstring_prefix) } }