diff --git a/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs b/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs index 3aaafe4d761fb..6a7f723bc7dd8 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/string_like.rs @@ -33,4 +33,7 @@ pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) { if checker.enabled(Rule::UnnecessaryEscapedQuote) { flake8_quotes::rules::unnecessary_escaped_quote(checker, string_like); } + if checker.enabled(Rule::AvoidableEscapedQuote) && checker.settings.flake8_quotes.avoid_escape { + flake8_quotes::rules::avoidable_escaped_quote(checker, string_like); + } } diff --git a/crates/ruff_linter/src/checkers/tokens.rs b/crates/ruff_linter/src/checkers/tokens.rs index 8370d6e1750da..704926c005414 100644 --- a/crates/ruff_linter/src/checkers/tokens.rs +++ b/crates/ruff_linter/src/checkers/tokens.rs @@ -16,7 +16,7 @@ use crate::registry::{AsRule, Rule}; use crate::rules::pycodestyle::rules::BlankLinesChecker; use crate::rules::{ eradicate, flake8_commas, flake8_executable, flake8_fixme, flake8_implicit_str_concat, - flake8_pyi, flake8_quotes, flake8_todos, pycodestyle, pygrep_hooks, pylint, pyupgrade, ruff, + flake8_pyi, flake8_todos, pycodestyle, pygrep_hooks, pylint, pyupgrade, ruff, }; use crate::settings::LinterSettings; @@ -122,10 +122,6 @@ pub(crate) fn check_tokens( ); } - if settings.rules.enabled(Rule::AvoidableEscapedQuote) && settings.flake8_quotes.avoid_escape { - flake8_quotes::rules::avoidable_escaped_quote(&mut diagnostics, tokens, locator, settings); - } - if settings.rules.any_enabled(&[ Rule::SingleLineImplicitStringConcatenation, Rule::MultiLineImplicitStringConcatenation, diff --git a/crates/ruff_linter/src/lex/docstring_detection.rs b/crates/ruff_linter/src/lex/docstring_detection.rs deleted file mode 100644 index 33f8a4538acf0..0000000000000 --- a/crates/ruff_linter/src/lex/docstring_detection.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Extract docstrings via tokenization. -//! -//! See: -//! -//! TODO(charlie): Consolidate with the existing AST-based docstring extraction. - -use ruff_python_parser::Tok; - -#[derive(Default, Copy, Clone)] -enum State { - // Start of the module: first string gets marked as a docstring. - #[default] - ExpectModuleDocstring, - // After seeing a class definition, we're waiting for the block colon (and do bracket - // counting). - ExpectClassColon, - // After seeing the block colon in a class definition, we expect a docstring. - ExpectClassDocstring, - // Same as ExpectClassColon, but for function definitions. - ExpectFunctionColon, - // Same as ExpectClassDocstring, but for function definitions. - ExpectFunctionDocstring, - // Skip tokens until we observe a `class` or `def`. - Other, -} - -#[derive(Default)] -pub(crate) struct StateMachine { - state: State, - bracket_count: usize, -} - -impl StateMachine { - pub(crate) fn consume(&mut self, tok: &Tok) -> bool { - match tok { - Tok::NonLogicalNewline - | Tok::Newline - | Tok::Indent - | Tok::Dedent - | Tok::Comment(..) => false, - - Tok::String { .. } => { - if matches!( - self.state, - State::ExpectModuleDocstring - | State::ExpectClassDocstring - | State::ExpectFunctionDocstring - ) { - self.state = State::Other; - true - } else { - false - } - } - Tok::Class => { - self.state = State::ExpectClassColon; - self.bracket_count = 0; - - false - } - - Tok::Def => { - self.state = State::ExpectFunctionColon; - self.bracket_count = 0; - - false - } - - Tok::Colon => { - if self.bracket_count == 0 { - if matches!(self.state, State::ExpectClassColon) { - self.state = State::ExpectClassDocstring; - } else if matches!(self.state, State::ExpectFunctionColon) { - self.state = State::ExpectFunctionDocstring; - } - } - - false - } - - Tok::Lpar | Tok::Lbrace | Tok::Lsqb => { - self.bracket_count = self.bracket_count.saturating_add(1); - if matches!( - self.state, - State::ExpectModuleDocstring - | State::ExpectClassDocstring - | State::ExpectFunctionDocstring - ) { - self.state = State::Other; - } - false - } - - Tok::Rpar | Tok::Rbrace | Tok::Rsqb => { - self.bracket_count = self.bracket_count.saturating_sub(1); - if matches!( - self.state, - State::ExpectModuleDocstring - | State::ExpectClassDocstring - | State::ExpectFunctionDocstring - ) { - self.state = State::Other; - } - - false - } - - _ => { - if matches!( - self.state, - State::ExpectModuleDocstring - | State::ExpectClassDocstring - | State::ExpectFunctionDocstring - ) { - self.state = State::Other; - } - - false - } - } - } -} diff --git a/crates/ruff_linter/src/lex/mod.rs b/crates/ruff_linter/src/lex/mod.rs deleted file mode 100644 index c6673c215d638..0000000000000 --- a/crates/ruff_linter/src/lex/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub(crate) mod docstring_detection; diff --git a/crates/ruff_linter/src/lib.rs b/crates/ruff_linter/src/lib.rs index 857c7fadae5fc..4d2ff36cb58fb 100644 --- a/crates/ruff_linter/src/lib.rs +++ b/crates/ruff_linter/src/lib.rs @@ -24,7 +24,6 @@ mod docstrings; mod fix; pub mod fs; mod importer; -mod lex; pub mod line_width; pub mod linter; pub mod logging; diff --git a/crates/ruff_linter/src/registry.rs b/crates/ruff_linter/src/registry.rs index 3d0ebafcb4e3c..03a24e452f8c9 100644 --- a/crates/ruff_linter/src/registry.rs +++ b/crates/ruff_linter/src/registry.rs @@ -256,7 +256,6 @@ impl Rule { | Rule::MixedSpacesAndTabs | Rule::TrailingWhitespace => LintSource::PhysicalLines, Rule::AmbiguousUnicodeCharacterComment - | Rule::AvoidableEscapedQuote | Rule::BlanketNOQA | Rule::BlanketTypeIgnore | Rule::BlankLineAfterDecorator diff --git a/crates/ruff_linter/src/rules/flake8_quotes/helpers.rs b/crates/ruff_linter/src/rules/flake8_quotes/helpers.rs index 4b3184e69a770..9646c474dad5c 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/helpers.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/helpers.rs @@ -1,3 +1,12 @@ +use ruff_python_ast::AnyStringKind; +use ruff_text_size::TextLen; + +/// Returns the raw contents of the string given the string's contents and kind. +/// This is a string without the prefix and quotes. +pub(super) fn raw_contents(contents: &str, kind: AnyStringKind) -> &str { + &contents[kind.opener_len().to_usize()..(contents.text_len() - kind.closer_len()).to_usize()] +} + /// Return `true` if the haystack contains an escaped quote. pub(super) fn contains_escaped_quote(haystack: &str, quote: char) -> bool { for index in memchr::memchr_iter(quote as u8, haystack.as_bytes()) { diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs index 20a9ededa03b1..560c1734c3b62 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs @@ -1,15 +1,16 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_parser::lexer::LexResult; -use ruff_python_parser::Tok; +use ruff_python_ast::visitor::{walk_f_string, Visitor}; +use ruff_python_ast::{self as ast, AnyStringKind, StringLike}; use ruff_source_file::Locator; -use ruff_text_size::TextRange; +use ruff_text_size::{Ranged, TextRange, TextSize}; -use crate::lex::docstring_detection::StateMachine; +use crate::checkers::ast::Checker; +use crate::rules::flake8_quotes; use crate::settings::LinterSettings; -use super::super::helpers::{contains_escaped_quote, unescape_string}; -use super::super::settings::Quote; +use flake8_quotes::helpers::{contains_escaped_quote, raw_contents, unescape_string}; +use flake8_quotes::settings::Quote; /// ## What it does /// Checks for strings that include escaped quotes, and suggests changing @@ -49,197 +50,303 @@ impl AlwaysFixableViolation for AvoidableEscapedQuote { } } -struct FStringContext { - /// Whether to check for escaped quotes in the f-string. - check_for_escaped_quote: bool, - /// The range of the f-string start token. - start_range: TextRange, - /// The ranges of the f-string middle tokens containing escaped quotes. - middle_ranges_with_escapes: Vec, +/// Q003 +pub(crate) fn avoidable_escaped_quote(checker: &mut Checker, string_like: StringLike) { + if checker.semantic().in_docstring() + || checker.semantic().in_string_type_definition() + // This rule has support for strings nested inside another f-strings but they're checked + // via the outermost f-string. This means that we shouldn't be checking any nested string + // or f-string. + || checker.semantic().in_f_string_replacement_field() + { + return; + } + + let mut rule_checker = AvoidableEscapedQuoteChecker::new(checker.locator(), checker.settings); + + match string_like { + StringLike::String(expr) => { + for string_literal in &expr.value { + rule_checker.visit_string_literal(string_literal); + } + } + StringLike::Bytes(expr) => { + for bytes_literal in &expr.value { + rule_checker.visit_bytes_literal(bytes_literal); + } + } + StringLike::FString(expr) => { + for part in &expr.value { + match part { + ast::FStringPart::Literal(string_literal) => { + rule_checker.visit_string_literal(string_literal); + } + ast::FStringPart::FString(f_string) => { + rule_checker.visit_f_string(f_string); + } + } + } + } + } + + checker.diagnostics.extend(rule_checker.into_diagnostics()); +} + +/// Checks for `Q003` violations using the [`Visitor`] implementation. +#[derive(Debug)] +struct AvoidableEscapedQuoteChecker<'a> { + locator: &'a Locator<'a>, + quotes_settings: &'a flake8_quotes::settings::Settings, + supports_pep701: bool, + diagnostics: Vec, } -impl FStringContext { - fn new(check_for_escaped_quote: bool, fstring_start_range: TextRange) -> Self { +impl<'a> AvoidableEscapedQuoteChecker<'a> { + fn new(locator: &'a Locator<'a>, settings: &'a LinterSettings) -> Self { Self { - check_for_escaped_quote, - start_range: fstring_start_range, - middle_ranges_with_escapes: vec![], + locator, + quotes_settings: &settings.flake8_quotes, + supports_pep701: settings.target_version.supports_pep701(), + diagnostics: vec![], } } - /// Update the context to not check for escaped quotes, and clear any - /// existing reported ranges. - fn ignore_escaped_quotes(&mut self) { - self.check_for_escaped_quote = false; - self.middle_ranges_with_escapes.clear(); + /// Consumes the checker and returns a vector of [`Diagnostic`] found during the visit. + fn into_diagnostics(self) -> Vec { + self.diagnostics } +} - fn push_fstring_middle_range(&mut self, range: TextRange) { - self.middle_ranges_with_escapes.push(range); +impl Visitor<'_> for AvoidableEscapedQuoteChecker<'_> { + fn visit_string_literal(&mut self, string_literal: &'_ ast::StringLiteral) { + if let Some(diagnostic) = check_string_or_bytes( + self.locator, + self.quotes_settings, + string_literal.range(), + AnyStringKind::from(string_literal.flags), + ) { + self.diagnostics.push(diagnostic); + } } -} -/// Q003 -pub(crate) fn avoidable_escaped_quote( - diagnostics: &mut Vec, - lxr: &[LexResult], - locator: &Locator, - settings: &LinterSettings, -) { - let quotes_settings = &settings.flake8_quotes; - let supports_pep701 = settings.target_version.supports_pep701(); - let mut fstrings: Vec = Vec::new(); - let mut state_machine = StateMachine::default(); - - for &(ref tok, tok_range) in lxr.iter().flatten() { - let is_docstring = state_machine.consume(tok); - if is_docstring { - continue; + fn visit_bytes_literal(&mut self, bytes_literal: &'_ ast::BytesLiteral) { + if let Some(diagnostic) = check_string_or_bytes( + self.locator, + self.quotes_settings, + bytes_literal.range(), + AnyStringKind::from(bytes_literal.flags), + ) { + self.diagnostics.push(diagnostic); } + } - if !supports_pep701 { - // If this is a string or a start of a f-string which is inside another - // f-string, we won't check for escaped quotes for the entire f-string - // if the target version doesn't support PEP 701. For example: - // - // ```python - // f"\"foo\" {'nested'}" - // # ^^^^^^^^ - // # We're here - // ``` - // - // If we try to fix the above example, the outer and inner quote - // will be the same which is invalid pre 3.12: - // - // ```python - // f'"foo" {'nested'}" - // ``` - if matches!(tok, Tok::String { .. } | Tok::FStringStart(_)) { - if let Some(fstring_context) = fstrings.last_mut() { - fstring_context.ignore_escaped_quotes(); - continue; - } + fn visit_f_string(&mut self, f_string: &'_ ast::FString) { + // If the target version doesn't support PEP 701, skip this entire f-string if it contains + // any string literal in any of the expression element. For example: + // + // ```python + // f"\"foo\" {'nested'}" + // ``` + // + // If we try to fix the above example, the outer and inner quote will be the same which is + // invalid for any Python version before 3.12: + // + // ```python + // f'"foo" {'nested'}" + // ``` + // + // Note that this check needs to be done globally to ignore the entire f-string. It is + // implicitly global in that we avoid recursing into this f-string if this is the case. + if !self.supports_pep701 { + let contains_any_string = { + let mut visitor = ContainsAnyString::default(); + // We need to use the `walk_f_string` instead of `visit_f_string` to avoid + // considering the top level f-string. + walk_f_string(&mut visitor, f_string); + visitor.result + }; + if contains_any_string { + return; } } - match tok { - Tok::String { - value: string_contents, - kind, - } => { - if kind.is_raw_string() || kind.is_triple_quoted() { - continue; - } + let opposite_quote_char = self.quotes_settings.inline_quotes.opposite().as_char(); - // Check if we're using the preferred quotation style. - if Quote::from(kind.quote_style()) != quotes_settings.inline_quotes { - continue; - } - - if contains_escaped_quote(string_contents, quotes_settings.inline_quotes.as_char()) - && !contains_quote( - string_contents, - quotes_settings.inline_quotes.opposite().as_char(), - ) - { - let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range); - let fixed_contents = format!( - "{prefix}{quote}{value}{quote}", - prefix = kind.prefix(), - quote = quotes_settings.inline_quotes.opposite().as_char(), - value = unescape_string( - string_contents, - quotes_settings.inline_quotes.as_char() - ) - ); - diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( - fixed_contents, - tok_range, - ))); - diagnostics.push(diagnostic); - } - } - Tok::FStringStart(kind) => { - // Check for escaped quote only if we're using the preferred quotation - // style and it isn't a triple-quoted f-string. - let check_for_escaped_quote = !kind.is_triple_quoted() - && Quote::from(kind.quote_style()) == quotes_settings.inline_quotes; - fstrings.push(FStringContext::new(check_for_escaped_quote, tok_range)); + // If any literal part of this f-string contains the quote character which is opposite to + // the configured inline quotes, we can't change the quote style for this f-string. For + // example: + // + // ```py + // f"\"hello\" {x} 'world'" + // ``` + // + // If we try to fix the above example, the f-string will end in the middle and "world" will + // be considered as a variable which is outside this f-string: + // + // ```py + // f'"hello" {x} 'world'' + // # ^ + // # f-string ends here now + // ``` + // + // The check is local to this f-string and it shouldn't check for any literal parts of any + // nested f-string. This is correct because by this point, we know that the target version + // is 3.12 or that this f-string doesn't have any strings nested in it. For example: + // + // ```py + // f'\'normal\' {f'\'nested\' {x} "double quotes"'} normal' + // ``` + // + // This contains a nested f-string but if we reached here that means the target version + // supports PEP 701. The double quotes in the nested f-string shouldn't affect the outer + // f-string because the following is valid for Python version 3.12 and later: + // + // ```py + // f"'normal' {f'\'nested\' {x} "double quotes"'} normal" + // ``` + if !f_string + .literals() + .any(|literal| contains_quote(literal, opposite_quote_char)) + { + if let Some(diagnostic) = check_f_string(self.locator, self.quotes_settings, f_string) { + self.diagnostics.push(diagnostic); } - Tok::FStringMiddle { - value: string_contents, - kind, - } if !kind.is_raw_string() => { - let Some(context) = fstrings.last_mut() else { - continue; - }; - if !context.check_for_escaped_quote { - continue; - } - // If any part of the f-string contains the opposite quote, - // we can't change the quote style in the entire f-string. - if contains_quote( - string_contents, - quotes_settings.inline_quotes.opposite().as_char(), - ) { - context.ignore_escaped_quotes(); - continue; - } - if contains_escaped_quote(string_contents, quotes_settings.inline_quotes.as_char()) - { - context.push_fstring_middle_range(tok_range); - } - } - Tok::FStringEnd => { - let Some(context) = fstrings.pop() else { - continue; - }; - if context.middle_ranges_with_escapes.is_empty() { - // There are no `FStringMiddle` tokens containing any escaped - // quotes. - continue; - } - let mut diagnostic = Diagnostic::new( - AvoidableEscapedQuote, - TextRange::new(context.start_range.start(), tok_range.end()), - ); - let fstring_start_edit = Edit::range_replacement( - // No need for `r`/`R` as we don't perform the checks - // for raw strings. - format!("f{}", quotes_settings.inline_quotes.opposite().as_char()), - context.start_range, - ); - let fstring_middle_and_end_edits = context - .middle_ranges_with_escapes - .iter() - .map(|&range| { - Edit::range_replacement( - unescape_string( - locator.slice(range), - quotes_settings.inline_quotes.as_char(), - ), - range, - ) - }) - .chain(std::iter::once( - // `FStringEnd` edit - Edit::range_replacement( - quotes_settings - .inline_quotes - .opposite() - .as_char() - .to_string(), - tok_range, - ), - )); - diagnostic.set_fix(Fix::safe_edits( - fstring_start_edit, - fstring_middle_and_end_edits, - )); - diagnostics.push(diagnostic); - } - _ => {} } + + walk_f_string(self, f_string); + } +} + +/// Checks for unnecessary escaped quotes in a string or bytes literal. +/// +/// # Panics +/// +/// If the string kind is an f-string. +fn check_string_or_bytes( + locator: &Locator, + quotes_settings: &flake8_quotes::settings::Settings, + range: TextRange, + kind: AnyStringKind, +) -> Option { + assert!(!kind.is_f_string()); + + if kind.is_triple_quoted() || kind.is_raw_string() { + return None; + } + + // Check if we're using the preferred quotation style. + if Quote::from(kind.quote_style()) != quotes_settings.inline_quotes { + return None; + } + + let contents = raw_contents(locator.slice(range), kind); + + if !contains_escaped_quote(contents, quotes_settings.inline_quotes.as_char()) + || contains_quote(contents, quotes_settings.inline_quotes.opposite().as_char()) + { + return None; + } + + let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, range); + let fixed_contents = format!( + "{prefix}{quote}{value}{quote}", + prefix = kind.prefix(), + quote = quotes_settings.inline_quotes.opposite().as_char(), + value = unescape_string(contents, quotes_settings.inline_quotes.as_char()) + ); + diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement( + fixed_contents, + range, + ))); + Some(diagnostic) +} + +/// Checks for unnecessary escaped quotes in an f-string. +fn check_f_string( + locator: &Locator, + quotes_settings: &flake8_quotes::settings::Settings, + f_string: &ast::FString, +) -> Option { + let ast::FString { flags, range, .. } = f_string; + + if flags.is_triple_quoted() || flags.prefix().is_raw() { + return None; + } + + // Check if we're using the preferred quotation style. + if Quote::from(flags.quote_style()) != quotes_settings.inline_quotes { + return None; + } + + let quote_char = quotes_settings.inline_quotes.as_char(); + let opposite_quote_char = quotes_settings.inline_quotes.opposite().as_char(); + + let mut edits = vec![]; + for literal in f_string.literals() { + let content = locator.slice(literal); + if !contains_escaped_quote(content, quote_char) { + continue; + } + edits.push(Edit::range_replacement( + unescape_string(content, quote_char), + literal.range(), + )); + } + + if edits.is_empty() { + return None; + } + + // Replacement for the f-string opening quote. We don't perform the check for raw and + // triple-quoted f-strings, so no need to account for them. + let start_edit = Edit::range_replacement( + format!("f{opposite_quote_char}"), + TextRange::at( + range.start(), + // Prefix + quote char + TextSize::new(2), + ), + ); + + // Replacement for the f-string ending quote. We don't perform the check for triple-quoted + // f-string, so no need to account for them. + edits.push(Edit::range_replacement( + opposite_quote_char.to_string(), + TextRange::at( + // Offset would either be the end offset of the start edit in case there are no + // elements in the f-string (e.g., `f""`) or the end offset of the last f-string + // element (e.g., `f"hello"`). + f_string + .elements + .last() + .map_or_else(|| start_edit.end(), Ranged::end), + // Quote char + TextSize::new(1), + ), + )); + + Some( + Diagnostic::new(AvoidableEscapedQuote, *range).with_fix(Fix::safe_edits(start_edit, edits)), + ) +} + +#[derive(Debug, Default)] +struct ContainsAnyString { + result: bool, +} + +impl Visitor<'_> for ContainsAnyString { + fn visit_string_literal(&mut self, _: &'_ ast::StringLiteral) { + self.result = true; + } + + fn visit_bytes_literal(&mut self, _: &'_ ast::BytesLiteral) { + self.result = true; + } + + fn visit_f_string(&mut self, _: &'_ ast::FString) { + self.result = true; + // We don't need to recurse into this f-string now that we already know the result. } } diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs index a485396bc4e87..251e196287286 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs @@ -2,11 +2,11 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::{self as ast, AnyStringKind, StringLike}; use ruff_source_file::Locator; -use ruff_text_size::{Ranged, TextLen, TextRange}; +use ruff_text_size::{Ranged, TextRange}; use crate::checkers::ast::Checker; -use super::super::helpers::{contains_escaped_quote, unescape_string}; +use super::super::helpers::{contains_escaped_quote, raw_contents, unescape_string}; /// ## What it does /// Checks for strings that include unnecessarily escaped quotes. @@ -47,6 +47,10 @@ impl AlwaysFixableViolation for UnnecessaryEscapedQuote { /// Q004 pub(crate) fn unnecessary_escaped_quote(checker: &mut Checker, string_like: StringLike) { + if checker.semantic().in_docstring() { + return; + } + let locator = checker.locator(); match string_like { @@ -147,9 +151,3 @@ fn check_f_string(locator: &Locator, f_string: &ast::FString) -> Option &str { - &contents[kind.opener_len().to_usize()..(contents.text_len() - kind.closer_len()).to_usize()] -}