From de2bd87f27c98438fae79087e0dc65c6099d5dc4 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 18 Sep 2023 21:35:31 +0530 Subject: [PATCH] Add f-string ranges builder to support nested f-strings --- crates/ruff/src/checkers/ast/mod.rs | 2 +- .../ruff_python_index/src/fstring_ranges.rs | 61 +++++++++ crates/ruff_python_index/src/indexer.rs | 123 ++++++++++-------- crates/ruff_python_index/src/lib.rs | 1 + 4 files changed, 133 insertions(+), 54 deletions(-) create mode 100644 crates/ruff_python_index/src/fstring_ranges.rs diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 27a8a2a7d31b0f..365cb6d16a4f75 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -183,7 +183,7 @@ impl<'a> Checker<'a> { // Find the quote character used to start the containing f-string. let expr = self.semantic.current_expression()?; - let string_range = self.indexer.f_string_range(expr.start())?; + let string_range = self.indexer.fstring_ranges().innermost(expr.start())?; let trailing_quote = trailing_quote(self.locator.slice(string_range))?; // Invert the quote character, if it's a single quote. diff --git a/crates/ruff_python_index/src/fstring_ranges.rs b/crates/ruff_python_index/src/fstring_ranges.rs new file mode 100644 index 00000000000000..15fc08f7a11b7d --- /dev/null +++ b/crates/ruff_python_index/src/fstring_ranges.rs @@ -0,0 +1,61 @@ +use std::collections::BTreeMap; + +use ruff_python_parser::Tok; +use ruff_text_size::{TextRange, TextSize}; + +/// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`]. +/// There can be multiple overlapping ranges for nested f-strings. +#[derive(Debug)] +pub struct FStringRanges { + raw: BTreeMap, +} + +impl FStringRanges { + /// Return the [`TextRange`] of the innermost f-string at the given offset. 
+ pub fn innermost(&self, offset: TextSize) -> Option { + self.raw + .range(..=offset) + .rev() + .find(|(_, range)| range.contains(offset)) + .map(|(_, range)| *range) + } + + /// Return the [`TextRange`] of the outermost f-string at the given offset. + pub fn outermost(&self, offset: TextSize) -> Option { + self.raw + .range(..=offset) + .find(|(_, range)| range.contains(offset)) + .map(|(_, range)| *range) + } + + #[cfg(test)] + pub(crate) fn ranges(&self) -> impl Iterator + '_ { + self.raw.values().copied() + } +} + +#[derive(Default)] +pub(crate) struct FStringRangesBuilder { + start_locations: Vec, + raw: BTreeMap, +} + +impl FStringRangesBuilder { + pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) { + match token { + Tok::FStringStart => { + self.start_locations.push(range.start()); + } + Tok::FStringEnd => { + if let Some(start) = self.start_locations.pop() { + self.raw.insert(start, TextRange::new(start, range.end())); + } + } + _ => {} + } + } + + pub(crate) fn finish(self) -> FStringRanges { + FStringRanges { raw: self.raw } + } +} diff --git a/crates/ruff_python_index/src/indexer.rs b/crates/ruff_python_index/src/indexer.rs index a503ad20f25dda..3af272c527b4f2 100644 --- a/crates/ruff_python_index/src/indexer.rs +++ b/crates/ruff_python_index/src/indexer.rs @@ -1,7 +1,6 @@ //! Struct used to index source code, to enable efficient lookup of tokens that //! are omitted from the AST (e.g., commented lines). -use crate::CommentRangesBuilder; use ruff_python_ast::Stmt; use ruff_python_parser::lexer::LexResult; use ruff_python_parser::Tok; @@ -11,15 +10,17 @@ use ruff_python_trivia::{ use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange, TextSize}; +use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder}; +use crate::CommentRangesBuilder; + pub struct Indexer { comment_ranges: CommentRanges, /// Stores the start offset of continuation lines. 
+ continuation_lines: Vec, - /// The range of all f-string in the source document. The ranges are sorted by their - /// [`TextRange::start`] position in increasing order. No two ranges are overlapping. - f_string_ranges: Vec, + /// The ranges of all f-strings in the source document. + fstring_ranges: FStringRanges, } impl Indexer { @@ -27,12 +28,8 @@ impl Indexer { assert!(TextSize::try_from(locator.contents().len()).is_ok()); let mut comment_ranges_builder = CommentRangesBuilder::default(); + let mut fstring_ranges_builder = FStringRangesBuilder::default(); let mut continuation_lines = Vec::new(); - let mut f_string_ranges = Vec::new(); - // Range for the first f-string start token in a f-string that could - // potentially contain nested f-strings. - let mut first_f_string_start_range = None; - let mut f_string_start_count = 0u32; // Token, end let mut prev_end = TextSize::default(); let mut prev_token: Option<&Tok> = None; @@ -63,42 +60,20 @@ impl Indexer { } comment_ranges_builder.visit_token(tok, *range); + fstring_ranges_builder.visit_token(tok, *range); - match tok { - Tok::Newline | Tok::NonLogicalNewline => { - line_start = range.end(); - } - Tok::FStringStart => { - f_string_start_count += 1; - if f_string_start_count == 1 { - first_f_string_start_range = Some(*range); - } - } - Tok::FStringEnd => { - // This is always going to be > 0, because the lexer will only - // emit the end token if there was a start token to begin with. 
- f_string_start_count -= 1; - if f_string_start_count == 0 { - if let Some(start_range) = first_f_string_start_range.take() { - let f_string_range = TextRange::new(start_range.start(), range.end()); - f_string_ranges.push(f_string_range); - - if matches!(locator.slice(range), "'''" | r#"""""#) { - triple_quoted_string_ranges.push(f_string_range); - } - } - } - } - _ => {} + if matches!(tok, Tok::Newline | Tok::NonLogicalNewline) { + line_start = range.end(); } prev_token = Some(tok); prev_end = range.end(); } + Self { comment_ranges: comment_ranges_builder.finish(), continuation_lines, - f_string_ranges, + fstring_ranges: fstring_ranges_builder.finish(), } } @@ -107,6 +82,11 @@ impl Indexer { &self.comment_ranges } + /// Returns the byte offset ranges of f-strings. + pub const fn fstring_ranges(&self) -> &FStringRanges { + &self.fstring_ranges + } + /// Returns the line start positions of continuations (backslash). pub fn continuation_line_starts(&self) -> &[TextSize] { &self.continuation_lines @@ -118,22 +98,6 @@ impl Indexer { self.continuation_lines.binary_search(&line_start).is_ok() } - /// Return the [`TextRange`] of the f-string containing a given offset. - pub fn f_string_range(&self, offset: TextSize) -> Option { - let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| { - if offset < range.start() { - std::cmp::Ordering::Greater - } else if range.contains(offset) { - std::cmp::Ordering::Equal - } else { - std::cmp::Ordering::Less - } - }) else { - return None; - }; - Some(self.f_string_ranges[string_range_index]) - } - /// Returns `true` if a statement or expression includes at least one comment. 
pub fn has_comments(&self, node: &T, locator: &Locator) -> bool where @@ -269,7 +233,7 @@ mod tests { use ruff_python_parser::lexer::LexResult; use ruff_python_parser::{lexer, Mode}; use ruff_source_file::Locator; - use ruff_text_size::TextSize; + use ruff_text_size::{TextRange, TextSize}; use crate::Indexer; @@ -353,4 +317,57 @@ import os ] ); } + + #[test] + fn test_f_string_ranges() { + let contents = r#" +f"normal f-string" +f"start {f"inner {f"another"}"} end" +f"implicit " f"concatenation" +"# + .trim(); + let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); + assert_eq!( + indexer.fstring_ranges().ranges().collect::>(), + &[ + TextRange::new(TextSize::from(0), TextSize::from(18)), + TextRange::new(TextSize::from(19), TextSize::from(55)), + TextRange::new(TextSize::from(28), TextSize::from(49)), + TextRange::new(TextSize::from(37), TextSize::from(47)), + TextRange::new(TextSize::from(56), TextSize::from(68)), + TextRange::new(TextSize::from(69), TextSize::from(85)), + ] + ); + } + + #[test] + fn test_triple_quoted_f_string_ranges() { + let contents = r#" +f""" +this is one +multiline f-string +""" +f''' +and this is +another +''' +f""" +this is a {f"""nested multiline +f-string"""} +""" +"# + .trim(); + let lxr: Vec = lexer::lex(contents, Mode::Module).collect(); + let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents)); + assert_eq!( + indexer.fstring_ranges().ranges().collect::>(), + &[ + TextRange::new(TextSize::from(0), TextSize::from(39)), + TextRange::new(TextSize::from(40), TextSize::from(68)), + TextRange::new(TextSize::from(69), TextSize::from(122)), + TextRange::new(TextSize::from(85), TextSize::from(117)), + ] + ); + } } diff --git a/crates/ruff_python_index/src/lib.rs b/crates/ruff_python_index/src/lib.rs index 2e585ca5df0e1a..288009e90e80cf 100644 --- a/crates/ruff_python_index/src/lib.rs +++ b/crates/ruff_python_index/src/lib.rs @@ -1,4 
+1,5 @@ mod comment_ranges; +mod fstring_ranges; mod indexer; pub use comment_ranges::CommentRangesBuilder;