Skip to content

Commit

Permalink
Add f-string ranges builder to support nested f-strings
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Sep 18, 2023
1 parent c488fc4 commit de2bd87
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 54 deletions.
2 changes: 1 addition & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ impl<'a> Checker<'a> {

// Find the quote character used to start the containing f-string.
let expr = self.semantic.current_expression()?;
let string_range = self.indexer.f_string_range(expr.start())?;
let string_range = self.indexer.fstring_ranges().innermost(expr.start())?;
let trailing_quote = trailing_quote(self.locator.slice(string_range))?;

// Invert the quote character, if it's a single quote.
Expand Down
61 changes: 61 additions & 0 deletions crates/ruff_python_index/src/fstring_ranges.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use std::collections::BTreeMap;

use ruff_python_parser::Tok;
use ruff_text_size::{TextRange, TextSize};

/// Lookup table of every f-string range in a file.
///
/// Entries are keyed by the offset at which the f-string starts, so iterating
/// the map yields the ranges ordered by [`TextRange::start`]. A nested
/// f-string gets its own entry, which means stored ranges can overlap.
#[derive(Debug)]
pub struct FStringRanges {
    raw: BTreeMap<TextSize, TextRange>,
}

impl FStringRanges {
    /// Return the [`TextRange`] of the innermost f-string at the given offset.
    ///
    /// Only f-strings that start at or before `offset` are considered. They
    /// are visited from the latest start backwards and the first one that
    /// contains `offset` is returned; `None` if no stored range contains it.
    pub fn innermost(&self, offset: TextSize) -> Option<TextRange> {
        self.raw
            .range(..=offset)
            .rev()
            .map(|(_, candidate)| *candidate)
            .find(|candidate| candidate.contains(offset))
    }

    /// Return the [`TextRange`] of the outermost f-string at the given offset.
    ///
    /// Only f-strings that start at or before `offset` are considered. They
    /// are visited from the earliest start forwards and the first one that
    /// contains `offset` is returned; `None` if no stored range contains it.
    pub fn outermost(&self, offset: TextSize) -> Option<TextRange> {
        for (_, candidate) in self.raw.range(..=offset) {
            if candidate.contains(offset) {
                return Some(*candidate);
            }
        }
        None
    }

    /// Iterate over every stored range in ascending start order (test-only).
    #[cfg(test)]
    pub(crate) fn ranges(&self) -> impl Iterator<Item = TextRange> + '_ {
        self.raw.iter().map(|(_, range)| *range)
    }
}

/// Collects f-string ranges token by token and produces an [`FStringRanges`].
#[derive(Default)]
pub(crate) struct FStringRangesBuilder {
    /// Start offsets of f-strings whose start token has been seen but whose
    /// end token has not; the most recently opened f-string is last.
    start_locations: Vec<TextSize>,
    /// Completed f-string ranges, keyed by their start offset.
    raw: BTreeMap<TextSize, TextRange>,
}

impl FStringRangesBuilder {
    /// Feed one token and its source range into the builder.
    ///
    /// An f-string start token records where the f-string begins. An f-string
    /// end token closes the most recently opened f-string and stores the range
    /// from that start to the end of the closing token. Every other token is
    /// ignored, as is an end token with no pending start.
    pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) {
        if matches!(token, Tok::FStringStart) {
            self.start_locations.push(range.start());
            return;
        }

        if !matches!(token, Tok::FStringEnd) {
            return;
        }

        // Pair this end token with the innermost f-string that is still open.
        let Some(opened_at) = self.start_locations.pop() else {
            return;
        };
        self.raw
            .insert(opened_at, TextRange::new(opened_at, range.end()));
    }

    /// Consume the builder and return the collected ranges.
    ///
    /// Starts that were never matched by an end token are discarded.
    pub(crate) fn finish(self) -> FStringRanges {
        let Self { raw, .. } = self;
        FStringRanges { raw }
    }
}
123 changes: 70 additions & 53 deletions crates/ruff_python_index/src/indexer.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
use crate::CommentRangesBuilder;
use ruff_python_ast::Stmt;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::Tok;
Expand All @@ -11,28 +10,26 @@ use ruff_python_trivia::{
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};

use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
use crate::CommentRangesBuilder;

pub struct Indexer {
comment_ranges: CommentRanges,

/// Stores the start offset of continuation lines.
continuation_lines: Vec<TextSize>,

/// The range of all f-string in the source document. The ranges are sorted by their
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
f_string_ranges: Vec<TextRange>,
/// The ranges of all f-strings in the source document.
fstring_ranges: FStringRanges,
}

impl Indexer {
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
assert!(TextSize::try_from(locator.contents().len()).is_ok());

let mut comment_ranges_builder = CommentRangesBuilder::default();
let mut fstring_ranges_builder = FStringRangesBuilder::default();
let mut continuation_lines = Vec::new();
let mut f_string_ranges = Vec::new();
// Range for the first f-string start token in a f-string that could
// potentially contain nested f-strings.
let mut first_f_string_start_range = None;
let mut f_string_start_count = 0u32;
// Token, end
let mut prev_end = TextSize::default();
let mut prev_token: Option<&Tok> = None;
Expand Down Expand Up @@ -63,42 +60,20 @@ impl Indexer {
}

comment_ranges_builder.visit_token(tok, *range);
fstring_ranges_builder.visit_token(tok, *range);

match tok {
Tok::Newline | Tok::NonLogicalNewline => {
line_start = range.end();
}
Tok::FStringStart => {
f_string_start_count += 1;
if f_string_start_count == 1 {
first_f_string_start_range = Some(*range);
}
}
Tok::FStringEnd => {
// This is always going to be > 0, because the lexer will only
// emit the end token if there was a start token to begin with.
f_string_start_count -= 1;
if f_string_start_count == 0 {
if let Some(start_range) = first_f_string_start_range.take() {
let f_string_range = TextRange::new(start_range.start(), range.end());
f_string_ranges.push(f_string_range);

if matches!(locator.slice(range), "'''" | r#"""""#) {
triple_quoted_string_ranges.push(f_string_range);
}
}
}
}
_ => {}
if matches!(tok, Tok::Newline | Tok::NonLogicalNewline) {
line_start = range.end();
}

prev_token = Some(tok);
prev_end = range.end();
}

Self {
comment_ranges: comment_ranges_builder.finish(),
continuation_lines,
f_string_ranges,
fstring_ranges: fstring_ranges_builder.finish(),
}
}

Expand All @@ -107,6 +82,11 @@ impl Indexer {
&self.comment_ranges
}

/// Returns the [`FStringRanges`] built from the f-string tokens seen while
/// indexing, for looking up the f-string range at a given offset.
pub const fn fstring_ranges(&self) -> &FStringRanges {
&self.fstring_ranges
}

/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
&self.continuation_lines
Expand All @@ -118,22 +98,6 @@ impl Indexer {
self.continuation_lines.binary_search(&line_start).is_ok()
}

/// Return the [`TextRange`] of the f-string containing a given offset.
///
/// Returns `None` when no recorded f-string range contains `offset`.
pub fn f_string_range(&self, offset: TextSize) -> Option<TextRange> {
// Binary search over `f_string_ranges`; this relies on the ranges being
// sorted by start and non-overlapping, as documented on the field.
let Ok(string_range_index) = self.f_string_ranges.binary_search_by(|range| {
if offset < range.start() {
// The offset lies before this range, so the match (if any) is to the left.
std::cmp::Ordering::Greater
} else if range.contains(offset) {
// The offset falls inside this range: this is the f-string we want.
std::cmp::Ordering::Equal
} else {
// The offset lies past this range, so the match (if any) is to the right.
std::cmp::Ordering::Less
}
}) else {
return None;
};
Some(self.f_string_ranges[string_range_index])
}

/// Returns `true` if a statement or expression includes at least one comment.
pub fn has_comments<T>(&self, node: &T, locator: &Locator) -> bool
where
Expand Down Expand Up @@ -269,7 +233,7 @@ mod tests {
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{lexer, Mode};
use ruff_source_file::Locator;
use ruff_text_size::TextSize;
use ruff_text_size::{TextRange, TextSize};

use crate::Indexer;

Expand Down Expand Up @@ -353,4 +317,57 @@ import os
]
);
}

// Checks that the indexer records one range per f-string: a plain f-string,
// every nesting level of a nested f-string, and each part of an implicit
// concatenation.
#[test]
fn test_f_string_ranges() {
let contents = r#"
f"normal f-string"
f"start {f"inner {f"another"}"} end"
f"implicit " f"concatenation"
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
// Offsets are relative to the trimmed `contents`; ranges come back in
// ascending start order, so an outer f-string precedes the ones nested in it.
assert_eq!(
indexer.fstring_ranges().ranges().collect::<Vec<_>>(),
&[
// f"normal f-string"
TextRange::new(TextSize::from(0), TextSize::from(18)),
// f"start {...} end" (outermost)
TextRange::new(TextSize::from(19), TextSize::from(55)),
// f"inner {...}" (nested once)
TextRange::new(TextSize::from(28), TextSize::from(49)),
// f"another" (nested twice)
TextRange::new(TextSize::from(37), TextSize::from(47)),
// f"implicit "
TextRange::new(TextSize::from(56), TextSize::from(68)),
// f"concatenation"
TextRange::new(TextSize::from(69), TextSize::from(85)),
]
);
}

// Checks that triple-quoted (multiline) f-strings are recorded, including a
// triple-quoted f-string nested inside another one.
#[test]
fn test_triple_quoted_f_string_ranges() {
let contents = r#"
f"""
this is one
multiline f-string
"""
f'''
and this is
another
'''
f"""
this is a {f"""nested multiline
f-string"""}
"""
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
// Offsets are relative to the trimmed `contents`; ranges come back in
// ascending start order, so the outer f-string precedes the nested one.
assert_eq!(
indexer.fstring_ranges().ranges().collect::<Vec<_>>(),
&[
// First f-string, delimited by triple double quotes.
TextRange::new(TextSize::from(0), TextSize::from(39)),
// Second f-string, delimited by triple single quotes.
TextRange::new(TextSize::from(40), TextSize::from(68)),
// Third f-string (outer), which contains a nested f-string.
TextRange::new(TextSize::from(69), TextSize::from(122)),
// The nested triple-quoted f-string inside the third one.
TextRange::new(TextSize::from(85), TextSize::from(117)),
]
);
}
}
1 change: 1 addition & 0 deletions crates/ruff_python_index/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod comment_ranges;
mod fstring_ranges;
mod indexer;

pub use comment_ranges::CommentRangesBuilder;
Expand Down

0 comments on commit de2bd87

Please sign in to comment.