From 01123d5966e34118466428d840e09f012731b386 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 25 Sep 2023 13:06:30 +0530 Subject: [PATCH] Ignore quote escapes in expression part of f-string (#7597) This PR fixes the following issues w.r.t. the PEP 701 changes: 1. Mark all unformatted comments inside f-strings as formatted only _after_ the f-string has been formatted. 2. Do not escape or remove the quote escape when normalizing the expression part of an f-string. This PR also updates the `--files-with-errors` number to be 1 less. This is because we can now parse the [`test_fstring.py`](https://discord.com/channels/1039017663004942429/1082324263199064206/1154633274887516254) file in the CPython repository, which contains the new f-string syntax. This is also the file which updates the similarity index for CPython compared to main. `cargo test -p ruff_python_formatter` | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76051 | 1789 | 1632 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 323 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99979 | 3496 | 22 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | | project | similarity index | total files | changed files | |--------------|------------------:|------------------:|------------------:| | cpython | 0.76083 | 1789 | 1631 | | django | 0.99983 | 2760 | 36 | | transformers | 0.99963 | 2587 | 323 | | twine | 1.00000 | 33 | 0 | | typeshed | 0.99979 | 3496 | 22 | | warehouse | 0.99967 | 648 | 15 | | zulip | 0.99972 | 1437 | 21 | --- .../src/expression/string.rs | 60 ++++++++++++------- scripts/formatter_ecosystem_checks.sh | 2 +- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 65228230f4d33..b7078459cf3d2 100644 --- 
a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -138,16 +138,8 @@ impl<'a> FormatString<'a> { impl<'a> Format> for FormatString<'a> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. - // This is to mark all of those comments as formatted but we need to - // figure out how to handle them. - if matches!(self.string, AnyString::FString(_)) { - f.context() - .comments() - .mark_verbatim_node_comments_formatted(self.string.into()); - } let locator = f.context().locator(); - match self.layout { + let result = match self.layout { StringLayout::Default => { if self.string.is_implicit_concatenated() { in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f) @@ -170,7 +162,19 @@ impl<'a> Format> for FormatString<'a> { StringLayout::ImplicitConcatenatedStringInBinaryLike => { FormatStringContinuation::new(self.string).fmt(f) } + }; + // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. + // This is to mark all of those comments as formatted but we need to + // figure out how to handle them. Note that this needs to be done only + // after the f-string is formatted, so only for all the non-formatted + // comments. 
+ if let AnyString::FString(fstring) = self.string { + let comments = f.context().comments(); + fstring.values.iter().for_each(|value| { + comments.mark_verbatim_node_comments_formatted(value.into()); + }); } + result } } @@ -435,11 +439,7 @@ impl StringPart { } }; - let normalized = normalize_string( - locator.slice(self.content_range), - quotes, - self.prefix.is_raw_string(), - ); + let normalized = normalize_string(locator.slice(self.content_range), quotes, self.prefix); NormalizedString { prefix: self.prefix, @@ -531,6 +531,10 @@ impl StringPrefix { pub(super) const fn is_raw_string(self) -> bool { self.contains(StringPrefix::RAW) || self.contains(StringPrefix::RAW_UPPER) } + + pub(super) const fn is_fstring(self) -> bool { + self.contains(StringPrefix::F_STRING) + } } impl Format> for StringPrefix { @@ -770,7 +774,7 @@ impl Format> for StringQuotes { /// with the provided [`StringQuotes`] style. /// /// Returns the normalized string and whether it contains new lines. -fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow { +fn normalize_string(input: &str, quotes: StringQuotes, prefix: StringPrefix) -> Cow { // The normalized string if `input` is not yet normalized. // `output` must remain empty if `input` is already normalized. 
let mut output = String::new(); @@ -782,14 +786,30 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow let preferred_quote = style.as_char(); let opposite_quote = style.invert().as_char(); - let mut chars = input.char_indices(); + let mut chars = input.char_indices().peekable(); + + let is_raw = prefix.is_raw_string(); + let is_fstring = prefix.is_fstring(); + let mut formatted_value_nesting = 0u32; while let Some((index, c)) = chars.next() { + if is_fstring && matches!(c, '{' | '}') { + if chars.peek().copied().is_some_and(|(_, next)| next == c) { + // Skip over the second character of the double braces + chars.next(); + } else if c == '{' { + formatted_value_nesting += 1; + } else { + // Safe to assume that `c == '}'` here because of the matched pattern above + formatted_value_nesting = formatted_value_nesting.saturating_sub(1); + } + continue; + } if c == '\r' { output.push_str(&input[last_index..index]); // Skip over the '\r' character, keep the `\n` - if input.as_bytes().get(index + 1).copied() == Some(b'\n') { + if chars.peek().copied().is_some_and(|(_, next)| next == '\n') { chars.next(); } // Replace the `\r` with a `\n` @@ -800,9 +820,9 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow last_index = index + '\r'.len_utf8(); } else if !quotes.triple && !is_raw { if c == '\\' { - if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) { + if let Some((_, next)) = chars.peek().copied() { #[allow(clippy::if_same_then_else)] - if next == opposite_quote { + if next == opposite_quote && formatted_value_nesting == 0 { // Remove the escape by ending before the backslash and starting again with the quote chars.next(); output.push_str(&input[last_index..index]); @@ -815,7 +835,7 @@ fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow chars.next(); } } - } else if c == preferred_quote { + } else if c == preferred_quote && formatted_value_nesting == 0 { // Escape the 
quote output.push_str(&input[last_index..index]); output.push('\\'); diff --git a/scripts/formatter_ecosystem_checks.sh b/scripts/formatter_ecosystem_checks.sh index 46966c91cc224..5c099cf7046e5 100755 --- a/scripts/formatter_ecosystem_checks.sh +++ b/scripts/formatter_ecosystem_checks.sh @@ -64,7 +64,7 @@ git -C "$dir/cpython" checkout 1a1bfc28912a39b500c578e9f10a8a222638d411 time cargo run --bin ruff_dev -- format-dev --stability-check \ --error-file "$target/progress_projects_errors.txt" --log-file "$target/progress_projects_log.txt" --stats-file "$target/progress_projects_stats.txt" \ - --files-with-errors 16 --multi-project "$dir" || ( + --files-with-errors 15 --multi-project "$dir" || ( echo "Ecosystem check failed" cat "$target/progress_projects_log.txt" exit 1