From 7b2a6b214b891f0da742f946e69a070127f9469c Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 12 Feb 2024 20:36:20 +0530 Subject: [PATCH] Preview minimal f-string formatting --- .../ruff/expression/fstring.options.json | 8 + .../test/fixtures/ruff/expression/fstring.py | 142 ++++ .../src/comments/placement.rs | 22 + crates/ruff_python_formatter/src/context.rs | 85 +- .../src/expression/expr_f_string.rs | 18 + crates/ruff_python_formatter/src/options.rs | 9 + .../src/other/bytes_literal.rs | 2 +- .../src/other/f_string.rs | 108 ++- .../src/other/f_string_element.rs | 229 ++++++ crates/ruff_python_formatter/src/other/mod.rs | 1 + .../src/other/string_literal.rs | 2 +- crates/ruff_python_formatter/src/preview.rs | 5 + .../ruff_python_formatter/src/string/mod.rs | 30 +- ...bility@cases__preview_long_strings.py.snap | 22 +- ...__preview_long_strings__regression.py.snap | 35 +- .../format@expression__fstring.py.snap | 746 +++++++++++++++++- 16 files changed, 1411 insertions(+), 53 deletions(-) create mode 100644 crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json create mode 100644 crates/ruff_python_formatter/src/other/f_string_element.rs diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json new file mode 100644 index 00000000000000..e3c32249eeffb6 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.options.json @@ -0,0 +1,8 @@ +[ + { + "preview": "enabled" + }, + { + "preview": "disabled" + } +] diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py index 017d243f1f08ea..1e51406fc91023 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/expression/fstring.py @@ -62,3 +62,145 @@ x = f'''a{""}b''' y = f'''c{1}d"""e''' z = f'''a{""}b''' f'''c{1}d"""e''' + +# F-String formatting test cases (Preview) + +# Simple expression with a mix of debug expression and comments. +x = f"{a}" +x = f"{ + a = }" +x = f"{ # comment + a }" +x = f"{ # comment + a = }" + +# Remove the parentheses as adding them doesn't make then fit within the line length limit. +# This is similar to how we format it before f-string formatting. +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc" +) +# Here, we would use the best fit layout to put the f-string indented on the next line +# similar to the next example. +aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" +) + +# This should never add the optional parentheses because even after adding them, the +# f-string exceeds the line length limit. +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" + +# Multiple larger expressions which exceeds the line length limit. Here, we need to decide +# whether to split at the first or second expression. This should work similarly to the +# assignment statement formatting where we split from right to left in preview mode. +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" + +# The above example won't split but when we start introducing line breaks: +x = f"aaaaaaaaaaaa { + bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { + ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd + } eeeeeeeeeeeeee" + +# But, in case comments are present, we would split at the expression containing the +# comments: +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb + } cccccccccccccccccccc { # comment + ddddddddddddddd } eeeeeeeeeeeeee" + +# Here, the expression part itself starts with a curly brace so we need to add an extra +# space between the opening curly brace and the expression. +x = f"{ {'x': 1, 'y': 2} }" +# Although the extra space isn't required before the ending curly brace, we add it for +# consistency. +x = f"{ {'x': 1, 'y': 2}}" +x = f"{ {'x': 1, 'y': 2} = }" +x = f"{ # comment + {'x': 1, 'y': 2} }" +x = f"{ # comment + {'x': 1, 'y': 2} = }" + +# But, in this case, we would split the expression itself because it exceeds the line +# length limit so we need not add the extra space. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccc'} +}" +# And, split the expression itself because it exceeds the line length. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'} +}" + +# Triple-quoted strings +# It's ok to use the same quote char for the inner string if it's single-quoted. +f"""test {'inner'}""" +f"""test {"inner"}""" +# But if the inner string is also triple-quoted then we should preserve the existing quotes. +f"""test {'''inner'''}""" + +# Comments + +# No comments should be dropped! +f"{ # comment 1 + # comment 2 + foo # comment 3 + # comment 4 +}" # comment 5 +# comment 6 + +# Conversion flags +# +# This is not a valid Python code because of the additional whitespace between the `!` +# and conversion type. But, our parser isn't strict about this. This should probably be +# removed once we have a strict parser. +x = f"aaaaaaaaa { x ! r }" + +# Even in the case of debug expresions, we only need to preserve the whitespace within +# the expression part of the replacement field. +x = f"aaaaaaaaa { x = ! r }" + +# Combine conversion flags with format specifiers +x = f"{x = ! s + :>0 + + }" +# This is interesting. There can be a comment after the format specifier but only if it's +# on it's own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details. +# We'll format is as trailing comments. +x = f"{x !s + :>0 + # comment + }" + +x = f""" +{ # dangling comment 1 + x = :.0{y # dangling comment 2 + }f}""" + +# Here, the debug expression is in a nested f-string so we should start preserving +# whitespaces from that point onwards. This means we should format the outer f-string. +x = f"""{"foo " + # comment 1 + f"{ x = + + }" # comment 2 + } + """ + +# Mix of various features. +f"{ # dangling comment 1 + foo # after foo + :>{ + x # after x + } + # dangling comment 2 + # dangling comment 3 +} woah {x}" diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 2d958ebee98f33..b6709473e83610 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -289,6 +289,28 @@ fn handle_enclosed_comment<'a>( } } AnyNodeRef::FString(fstring) => CommentPlacement::dangling(fstring, comment), + AnyNodeRef::FStringExpressionElement(_) => { + // Handle comments after the format specifier (should be rare): + // + // ```python + // f"literal { + // expr:.3f + // # comment + // }" + // ``` + // + // This is a valid comment placement. + if matches!( + comment.preceding_node(), + Some( + AnyNodeRef::FStringExpressionElement(_) | AnyNodeRef::FStringLiteralElement(_) + ) + ) { + CommentPlacement::dangling(comment.enclosing_node(), comment) + } else { + handle_bracketed_end_of_line_comment(comment, locator) + } + } AnyNodeRef::ExprList(_) | AnyNodeRef::ExprSet(_) | AnyNodeRef::ExprListComp(_) diff --git a/crates/ruff_python_formatter/src/context.rs b/crates/ruff_python_formatter/src/context.rs index b5dc85fcb3039d..d3dc848daa6ccf 100644 --- a/crates/ruff_python_formatter/src/context.rs +++ b/crates/ruff_python_formatter/src/context.rs @@ -1,11 +1,22 @@ use crate::comments::Comments; -use crate::string::QuoteChar; +use crate::string::{QuoteChar, StringQuotes}; use crate::PyFormatOptions; use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode}; use ruff_source_file::Locator; use std::fmt::{Debug, Formatter}; use std::ops::{Deref, DerefMut}; +/// Location of the expression which is currently being formatted. +#[derive(Copy, Clone, Debug, Default)] +pub(crate) enum ExpressionLocation { + /// The expression is inside an f-string, in the replacement field i.e., `f"foo {x}"`. + /// + /// The containing `StringQuotes` is surrounding f-string quote information. + InsideFString(StringQuotes), + #[default] + Other, +} + #[derive(Clone)] pub struct PyFormatContext<'a> { options: PyFormatOptions, @@ -22,6 +33,7 @@ pub struct PyFormatContext<'a> { /// quote style that is inverted from the one here in order to ensure that /// the formatted Python code will be valid. docstring: Option, + expression_location: ExpressionLocation, } impl<'a> PyFormatContext<'a> { @@ -33,6 +45,7 @@ impl<'a> PyFormatContext<'a> { node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other), indent_level: IndentLevel::new(0), docstring: None, + expression_location: ExpressionLocation::Other, } } @@ -86,6 +99,14 @@ impl<'a> PyFormatContext<'a> { } } + pub(crate) fn expression_location(&self) -> ExpressionLocation { + self.expression_location + } + + pub(crate) fn set_expression_location(&mut self, expression_location: ExpressionLocation) { + self.expression_location = expression_location; + } + /// Returns `true` if preview mode is enabled. pub(crate) const fn is_preview(&self) -> bool { self.options.preview().is_enabled() @@ -332,3 +353,65 @@ where .set_indent_level(self.saved_level); } } + +pub(crate) struct WithExprLocation<'a, B, D> +where + D: DerefMut, + B: Buffer>, +{ + buffer: D, + saved_location: ExpressionLocation, +} + +impl<'a, B, D> WithExprLocation<'a, B, D> +where + D: DerefMut, + B: Buffer>, +{ + pub(crate) fn new(expr_location: ExpressionLocation, mut buffer: D) -> Self { + let context = buffer.state_mut().context_mut(); + let saved_location = context.expression_location(); + + context.set_expression_location(expr_location); + + Self { + buffer, + saved_location, + } + } +} + +impl<'a, B, D> Deref for WithExprLocation<'a, B, D> +where + D: DerefMut, + B: Buffer>, +{ + type Target = B; + + fn deref(&self) -> &Self::Target { + &self.buffer + } +} + +impl<'a, B, D> DerefMut for WithExprLocation<'a, B, D> +where + D: DerefMut, + B: Buffer>, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.buffer + } +} + +impl<'a, B, D> Drop for WithExprLocation<'a, B, D> +where + D: DerefMut, + B: Buffer>, +{ + fn drop(&mut self) { + self.buffer + .state_mut() + .context_mut() + .set_expression_location(self.saved_location); + } +} diff --git a/crates/ruff_python_formatter/src/expression/expr_f_string.rs b/crates/ruff_python_formatter/src/expression/expr_f_string.rs index dcbb85520e9d1b..76af5630c707f5 100644 --- a/crates/ruff_python_formatter/src/expression/expr_f_string.rs +++ b/crates/ruff_python_formatter/src/expression/expr_f_string.rs @@ -48,6 +48,24 @@ impl NeedsParentheses for ExprFString { ) -> OptionalParentheses { if self.value.is_implicit_concatenated() { OptionalParentheses::Multiline + // TODO(dhruvmanila): Ideally what we want here is a new variant which + // is something like: + // - If the expression fits by just adding the parentheses, then add them and + // avoid breaking the f-string expression. So, + // ``` + // xxxxxxxxx = ( + // f"aaaaaaaaaaaa { xxxxxxx + yyyyyyyy } bbbbbbbbbbbbb" + // ) + // ``` + // - But, if the expression is too long to fit even with parentheses, then + // don't add the parentheses and instead break the expression at `soft_line_break`. + // ``` + // xxxxxxxxx = f"aaaaaaaaaaaa { + // xxxxxxxxx + yyyyyyyyyy + // } bbbbbbbbbbbbb" + // ``` + // This isn't decided yet, refer to the relevant discussion: + // https://github.com/astral-sh/ruff/discussions/9785 } else if AnyString::FString(self).is_multiline(context.source()) { OptionalParentheses::Never } else { diff --git a/crates/ruff_python_formatter/src/options.rs b/crates/ruff_python_formatter/src/options.rs index 8deaf926e41125..7b74c7b0d5a3cc 100644 --- a/crates/ruff_python_formatter/src/options.rs +++ b/crates/ruff_python_formatter/src/options.rs @@ -466,3 +466,12 @@ pub enum PythonVersion { Py311, Py312, } + +impl PythonVersion { + /// Return `true` if the current version supports [PEP 701]. + /// + /// [PEP 701]: https://peps.python.org/pep-0701/ + pub fn supports_pep_701(self) -> bool { + self >= Self::Py312 + } +} diff --git a/crates/ruff_python_formatter/src/other/bytes_literal.rs b/crates/ruff_python_formatter/src/other/bytes_literal.rs index 63011c2e3e6cff..42ed8d81c4d789 100644 --- a/crates/ruff_python_formatter/src/other/bytes_literal.rs +++ b/crates/ruff_python_formatter/src/other/bytes_literal.rs @@ -17,7 +17,7 @@ impl FormatNodeRule for FormatBytesLiteral { Quoting::CanChange, &locator, f.options().quote_style(), - f.context().docstring(), + f.context(), is_hex_codes_in_unicode_sequences_enabled(f.context()), ) .fmt(f) diff --git a/crates/ruff_python_formatter/src/other/f_string.rs b/crates/ruff_python_formatter/src/other/f_string.rs index eb5458c1c83247..4f626325108072 100644 --- a/crates/ruff_python_formatter/src/other/f_string.rs +++ b/crates/ruff_python_formatter/src/other/f_string.rs @@ -1,9 +1,15 @@ +use ruff_formatter::write; use ruff_python_ast::FString; use ruff_text_size::Ranged; use crate::prelude::*; use crate::preview::is_hex_codes_in_unicode_sequences_enabled; -use crate::string::{Quoting, StringNormalizer}; +use crate::preview::is_pep_701_enabled; +use crate::string::{ + choose_quotes, Quoting, StringNormalizer, StringPart, StringPrefix, StringQuotes, +}; + +use super::f_string_element::FormatFStringElement; /// Formats an f-string which is part of a larger f-string expression. /// @@ -25,27 +31,85 @@ impl<'a> FormatFString<'a> { impl Format> for FormatFString<'_> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { let locator = f.context().locator(); + let comments = f.context().comments().clone(); + + if !is_pep_701_enabled(f.context()) { + let result = StringNormalizer::from_source(self.value.range(), &locator) + .normalize( + self.quoting, + &locator, + f.options().quote_style(), + f.context(), + is_hex_codes_in_unicode_sequences_enabled(f.context()), + ) + .fmt(f); + self.value.elements.iter().for_each(|value| { + comments.mark_verbatim_node_comments_formatted(value.into()); + }); + return result; + } + + let string = StringPart::from_source(self.value.range(), &locator); + + // TODO(dhruvmanila): This could probably be simplified for Python 3.12 specifically + // as same quotes can be re-used inside an f-string. + let quotes = choose_quotes( + &string, + &locator, + self.quoting, + f.options().quote_style(), + f.context(), + ); + + let is_multiline = + memchr::memchr2(b'\n', b'\r', locator.slice(self.value).as_bytes()).is_some(); + let context = FStringContext::new(string.prefix(), quotes, is_multiline); + + // Starting prefix and quote + write!(f, [string.prefix(), quotes])?; + + format_with(|f| { + f.join() + .entries( + self.value + .elements + .iter() + .map(|element| FormatFStringElement::new(element, context)), + ) + .finish() + }) + .fmt(f)?; + + // Ending quote + quotes.fmt(f) + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) struct FStringContext { + prefix: StringPrefix, + quotes: StringQuotes, + is_multiline: bool, +} + +impl FStringContext { + const fn new(prefix: StringPrefix, quotes: StringQuotes, is_multiline: bool) -> Self { + Self { + prefix, + quotes, + is_multiline, + } + } + + pub(crate) const fn quotes(self) -> StringQuotes { + self.quotes + } + + pub(crate) const fn prefix(self) -> StringPrefix { + self.prefix + } - let result = StringNormalizer::from_source(self.value.range(), &locator) - .normalize( - self.quoting, - &locator, - f.options().quote_style(), - f.context().docstring(), - is_hex_codes_in_unicode_sequences_enabled(f.context()), - ) - .fmt(f); - - // TODO(dhruvmanila): With PEP 701, comments can be inside f-strings. - // This is to mark all of those comments as formatted but we need to - // figure out how to handle them. Note that this needs to be done only - // after the f-string is formatted, so only for all the non-formatted - // comments. - let comments = f.context().comments(); - self.value.elements.iter().for_each(|value| { - comments.mark_verbatim_node_comments_formatted(value.into()); - }); - - result + pub(crate) const fn should_remove_soft_line_breaks(self) -> bool { + !self.is_multiline } } diff --git a/crates/ruff_python_formatter/src/other/f_string_element.rs b/crates/ruff_python_formatter/src/other/f_string_element.rs new file mode 100644 index 00000000000000..3ccea0c780b015 --- /dev/null +++ b/crates/ruff_python_formatter/src/other/f_string_element.rs @@ -0,0 +1,229 @@ +use std::borrow::Cow; + +use ruff_formatter::{format_args, write, RemoveSoftLinesBuffer}; +use ruff_python_ast::{ + ConversionFlag, Expr, FStringElement, FStringExpressionElement, FStringLiteralElement, +}; +use ruff_text_size::Ranged; + +use crate::comments::{dangling_open_parenthesis_comments, trailing_comments}; +use crate::context::{ExpressionLocation, NodeLevel, WithExprLocation, WithNodeLevel}; +use crate::prelude::*; +use crate::preview::is_hex_codes_in_unicode_sequences_enabled; +use crate::string::normalize_string; +use crate::verbatim::suppressed_node; + +use super::f_string::FStringContext; + +/// Formats an f-string element which is either a literal or a formatted expression. +/// +/// This delegates the actual formatting to the appropriate formatter. +pub(crate) struct FormatFStringElement<'a> { + element: &'a FStringElement, + context: FStringContext, +} + +impl<'a> FormatFStringElement<'a> { + pub(crate) fn new(element: &'a FStringElement, context: FStringContext) -> Self { + Self { element, context } + } +} + +impl Format> for FormatFStringElement<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + match self.element { + FStringElement::Literal(string_literal) => { + FormatFStringLiteralElement::new(string_literal, self.context).fmt(f) + } + FStringElement::Expression(expression) => { + FormatFStringExpressionElement::new(expression, self.context).fmt(f) + } + } + } +} + +/// Formats an f-string literal element. +pub(crate) struct FormatFStringLiteralElement<'a> { + element: &'a FStringLiteralElement, + context: FStringContext, +} + +impl<'a> FormatFStringLiteralElement<'a> { + pub(crate) fn new(element: &'a FStringLiteralElement, context: FStringContext) -> Self { + Self { element, context } + } +} + +impl Format> for FormatFStringLiteralElement<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let literal_content = f.context().locator().slice(self.element.range()); + let normalized = normalize_string( + literal_content, + self.context.quotes(), + self.context.prefix(), + is_hex_codes_in_unicode_sequences_enabled(f.context()), + ); + match &normalized { + Cow::Borrowed(_) => source_text_slice(self.element.range()).fmt(f), + Cow::Owned(normalized) => text(normalized).fmt(f), + } + } +} + +/// Formats an f-string expression element. +pub(crate) struct FormatFStringExpressionElement<'a> { + element: &'a FStringExpressionElement, + context: FStringContext, +} + +impl<'a> FormatFStringExpressionElement<'a> { + pub(crate) fn new(element: &'a FStringExpressionElement, context: FStringContext) -> Self { + Self { element, context } + } +} + +impl Format> for FormatFStringExpressionElement<'_> { + fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let FStringExpressionElement { + expression, + debug_text, + conversion, + format_spec, + .. + } = self.element; + + let comments = f.context().comments().clone(); + + if let Some(debug_text) = debug_text { + token("{").fmt(f)?; + + // If debug text is present in a f-string, we'll mark all of the comments + // in this f-string as formatted. + comments.mark_verbatim_node_comments_formatted(self.element.into()); + + write!( + f, + [ + text(&debug_text.leading), + suppressed_node(&**expression), + text(&debug_text.trailing), + ] + )?; + + // Even if debug text is present, any whitespace between the + // conversion flag and the format spec doesn't need to be preserved. + match conversion { + ConversionFlag::Str => text("!s").fmt(f)?, + ConversionFlag::Ascii => text("!a").fmt(f)?, + ConversionFlag::Repr => text("!r").fmt(f)?, + ConversionFlag::None => (), + } + + if let Some(format_spec) = format_spec.as_deref() { + write!(f, [token(":"), suppressed_node(format_spec)])?; + } + + token("}").fmt(f) + } else { + let dangling_item_comments = comments.dangling(self.element); + let (dangling_open_parentheses_comments, trailing_format_spec_comments) = + dangling_item_comments.split_at( + dangling_item_comments + .partition_point(|comment| comment.start() < expression.start()), + ); + + let item = format_with(|f| { + let line_break_or_space = match expression.as_ref() { + // If an expression starts with a `{`, we need to add a space before the + // curly brace to avoid turning it into a literal curly with `{{`. + // + // For example, + // ```python + // f"{ {'x': 1, 'y': 2} }" + // # ^ ^ + // ``` + // + // We need to preserve the space highlighted by `^`. + Expr::Dict(_) | Expr::DictComp(_) | Expr::Set(_) | Expr::SetComp(_) => { + Some(soft_line_break_or_space()) + } + _ => None, + }; + + let f = &mut WithExprLocation::new( + ExpressionLocation::InsideFString(self.context.quotes()), + f, + ); + + write!(f, [line_break_or_space, expression.format()])?; + + // Conversion comes first, then the format spec. + match conversion { + ConversionFlag::Str => text("!s").fmt(f)?, + ConversionFlag::Ascii => text("!a").fmt(f)?, + ConversionFlag::Repr => text("!r").fmt(f)?, + ConversionFlag::None => (), + } + + if let Some(format_spec) = format_spec.as_deref() { + let elements = + format_with(|f| { + f.join() + .entries(format_spec.elements.iter().map(|element| { + FormatFStringElement::new(element, self.context) + })) + .finish() + }); + write!( + f, + [ + token(":"), + elements, + trailing_comments(trailing_format_spec_comments) + ] + )?; + } + + line_break_or_space.fmt(f) + }); + + let inner = format_with(|f| { + let mut buffer = RemoveSoftLinesBuffer::new(f); + + if dangling_open_parentheses_comments.is_empty() { + if self.context.should_remove_soft_line_breaks() { + write!(buffer, [group(&soft_block_indent(&item))]) + } else { + write!(f, [group(&soft_block_indent(&item))]) + } + } else { + if self.context.should_remove_soft_line_breaks() { + write!( + buffer, + [group(&format_args![ + dangling_open_parenthesis_comments( + dangling_open_parentheses_comments + ), + soft_block_indent(&item), + ])] + ) + } else { + write!( + f, + [group(&format_args![ + dangling_open_parenthesis_comments( + dangling_open_parentheses_comments + ), + soft_block_indent(&item), + ])] + ) + } + } + }); + + let mut f = WithNodeLevel::new(NodeLevel::ParenthesizedExpression, f); + + write!(f, [token("{"), inner, token("}")]) + } + } +} diff --git a/crates/ruff_python_formatter/src/other/mod.rs b/crates/ruff_python_formatter/src/other/mod.rs index d07339f717cbf5..2aace837913c4f 100644 --- a/crates/ruff_python_formatter/src/other/mod.rs +++ b/crates/ruff_python_formatter/src/other/mod.rs @@ -7,6 +7,7 @@ pub(crate) mod decorator; pub(crate) mod elif_else_clause; pub(crate) mod except_handler_except_handler; pub(crate) mod f_string; +pub(crate) mod f_string_element; pub(crate) mod f_string_part; pub(crate) mod identifier; pub(crate) mod keyword; diff --git a/crates/ruff_python_formatter/src/other/string_literal.rs b/crates/ruff_python_formatter/src/other/string_literal.rs index c8120721241cb3..69fd6069e82f00 100644 --- a/crates/ruff_python_formatter/src/other/string_literal.rs +++ b/crates/ruff_python_formatter/src/other/string_literal.rs @@ -63,7 +63,7 @@ impl Format> for FormatStringLiteral<'_> { self.layout.quoting(), &locator, quote_style, - f.context().docstring(), + f.context(), is_hex_codes_in_unicode_sequences_enabled(f.context()), ); diff --git a/crates/ruff_python_formatter/src/preview.rs b/crates/ruff_python_formatter/src/preview.rs index 712a7da4170881..f4a63921549c17 100644 --- a/crates/ruff_python_formatter/src/preview.rs +++ b/crates/ruff_python_formatter/src/preview.rs @@ -81,3 +81,8 @@ pub(crate) const fn is_multiline_string_handling_enabled(context: &PyFormatConte pub(crate) const fn is_format_module_docstring_enabled(context: &PyFormatContext) -> bool { context.is_preview() } + +/// Returns `true` if the [`PEP 701`](https://github.com/astral-sh/ruff/issues/7594) preview style is enabled. +pub(crate) fn is_pep_701_enabled(context: &PyFormatContext) -> bool { + context.is_preview() +} diff --git a/crates/ruff_python_formatter/src/string/mod.rs b/crates/ruff_python_formatter/src/string/mod.rs index 2d68054a421e41..e821103695c1ef 100644 --- a/crates/ruff_python_formatter/src/string/mod.rs +++ b/crates/ruff_python_formatter/src/string/mod.rs @@ -4,7 +4,7 @@ use std::iter::FusedIterator; use bitflags::bitflags; use memchr::memchr2; -use ruff_formatter::{format_args, write}; +use ruff_formatter::{format_args, write, FormatContext}; use ruff_python_ast::{ self as ast, Expr, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, }; @@ -13,6 +13,7 @@ use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::comments::{leading_comments, trailing_comments}; +use crate::context::ExpressionLocation; use crate::expression::expr_f_string::f_string_quoting; use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space; use crate::other::f_string::FormatFString; @@ -326,18 +327,12 @@ impl StringNormalizer { quoting: Quoting, locator: &'a Locator, configured_style: QuoteStyle, - parent_docstring_quote_char: Option, + context: &PyFormatContext<'_>, normalize_hex: bool, ) -> NormalizedString<'a> { let raw_content = locator.slice(self.0.content_range()); - let quotes = choose_quotes( - &self.0, - locator, - quoting, - configured_style, - parent_docstring_quote_char, - ); + let quotes = choose_quotes(&self.0, locator, quoting, configured_style, context); let normalized = normalize_string(raw_content, quotes, self.0.prefix(), normalize_hex); @@ -475,7 +470,7 @@ pub(crate) fn choose_quotes( locator: &Locator, quoting: Quoting, configured_style: QuoteStyle, - parent_docstring_quote_char: Option, + context: &PyFormatContext<'_>, ) -> StringQuotes { // Per PEP 8, always prefer double quotes for triple-quoted strings. // Except when using quote-style-preserve. @@ -525,7 +520,7 @@ pub(crate) fn choose_quotes( // Overall this is a bit of a corner case and just inverting the // style from what the parent ultimately decided upon works, even // if it doesn't have perfect alignment with PEP8. - if let Some(quote) = parent_docstring_quote_char { + if let Some(quote) = context.docstring() { QuoteStyle::from(quote.invert()) } else if configured_style.is_preserve() { QuoteStyle::Preserve @@ -536,6 +531,19 @@ pub(crate) fn choose_quotes( configured_style }; + let quoting = + if let ExpressionLocation::InsideFString(f_string_quotes) = context.expression_location() { + if (f_string_quotes.is_triple() && !string.quotes().is_triple()) + || context.options().target_version().supports_pep_701() + { + quoting + } else { + Quoting::Preserve + } + } else { + quoting + }; + match quoting { Quoting::Preserve => string.quotes(), Quoting::CanChange => { diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings.py.snap index a589aeffed92e2..e95632baa85c1f 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings.py.snap @@ -902,7 +902,7 @@ log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share ) dict_with_lambda_values = { -@@ -524,61 +383,54 @@ +@@ -524,65 +383,58 @@ # Complex string concatenations with a method call in the middle. code = ( @@ -941,7 +941,7 @@ log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share log.info( - "Skipping:" - f" {desc['db_id']} {foo('bar',x=123)} {'foo' != 'bar'} {(x := 'abc=')} {pos_share=} {desc['status']} {desc['exposure_max']}" -+ f'Skipping: {desc["db_id"]} {foo("bar",x=123)} {"foo" != "bar"} {(x := "abc=")} {pos_share=} {desc["status"]} {desc["exposure_max"]}' ++ f'Skipping: {desc["db_id"]} {foo("bar", x=123,)} {"foo" != "bar"} {(x := "abc=")} {pos_share=} {desc["status"]} {desc["exposure_max"]}' ) log.info( @@ -981,6 +981,18 @@ log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share ) log.info( +- f"""Skipping: {"a" == 'b'} {desc["ms_name"]} {money=} {dte=} {pos_share=} {desc["status"]} {desc["exposure_max"]}""" ++ f"""Skipping: {"a" == "b"} {desc["ms_name"]} {money=} {dte=} {pos_share=} {desc["status"]} {desc["exposure_max"]}""" + ) + + log.info( +@@ -590,5 +442,5 @@ + ) + + log.info( +- f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}""" ++ f"""Skipping: {"a" == "b"} {desc["ms_name"]} {money=} {dte=} {pos_share=} {desc["status"]} {desc["exposure_max"]}""" + ) ``` ## Ruff Output @@ -1394,7 +1406,7 @@ log.info( ) log.info( - f'Skipping: {desc["db_id"]} {foo("bar",x=123)} {"foo" != "bar"} {(x := "abc=")} {pos_share=} {desc["status"]} {desc["exposure_max"]}' + f'Skipping: {desc["db_id"]} {foo("bar", x=123,)} {"foo" != "bar"} {(x := "abc=")} {pos_share=} {desc["status"]} {desc["exposure_max"]}' ) log.info( @@ -1422,7 +1434,7 @@ log.info( ) log.info( - f"""Skipping: {"a" == 'b'} {desc["ms_name"]} {money=} {dte=} {pos_share=} {desc["status"]} {desc["exposure_max"]}""" + f"""Skipping: {"a" == "b"} {desc["ms_name"]} {money=} {dte=} {pos_share=} {desc["status"]} {desc["exposure_max"]}""" ) log.info( @@ -1430,7 +1442,7 @@ log.info( ) log.info( - f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}""" + f"""Skipping: {"a" == "b"} {desc["ms_name"]} {money=} {dte=} {pos_share=} {desc["status"]} {desc["exposure_max"]}""" ) ``` diff --git a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings__regression.py.snap b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings__regression.py.snap index 762af6aa16c7ee..3d7e731a3659ba 100644 --- a/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings__regression.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/black_compatibility@cases__preview_long_strings__regression.py.snap @@ -832,7 +832,7 @@ s = f'Lorem Ipsum is simply dummy text of the printing and typesetting industry: some_commented_string = ( # This comment stays at the top. "This string is long but not so long that it needs hahahah toooooo be so greatttt" -@@ -279,36 +280,25 @@ +@@ -279,37 +280,26 @@ ) lpar_and_rpar_have_comments = func_call( # LPAR Comment @@ -852,31 +852,32 @@ s = f'Lorem Ipsum is simply dummy text of the printing and typesetting industry: - f" {'' if ID is None else ID} | perl -nE 'print if /^{field}:/'" -) +cmd_fstring = f"sudo -E deluge-console info --detailed --sort-reverse=time_added {'' if ID is None else ID} | perl -nE 'print if /^{field}:/'" ++ ++cmd_fstring = f"sudo -E deluge-console info --detailed --sort-reverse=time_added {'{{}}' if ID is None else ID} | perl -nE 'print if /^{field}:/'" -cmd_fstring = ( - "sudo -E deluge-console info --detailed --sort-reverse=time_added" - f" {'{{}}' if ID is None else ID} | perl -nE 'print if /^{field}:/'" -) -+cmd_fstring = f"sudo -E deluge-console info --detailed --sort-reverse=time_added {'{{}}' if ID is None else ID} | perl -nE 'print if /^{field}:/'" ++cmd_fstring = f"sudo -E deluge-console info --detailed --sort-reverse=time_added {{'' if ID is None else ID}} | perl -nE 'print if /^{field}:/'" -cmd_fstring = ( - "sudo -E deluge-console info --detailed --sort-reverse=time_added {'' if ID is" - f" None else ID}} | perl -nE 'print if /^{field}:/'" -) -+cmd_fstring = f"sudo -E deluge-console info --detailed --sort-reverse=time_added {{'' if ID is None else ID}} | perl -nE 'print if /^{field}:/'" - +fstring = f"This string really doesn't need to be an {{{{fstring}}}}, but this one most certainly, absolutely {does}." -+ + fstring = ( - "This string really doesn't need to be an {{fstring}}, but this one most" - f" certainly, absolutely {does}." + f"We have to remember to escape {braces}." " Like {these}." f" But not {this}." ) -- --fstring = f"We have to remember to escape {braces}. Like {{these}}. But not {this}." +-fstring = f"We have to remember to escape {braces}. Like {{these}}. But not {this}." +- class A: + class B: @@ -364,10 +354,7 @@ def foo(): if not hasattr(module, name): @@ -979,7 +980,13 @@ s = f'Lorem Ipsum is simply dummy text of the printing and typesetting industry: ) # The parens should NOT be removed in this case. -@@ -518,88 +494,78 @@ +@@ -513,93 +489,83 @@ + + + temp_msg = ( +- f"{f'{humanize_number(pos)}.': <{pound_len+2}} " ++ f"{f'{humanize_number(pos)}.': <{pound_len + 2}} " + f"{balance: <{bal_len + 5}} " f"<<{author.display_name}>>\n" ) @@ -1103,7 +1110,13 @@ s = f'Lorem Ipsum is simply dummy text of the printing and typesetting industry: "6. Click on Create Credential at the top." '7. At the top click the link for "API key".' "8. No application restrictions are needed. Click Create at the bottom." -@@ -613,55 +579,40 @@ +@@ -608,60 +574,45 @@ + + # It shouldn't matter if the string prefixes are capitalized. + temp_msg = ( +- f"{F'{humanize_number(pos)}.': <{pound_len+2}} " ++ f"{f'{humanize_number(pos)}.': <{pound_len + 2}} " + f"{balance: <{bal_len + 5}} " f"<<{author.display_name}>>\n" ) @@ -1688,7 +1701,7 @@ class X: temp_msg = ( - f"{f'{humanize_number(pos)}.': <{pound_len+2}} " + f"{f'{humanize_number(pos)}.': <{pound_len + 2}} " f"{balance: <{bal_len + 5}} " f"<<{author.display_name}>>\n" ) @@ -1773,7 +1786,7 @@ message = ( # It shouldn't matter if the string prefixes are capitalized. temp_msg = ( - f"{F'{humanize_number(pos)}.': <{pound_len+2}} " + f"{f'{humanize_number(pos)}.': <{pound_len + 2}} " f"{balance: <{bal_len + 5}} " f"<<{author.display_name}>>\n" ) diff --git a/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring.py.snap index d9bc028763812d..31b1ca8c35808d 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@expression__fstring.py.snap @@ -68,9 +68,398 @@ result_f = ( x = f'''a{""}b''' y = f'''c{1}d"""e''' z = f'''a{""}b''' f'''c{1}d"""e''' + +# F-String formatting test cases (Preview) + +# Simple expression with a mix of debug expression and comments. +x = f"{a}" +x = f"{ + a = }" +x = f"{ # comment + a }" +x = f"{ # comment + a = }" + +# Remove the parentheses as adding them doesn't make then fit within the line length limit. +# This is similar to how we format it before f-string formatting. +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc" +) +# Here, we would use the best fit layout to put the f-string indented on the next line +# similar to the next example. +aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" +) + +# This should never add the optional parentheses because even after adding them, the +# f-string exceeds the line length limit. +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" + +# Multiple larger expressions which exceeds the line length limit. Here, we need to decide +# whether to split at the first or second expression. This should work similarly to the +# assignment statement formatting where we split from right to left in preview mode. +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" + +# The above example won't split but when we start introducing line breaks: +x = f"aaaaaaaaaaaa { + bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { + ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd + } eeeeeeeeeeeeee" + +# But, in case comments are present, we would split at the expression containing the +# comments: +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb + } cccccccccccccccccccc { # comment + ddddddddddddddd } eeeeeeeeeeeeee" + +# Here, the expression part itself starts with a curly brace so we need to add an extra +# space between the opening curly brace and the expression. +x = f"{ {'x': 1, 'y': 2} }" +# Although the extra space isn't required before the ending curly brace, we add it for +# consistency. +x = f"{ {'x': 1, 'y': 2}}" +x = f"{ {'x': 1, 'y': 2} = }" +x = f"{ # comment + {'x': 1, 'y': 2} }" +x = f"{ # comment + {'x': 1, 'y': 2} = }" + +# But, in this case, we would split the expression itself because it exceeds the line +# length limit so we need not add the extra space. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccc'} +}" +# And, split the expression itself because it exceeds the line length. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'} +}" + +# Triple-quoted strings +# It's ok to use the same quote char for the inner string if it's single-quoted. +f"""test {'inner'}""" +f"""test {"inner"}""" +# But if the inner string is also triple-quoted then we should preserve the existing quotes. +f"""test {'''inner'''}""" + +# Comments + +# No comments should be dropped! +f"{ # comment 1 + # comment 2 + foo # comment 3 + # comment 4 +}" # comment 5 +# comment 6 + +# Conversion flags +# +# This is not a valid Python code because of the additional whitespace between the `!` +# and conversion type. But, our parser isn't strict about this. This should probably be +# removed once we have a strict parser. +x = f"aaaaaaaaa { x ! r }" + +# Even in the case of debug expresions, we only need to preserve the whitespace within +# the expression part of the replacement field. +x = f"aaaaaaaaa { x = ! r }" + +# Combine conversion flags with format specifiers +x = f"{x = ! s + :>0 + + }" +# This is interesting. There can be a comment after the format specifier but only if it's +# on it's own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details. +# We'll format is as trailing comments. +x = f"{x !s + :>0 + # comment + }" + +x = f""" +{ # dangling comment 1 + x = :.0{y # dangling comment 2 + }f}""" + +# Here, the debug expression is in a nested f-string so we should start preserving +# whitespaces from that point onwards. This means we should format the outer f-string. +x = f"""{"foo " + # comment 1 + f"{ x = + + }" # comment 2 + } + """ + +# Mix of various features. +f"{ # dangling comment 1 + foo # after foo + :>{ + x # after x + } + # dangling comment 2 + # dangling comment 3 +} woah {x}" +``` + +## Outputs +### Output 1 +``` +indent-style = space +line-width = 88 +indent-width = 4 +quote-style = Double +line-ending = LineFeed +magic-trailing-comma = Respect +docstring-code = Disabled +docstring-code-line-width = "dynamic" +preview = Enabled +target_version = Py38 +source_type = Python +``` + +```python +(f"{one}" f"{two}") + + +rf"Not-so-tricky \"quote" + +# Regression test for fstrings dropping comments +result_f = ( + "Traceback (most recent call last):\n" + f' File "{__file__}", line {lineno_f + 5}, in _check_recursive_traceback_display\n' + " f()\n" + f' File "{__file__}", line {lineno_f + 1}, in f\n' + " f()\n" + f' File "{__file__}", line {lineno_f + 1}, in f\n' + " f()\n" + f' File "{__file__}", line {lineno_f + 1}, in f\n' + " f()\n" + # XXX: The following line changes depending on whether the tests + # are run through the interactive interpreter or with -m + # It also varies depending on the platform (stack size) + # Fortunately, we don't care about exactness here, so we use regex + r" \[Previous line repeated (\d+) more times\]" + "\n" + "RecursionError: maximum recursion depth exceeded\n" +) + + +# Regression for fstring dropping comments that were accidentally attached to +# an expression inside a formatted value +( + f"{1}" + # comment + "" +) + +( + f"{1}" # comment + f"{2}" +) + +( + f"{1}" f"{2}" # comment +) + +( + 1, + ( # comment + f"{2}" + ), +) + +( + ( + f"{1}" + # comment + ), + 2, +) + +# https://github.com/astral-sh/ruff/issues/6841 +x = f"""a{""}b""" +y = f'''c{1}d"""e''' +z = f"""a{""}b""" f'''c{1}d"""e''' + +# F-String formatting test cases (Preview) + +# Simple expression with a mix of debug expression and comments. +x = f"{a}" +x = f"{ + a = }" +x = f"{ # comment + a +}" +x = f"{ # comment + a = }" + +# Remove the parentheses as adding them doesn't make then fit within the line length limit. +# This is similar to how we format it before f-string formatting. +aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd} cccccccccc" +# Here, we would use the best fit layout to put the f-string indented on the next line +# similar to the next example. +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc} cccccccccc" +) +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc} cccccccccc" +) + +# This should never add the optional parentheses because even after adding them, the +# f-string exceeds the line length limit. +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa {"bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"} ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +} ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" + +# Multiple larger expressions which exceeds the line length limit. Here, we need to decide +# whether to split at the first or second expression. This should work similarly to the +# assignment statement formatting where we split from right to left in preview mode. +x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc {ddddddddddddddd} eeeeeeeeeeeeee" + +# The above example won't split but when we start introducing line breaks: +x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { + ddddddddddddddd +} eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { + ddddddddddddddd +} eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { + ddddddddddddddd +} eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { + ddddddddddddddd +} eeeeeeeeeeeeee" + +# But, in case comments are present, we would split at the expression containing the +# comments: +x = f"aaaaaaaaaaaa { + bbbbbbbbbbbbbb # comment +} cccccccccccccccccccc {ddddddddddddddd} eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { # comment + ddddddddddddddd +} eeeeeeeeeeeeee" + +# Here, the expression part itself starts with a curly brace so we need to add an extra +# space between the opening curly brace and the expression. +x = f"{ {'x': 1, 'y': 2} }" +# Although the extra space isn't required before the ending curly brace, we add it for +# consistency. +x = f"{ {'x': 1, 'y': 2} }" +x = f"{ {'x': 1, 'y': 2} = }" +x = f"{ # comment + {'x': 1, 'y': 2} +}" +x = f"{ # comment + {'x': 1, 'y': 2} = }" + +# But, in this case, we would split the expression itself because it exceeds the line +# length limit so we need not add the extra space. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccc'} +}" +# And, split the expression itself because it exceeds the line length. +xxxxxxx = f"{ + { + 'aaaaaaaaaaaaaaaaaaaaaaaaa', + 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', + 'cccccccccccccccccccccccccc', + } +}" + +# Triple-quoted strings +# It's ok to use the same quote char for the inner string if it's single-quoted. +f"""test {"inner"}""" +f"""test {"inner"}""" +# But if the inner string is also triple-quoted then we should preserve the existing quotes. +f"""test {'''inner'''}""" + +# Comments + +# No comments should be dropped! +f"{ # comment 1 + # comment 2 + foo # comment 3 + # comment 4 +}" # comment 5 +# comment 6 + +# Conversion flags +# +# This is not a valid Python code because of the additional whitespace between the `!` +# and conversion type. But, our parser isn't strict about this. This should probably be +# removed once we have a strict parser. +x = f"aaaaaaaaa {x!r}" + +# Even in the case of debug expresions, we only need to preserve the whitespace within +# the expression part of the replacement field. +x = f"aaaaaaaaa { x = !r}" + +# Combine conversion flags with format specifiers +x = f"{x = !s:>0}" +# This is interesting. There can be a comment after the format specifier but only if it's +# on it's own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details. +# We'll format is as trailing comments. +x = f"{ + x!s:>0 + # comment +}" + +x = f""" +{ # dangling comment 1 + x = :.0{y # dangling comment 2 + }f}""" + +# Here, the debug expression is in a nested f-string so we should start preserving +# whitespaces from that point onwards. This means we should format the outer f-string. +x = f"""{ + "foo " # comment 1 + + f"{ x = + + }" # comment 2 +} + """ + +# Mix of various features. +f"{ # dangling comment 1 + foo:>{ # after foo + x # after x + } + # dangling comment 2 + # dangling comment 3 +} woah {x}" +``` + + +### Output 2 +``` +indent-style = space +line-width = 88 +indent-width = 4 +quote-style = Double +line-ending = LineFeed +magic-trailing-comma = Respect +docstring-code = Disabled +docstring-code-line-width = "dynamic" +preview = Disabled +target_version = Py38 +source_type = Python ``` -## Output ```python (f"{one}" f"{two}") @@ -134,6 +523,361 @@ result_f = ( x = f"""a{""}b""" y = f'''c{1}d"""e''' z = f"""a{""}b""" f'''c{1}d"""e''' + +# F-String formatting test cases (Preview) + +# Simple expression with a mix of debug expression and comments. +x = f"{a}" +x = f"{ + a = }" +x = f"{ # comment + a }" +x = f"{ # comment + a = }" + +# Remove the parentheses as adding them doesn't make then fit within the line length limit. +# This is similar to how we format it before f-string formatting. +aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc" +# Here, we would use the best fit layout to put the f-string indented on the next line +# similar to the next example. +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" +) +aaaaaaaaaaa = ( + f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" +) + +# This should never add the optional parentheses because even after adding them, the +# f-string exceeds the line length limit. +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" +x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" + +# Multiple larger expressions which exceeds the line length limit. Here, we need to decide +# whether to split at the first or second expression. This should work similarly to the +# assignment statement formatting where we split from right to left in preview mode. +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" + +# The above example won't split but when we start introducing line breaks: +x = f"aaaaaaaaaaaa { + bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { + ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd + } eeeeeeeeeeeeee" + +# But, in case comments are present, we would split at the expression containing the +# comments: +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment + } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb + } cccccccccccccccccccc { # comment + ddddddddddddddd } eeeeeeeeeeeeee" + +# Here, the expression part itself starts with a curly brace so we need to add an extra +# space between the opening curly brace and the expression. +x = f"{ {'x': 1, 'y': 2} }" +# Although the extra space isn't required before the ending curly brace, we add it for +# consistency. +x = f"{ {'x': 1, 'y': 2}}" +x = f"{ {'x': 1, 'y': 2} = }" +x = f"{ # comment + {'x': 1, 'y': 2} }" +x = f"{ # comment + {'x': 1, 'y': 2} = }" + +# But, in this case, we would split the expression itself because it exceeds the line +# length limit so we need not add the extra space. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccc'} +}" +# And, split the expression itself because it exceeds the line length. +xxxxxxx = f"{ + {'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'} +}" + +# Triple-quoted strings +# It's ok to use the same quote char for the inner string if it's single-quoted. +f"""test {'inner'}""" +f"""test {"inner"}""" +# But if the inner string is also triple-quoted then we should preserve the existing quotes. +f"""test {'''inner'''}""" + +# Comments + +# No comments should be dropped! +f"{ # comment 1 + # comment 2 + foo # comment 3 + # comment 4 +}" # comment 5 +# comment 6 + +# Conversion flags +# +# This is not a valid Python code because of the additional whitespace between the `!` +# and conversion type. But, our parser isn't strict about this. This should probably be +# removed once we have a strict parser. +x = f"aaaaaaaaa { x ! r }" + +# Even in the case of debug expresions, we only need to preserve the whitespace within +# the expression part of the replacement field. +x = f"aaaaaaaaa { x = ! r }" + +# Combine conversion flags with format specifiers +x = f"{x = ! s + :>0 + + }" +# This is interesting. There can be a comment after the format specifier but only if it's +# on it's own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details. +# We'll format is as trailing comments. +x = f"{x !s + :>0 + # comment + }" + +x = f""" +{ # dangling comment 1 + x = :.0{y # dangling comment 2 + }f}""" + +# Here, the debug expression is in a nested f-string so we should start preserving +# whitespaces from that point onwards. This means we should format the outer f-string. +x = f"""{"foo " + # comment 1 + f"{ x = + + }" # comment 2 + } + """ + +# Mix of various features. +f"{ # dangling comment 1 + foo # after foo + :>{ + x # after x + } + # dangling comment 2 + # dangling comment 3 +} woah {x}" +``` + + +#### Preview changes +```diff +--- Stable ++++ Preview +@@ -6,13 +6,13 @@ + # Regression test for fstrings dropping comments + result_f = ( + "Traceback (most recent call last):\n" +- f' File "{__file__}", line {lineno_f+5}, in _check_recursive_traceback_display\n' ++ f' File "{__file__}", line {lineno_f + 5}, in _check_recursive_traceback_display\n' + " f()\n" +- f' File "{__file__}", line {lineno_f+1}, in f\n' ++ f' File "{__file__}", line {lineno_f + 1}, in f\n' + " f()\n" +- f' File "{__file__}", line {lineno_f+1}, in f\n' ++ f' File "{__file__}", line {lineno_f + 1}, in f\n' + " f()\n" +- f' File "{__file__}", line {lineno_f+1}, in f\n' ++ f' File "{__file__}", line {lineno_f + 1}, in f\n' + " f()\n" + # XXX: The following line changes depending on whether the tests + # are run through the interactive interpreter or with -m +@@ -67,64 +67,72 @@ + x = f"{a}" + x = f"{ + a = }" +-x = f"{ # comment +- a }" ++x = f"{ # comment ++ a ++}" + x = f"{ # comment + a = }" + + # Remove the parentheses as adding them doesn't make then fit within the line length limit. + # This is similar to how we format it before f-string formatting. +-aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd } cccccccccc" ++aaaaaaaaaaa = f"asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc + dddddddd} cccccccccc" + # Here, we would use the best fit layout to put the f-string indented on the next line + # similar to the next example. + aaaaaaaaaaa = ( +- f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" ++ f"asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc} cccccccccc" + ) + aaaaaaaaaaa = ( +- f"asaaaaaaaaaaaaaaaa { aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc } cccccccccc" ++ f"asaaaaaaaaaaaaaaaa {aaaaaaaaaaaa + bbbbbbbbbbbb + ccccccccccccccc} cccccccccc" + ) + + # This should never add the optional parentheses because even after adding them, the + # f-string exceeds the line length limit. +-x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" ++x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa {"bbbbbbbbbbbbbbbbbbbbbbbbbbbbb"} ccccccccccccccc" + x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" +-x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment +- "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" } ccccccccccccccc" ++x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment ++ "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" ++} ccccccccccccccc" + x = f"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa { # comment + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb" = } ccccccccccccccc" + + # Multiple larger expressions which exceeds the line length limit. Here, we need to decide + # whether to split at the first or second expression. This should work similarly to the + # assignment statement formatting where we split from right to left in preview mode. +-x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc {ddddddddddddddd} eeeeeeeeeeeeee" + + # The above example won't split but when we start introducing line breaks: +-x = f"aaaaaaaaaaaa { +- bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +-x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb +- } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +-x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { +- ddddddddddddddd } eeeeeeeeeeeeee" +-x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb } cccccccccccccccccccc { ddddddddddddddd +- } eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { ++ ddddddddddddddd ++} eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { ++ ddddddddddddddd ++} eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { ++ ddddddddddddddd ++} eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { ++ ddddddddddddddd ++} eeeeeeeeeeeeee" + + # But, in case comments are present, we would split at the expression containing the + # comments: +-x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb # comment +- } cccccccccccccccccccc { ddddddddddddddd } eeeeeeeeeeeeee" +-x = f"aaaaaaaaaaaa { bbbbbbbbbbbbbb +- } cccccccccccccccccccc { # comment +- ddddddddddddddd } eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa { ++ bbbbbbbbbbbbbb # comment ++} cccccccccccccccccccc {ddddddddddddddd} eeeeeeeeeeeeee" ++x = f"aaaaaaaaaaaa {bbbbbbbbbbbbbb} cccccccccccccccccccc { # comment ++ ddddddddddddddd ++} eeeeeeeeeeeeee" + + # Here, the expression part itself starts with a curly brace so we need to add an extra + # space between the opening curly brace and the expression. + x = f"{ {'x': 1, 'y': 2} }" + # Although the extra space isn't required before the ending curly brace, we add it for + # consistency. +-x = f"{ {'x': 1, 'y': 2}}" ++x = f"{ {'x': 1, 'y': 2} }" + x = f"{ {'x': 1, 'y': 2} = }" + x = f"{ # comment +- {'x': 1, 'y': 2} }" ++ {'x': 1, 'y': 2} ++}" + x = f"{ # comment + {'x': 1, 'y': 2} = }" + +@@ -135,22 +143,26 @@ + }" + # And, split the expression itself because it exceeds the line length. + xxxxxxx = f"{ +- {'aaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccc'} ++ { ++ 'aaaaaaaaaaaaaaaaaaaaaaaaa', ++ 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', ++ 'cccccccccccccccccccccccccc', ++ } + }" + + # Triple-quoted strings + # It's ok to use the same quote char for the inner string if it's single-quoted. +-f"""test {'inner'}""" + f"""test {"inner"}""" ++f"""test {"inner"}""" + # But if the inner string is also triple-quoted then we should preserve the existing quotes. + f"""test {'''inner'''}""" + + # Comments + + # No comments should be dropped! +-f"{ # comment 1 ++f"{ # comment 1 + # comment 2 +- foo # comment 3 ++ foo # comment 3 + # comment 4 + }" # comment 5 + # comment 6 +@@ -160,24 +172,21 @@ + # This is not a valid Python code because of the additional whitespace between the `!` + # and conversion type. But, our parser isn't strict about this. This should probably be + # removed once we have a strict parser. +-x = f"aaaaaaaaa { x ! r }" ++x = f"aaaaaaaaa {x!r}" + + # Even in the case of debug expresions, we only need to preserve the whitespace within + # the expression part of the replacement field. +-x = f"aaaaaaaaa { x = ! r }" ++x = f"aaaaaaaaa { x = !r}" + + # Combine conversion flags with format specifiers +-x = f"{x = ! s +- :>0 +- +- }" ++x = f"{x = !s:>0}" + # This is interesting. There can be a comment after the format specifier but only if it's + # on it's own line. Refer to https://github.com/astral-sh/ruff/pull/7787 for more details. + # We'll format is as trailing comments. +-x = f"{x !s +- :>0 +- # comment +- }" ++x = f"{ ++ x!s:>0 ++ # comment ++}" + + x = f""" + { # dangling comment 1 +@@ -186,19 +195,19 @@ + + # Here, the debug expression is in a nested f-string so we should start preserving + # whitespaces from that point onwards. This means we should format the outer f-string. +-x = f"""{"foo " + # comment 1 +- f"{ x = ++x = f"""{ ++ "foo " # comment 1 ++ + f"{ x = + +- }" # comment 2 +- } ++ }" # comment 2 ++} + """ + + # Mix of various features. + f"{ # dangling comment 1 +- foo # after foo +- :>{ +- x # after x +- } ++ foo:>{ # after foo ++ x # after x ++ } + # dangling comment 2 + # dangling comment 3 + } woah {x}" ```