diff --git a/crates/ruff_formatter/src/builders.rs b/crates/ruff_formatter/src/builders.rs index 658af7408ed38..235bed9021a98 100644 --- a/crates/ruff_formatter/src/builders.rs +++ b/crates/ruff_formatter/src/builders.rs @@ -308,11 +308,8 @@ impl std::fmt::Debug for Token { /// assert_eq!(printed.as_code(), r#""Hello 'Ruff'""#); /// assert_eq!(printed.sourcemap(), [ /// SourceMarker { source: TextSize::new(0), dest: TextSize::new(0) }, -/// SourceMarker { source: TextSize::new(0), dest: TextSize::new(7) }, /// SourceMarker { source: TextSize::new(8), dest: TextSize::new(7) }, -/// SourceMarker { source: TextSize::new(8), dest: TextSize::new(13) }, /// SourceMarker { source: TextSize::new(14), dest: TextSize::new(13) }, -/// SourceMarker { source: TextSize::new(14), dest: TextSize::new(14) }, /// SourceMarker { source: TextSize::new(20), dest: TextSize::new(14) }, /// ]); /// @@ -340,18 +337,18 @@ impl Format for SourcePosition { } } -/// Creates a text from a dynamic string with its optional start-position in the source document. +/// Creates a text from a dynamic string. +/// /// This is done by allocating a new string internally. -pub fn text(text: &str, position: Option) -> Text { +pub fn text(text: &str) -> Text { debug_assert_no_newlines(text); - Text { text, position } + Text { text } } #[derive(Eq, PartialEq)] pub struct Text<'a> { text: &'a str, - position: Option, } impl Format for Text<'_> @@ -359,10 +356,6 @@ where Context: FormatContext, { fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { - if let Some(position) = self.position { - source_position(position).fmt(f)?; - } - f.write_element(FormatElement::Text { text: self.text.to_string().into_boxed_str(), text_width: TextWidth::from_text(self.text, f.options().indent_width()), @@ -2292,7 +2285,7 @@ impl std::fmt::Debug for FormatWith { /// let mut join = f.join_with(&separator); /// /// for item in &self.items { -/// join.entry(&format_with(|f| write!(f, [text(item, None)]))); +/// join.entry(&format_with(|f| write!(f, [text(item)]))); /// } /// join.finish() /// })), @@ -2377,7 +2370,7 @@ where /// let mut count = 0; /// /// let value = format_once(|f| { -/// write!(f, [text(&std::format!("Formatted {count}."), None)]) +/// write!(f, [text(&std::format!("Formatted {count}."))]) /// }); /// /// format!(SimpleFormatContext::default(), [value]).expect("Formatting once works fine"); diff --git a/crates/ruff_formatter/src/format_element/document.rs b/crates/ruff_formatter/src/format_element/document.rs index d5e71fd495ce3..18ce0ef106e6d 100644 --- a/crates/ruff_formatter/src/format_element/document.rs +++ b/crates/ruff_formatter/src/format_element/document.rs @@ -346,10 +346,7 @@ impl Format> for &[FormatElement] { } FormatElement::SourcePosition(position) => { - write!( - f, - [text(&std::format!("source_position({position:?})"), None)] - )?; + write!(f, [text(&std::format!("source_position({position:?})"))])?; } FormatElement::LineSuffixBoundary => { @@ -360,7 +357,7 @@ impl Format> for &[FormatElement] { write!(f, [token("best_fitting(")])?; if *mode != BestFittingMode::FirstLine { - write!(f, [text(&std::format!("mode: {mode:?}, "), None)])?; + write!(f, [text(&std::format!("mode: {mode:?}, "))])?; } write!(f, [token("[")])?; @@ -392,17 +389,14 @@ impl Format> for &[FormatElement] { write!( f, [ - text(&std::format!(""), None), + text(&std::format!("")), space(), &&**interned, ] )?; } Some(reference) => { - write!( - f, - [text(&std::format!(""), None)] - )?; + write!(f, [text(&std::format!(""))])?; } } } @@ -421,7 +415,7 @@ impl Format> for &[FormatElement] { f, [ token(">"), ] )?; @@ -436,9 +430,9 @@ impl Format> for &[FormatElement] { token(")"), soft_line_break_or_space(), token("ERROR>") ] )?; @@ -470,7 +464,7 @@ impl Format> for &[FormatElement] { f, [ token("align("), - text(&count.to_string(), None), + text(&count.to_string()), token(","), space(), ] @@ -482,7 +476,7 @@ impl Format> for &[FormatElement] { f, [ token("line_suffix("), - text(&std::format!("{reserved_width:?}"), None), + text(&std::format!("{reserved_width:?}")), token(","), space(), ] @@ -499,11 +493,7 @@ impl Format> for &[FormatElement] { if let Some(group_id) = group.id() { write!( f, - [ - text(&std::format!("\"{group_id:?}\""), None), - token(","), - space(), - ] + [text(&std::format!("\"{group_id:?}\"")), token(","), space(),] )?; } @@ -524,11 +514,7 @@ impl Format> for &[FormatElement] { if let Some(group_id) = id { write!( f, - [ - text(&std::format!("\"{group_id:?}\""), None), - token(","), - space(), - ] + [text(&std::format!("\"{group_id:?}\"")), token(","), space(),] )?; } } @@ -561,7 +547,7 @@ impl Format> for &[FormatElement] { f, [ token("indent_if_group_breaks("), - text(&std::format!("\"{id:?}\""), None), + text(&std::format!("\"{id:?}\"")), token(","), space(), ] @@ -581,11 +567,7 @@ impl Format> for &[FormatElement] { if let Some(group_id) = condition.group_id { write!( f, - [ - text(&std::format!("\"{group_id:?}\""), None), - token(","), - space(), - ] + [text(&std::format!("\"{group_id:?}\"")), token(","), space()] )?; } } @@ -595,7 +577,7 @@ impl Format> for &[FormatElement] { f, [ token("label("), - text(&std::format!("\"{label_id:?}\""), None), + text(&std::format!("\"{label_id:?}\"")), token(","), space(), ] @@ -664,7 +646,7 @@ impl Format> for &[FormatElement] { ContentArrayEnd, token(")"), soft_line_break_or_space(), - text(&std::format!(">"), None), + text(&std::format!(">")), ] )?; } @@ -807,7 +789,7 @@ impl Format> for Condition { f, [ token("if_group_fits_on_line("), - text(&std::format!("\"{id:?}\""), None), + text(&std::format!("\"{id:?}\"")), token(")") ] ), @@ -816,7 +798,7 @@ impl Format> for Condition { f, [ token("if_group_breaks("), - text(&std::format!("\"{id:?}\""), None), + text(&std::format!("\"{id:?}\"")), token(")") ] ), diff --git a/crates/ruff_formatter/src/format_extensions.rs b/crates/ruff_formatter/src/format_extensions.rs index e363757743252..33ca7b58e1daa 100644 --- a/crates/ruff_formatter/src/format_extensions.rs +++ b/crates/ruff_formatter/src/format_extensions.rs @@ -32,7 +32,7 @@ pub trait MemoizeFormat { /// let value = self.value.get(); /// self.value.set(value + 1); /// - /// write!(f, [text(&std::format!("Formatted {value} times."), None)]) + /// write!(f, [text(&std::format!("Formatted {value} times."))]) /// } /// } /// @@ -110,7 +110,7 @@ where /// write!(f, [ /// token("Count:"), /// space(), - /// text(&std::format!("{current}"), None), + /// text(&std::format!("{current}")), /// hard_line_break() /// ])?; /// diff --git a/crates/ruff_formatter/src/lib.rs b/crates/ruff_formatter/src/lib.rs index 927a598c4ee9e..a78cfab1ebc0a 100644 --- a/crates/ruff_formatter/src/lib.rs +++ b/crates/ruff_formatter/src/lib.rs @@ -41,7 +41,7 @@ use std::marker::PhantomData; use std::num::{NonZeroU16, NonZeroU8, TryFromIntError}; use crate::format_element::document::Document; -use crate::printer::{Printer, PrinterOptions, SourceMapGeneration}; +use crate::printer::{Printer, PrinterOptions}; pub use arguments::{Argument, Arguments}; pub use buffer::{ Buffer, BufferExtensions, BufferSnapshot, Inspect, RemoveSoftLinesBuffer, VecBuffer, @@ -269,7 +269,6 @@ impl FormatOptions for SimpleFormatOptions { line_width: self.line_width, indent_style: self.indent_style, indent_width: self.indent_width, - source_map_generation: SourceMapGeneration::Enabled, ..PrinterOptions::default() } } @@ -433,28 +432,40 @@ impl Printed { std::mem::take(&mut self.verbatim_ranges) } - /// Slices the formatted code to the sub-slices that covers the passed `source_range`. + /// Slices the formatted code to the sub-slices that covers the passed `source_range` in `source`. /// /// The implementation uses the source map generated during formatting to find the closest range /// in the formatted document that covers `source_range` or more. The returned slice /// matches the `source_range` exactly (except indent, see below) if the formatter emits [`FormatElement::SourcePosition`] for /// the range's offsets. /// + /// ## Indentation + /// The indentation before `source_range.start` is replaced with the indentation returned by the formatter + /// to fix up incorrectly intended code. + /// /// Returns the entire document if the source map is empty. + /// + /// # Panics + /// If `source_range` points to offsets that are not in the bounds of `source`. #[must_use] - pub fn slice_range(self, source_range: TextRange) -> PrintedRange { + pub fn slice_range(self, source_range: TextRange, source: &str) -> PrintedRange { let mut start_marker: Option = None; let mut end_marker: Option = None; // Note: The printer can generate multiple source map entries for the same source position. // For example if you have: + // * token("a + b") + // * `source_position(276)` + // * `token(")")` // * `source_position(276)` - // * `token("def")` - // * `token("foo")` - // * `source_position(284)` - // The printer uses the source position 276 for both the tokens `def` and `foo` because that's the only position it knows of. + // * `hard_line_break` + // The printer uses the source position 276 for both the tokens `)` and the `\n` because + // there were multiple `source_position` entries in the IR with the same offset. + // This can happen if multiple nodes start or end at the same position. A common example + // for this are expressions and expression statement that always end at the same offset. // - // Warning: Source markers are often emitted sorted by their source position but it's not guaranteed. + // Warning: Source markers are often emitted sorted by their source position but it's not guaranteed + // and depends on the emitted `IR`. // They are only guaranteed to be sorted in increasing order by their destination position. for marker in self.sourcemap { // Take the closest start marker, but skip over start_markers that have the same start. @@ -471,17 +482,44 @@ impl Printed { } } - let start = start_marker.map(|marker| marker.dest).unwrap_or_default(); - let end = end_marker.map_or_else(|| self.code.text_len(), |marker| marker.dest); - let code_range = TextRange::new(start, end); + let (source_start, formatted_start) = start_marker + .map(|marker| (marker.source, marker.dest)) + .unwrap_or_default(); + + let (source_end, formatted_end) = end_marker + .map_or((source.text_len(), self.code.text_len()), |marker| { + (marker.source, marker.dest) + }); + + let source_range = TextRange::new(source_start, source_end); + let formatted_range = TextRange::new(formatted_start, formatted_end); + + // Extend both ranges to include the indentation + let source_range = extend_range_to_include_indent(source_range, source); + let formatted_range = extend_range_to_include_indent(formatted_range, &self.code); PrintedRange { - code: self.code[code_range].to_string(), + code: self.code[formatted_range].to_string(), source_range, } } } +/// Extends `range` backwards (by reducing `range.start`) to include any directly preceding whitespace (`\t` or ` `). +/// +/// # Panics +/// If `range.start` is out of `source`'s bounds. +fn extend_range_to_include_indent(range: TextRange, source: &str) -> TextRange { + let whitespace_len: TextSize = source[..usize::from(range.start())] + .chars() + .rev() + .take_while(|c| matches!(c, ' ' | '\t')) + .map(TextLen::text_len) + .sum(); + + TextRange::new(range.start() - whitespace_len, range.end()) +} + #[derive(Debug, Clone, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] @@ -537,7 +575,7 @@ pub type FormatResult = Result; /// impl Format for Paragraph { /// fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { /// write!(f, [ -/// text(&self.0, None), +/// text(&self.0), /// hard_line_break(), /// ]) /// } diff --git a/crates/ruff_formatter/src/printer/mod.rs b/crates/ruff_formatter/src/printer/mod.rs index 5725569e88233..8f4edd4e1ccb8 100644 --- a/crates/ruff_formatter/src/printer/mod.rs +++ b/crates/ruff_formatter/src/printer/mod.rs @@ -4,7 +4,7 @@ use drop_bomb::DebugDropBomb; use unicode_width::UnicodeWidthChar; pub use printer_options::*; -use ruff_text_size::{Ranged, TextLen, TextSize}; +use ruff_text_size::{TextLen, TextSize}; use crate::format_element::document::Document; use crate::format_element::tag::{Condition, GroupMode}; @@ -76,6 +76,9 @@ impl<'a> Printer<'a> { } } + // Push any pending marker + self.push_marker(); + Ok(Printed::new( self.state.buffer, None, @@ -97,42 +100,38 @@ impl<'a> Printer<'a> { let args = stack.top(); match element { - FormatElement::Space => self.print_text(Text::Token(" "), None), - FormatElement::Token { text } => self.print_text(Text::Token(text), None), - FormatElement::Text { text, text_width } => self.print_text( - Text::Text { - text, - text_width: *text_width, - }, - None, - ), + FormatElement::Space => self.print_text(Text::Token(" ")), + FormatElement::Token { text } => self.print_text(Text::Token(text)), + FormatElement::Text { text, text_width } => self.print_text(Text::Text { + text, + text_width: *text_width, + }), FormatElement::SourceCodeSlice { slice, text_width } => { let text = slice.text(self.source_code); - self.print_text( - Text::Text { - text, - text_width: *text_width, - }, - Some(slice.range()), - ); + self.print_text(Text::Text { + text, + text_width: *text_width, + }); } FormatElement::Line(line_mode) => { if args.mode().is_flat() && matches!(line_mode, LineMode::Soft | LineMode::SoftOrSpace) { if line_mode == &LineMode::SoftOrSpace { - self.print_text(Text::Token(" "), None); + self.print_text(Text::Token(" ")); } } else if self.state.line_suffixes.has_pending() { self.flush_line_suffixes(queue, stack, Some(element)); } else { // Only print a newline if the current line isn't already empty if self.state.line_width > 0 { + self.push_marker(); self.print_char('\n'); } // Print a second line break if this is an empty line if line_mode == &LineMode::Empty { + self.push_marker(); self.print_char('\n'); } @@ -145,14 +144,11 @@ impl<'a> Printer<'a> { } FormatElement::SourcePosition(position) => { - self.state.source_position = *position; // The printer defers printing indents until the next text // is printed. Pushing the marker now would mean that the // mapped range includes the indent range, which we don't want. - // Only add a marker if we're not in an indented context, e.g. at the end of the file. - if self.state.pending_indent.is_empty() { - self.push_marker(); - } + // Queue the source map position and emit it when printing the next character + self.state.pending_source_position = Some(*position); } FormatElement::LineSuffixBoundary => { @@ -444,7 +440,7 @@ impl<'a> Printer<'a> { Ok(print_mode) } - fn print_text(&mut self, text: Text, source_range: Option) { + fn print_text(&mut self, text: Text) { if !self.state.pending_indent.is_empty() { let (indent_char, repeat_count) = match self.options.indent_style() { IndentStyle::Tab => ('\t', 1), @@ -467,19 +463,6 @@ impl<'a> Printer<'a> { } } - // Insert source map markers before and after the token - // - // If the token has source position information the start marker - // will use the start position of the original token, and the end - // marker will use that position + the text length of the token - // - // If the token has no source position (was created by the formatter) - // both the start and end marker will use the last known position - // in the input source (from state.source_position) - if let Some(range) = source_range { - self.state.source_position = range.start(); - } - self.push_marker(); match text { @@ -502,21 +485,15 @@ impl<'a> Printer<'a> { } } } - - if let Some(range) = source_range { - self.state.source_position = range.end(); - } - - self.push_marker(); } fn push_marker(&mut self) { - if self.options.source_map_generation.is_disabled() { + let Some(source_position) = self.state.pending_source_position.take() else { return; - } + }; let marker = SourceMarker { - source: self.state.source_position, + source: source_position, dest: self.state.buffer.text_len(), }; @@ -897,7 +874,7 @@ enum FillPairLayout { struct PrinterState<'a> { buffer: String, source_markers: Vec, - source_position: TextSize, + pending_source_position: Option, pending_indent: Indention, measured_group_fits: bool, line_width: u32, @@ -1752,7 +1729,7 @@ a", let result = format_with_options( &format_args![ token("function main() {"), - block_indent(&text("let x = `This is a multiline\nstring`;", None)), + block_indent(&text("let x = `This is a multiline\nstring`;")), token("}"), hard_line_break() ], @@ -1769,7 +1746,7 @@ a", fn it_breaks_a_group_if_a_string_contains_a_newline() { let result = format(&FormatArrayElements { items: vec![ - &text("`This is a string spanning\ntwo lines`", None), + &text("`This is a string spanning\ntwo lines`"), &token("\"b\""), ], }); diff --git a/crates/ruff_formatter/src/printer/printer_options/mod.rs b/crates/ruff_formatter/src/printer/printer_options/mod.rs index efbe850cbf22e..a8895690f6387 100644 --- a/crates/ruff_formatter/src/printer/printer_options/mod.rs +++ b/crates/ruff_formatter/src/printer/printer_options/mod.rs @@ -14,10 +14,6 @@ pub struct PrinterOptions { /// The type of line ending to apply to the printed input pub line_ending: LineEnding, - - /// Whether the printer should build a source map that allows mapping positions in the source document - /// to positions in the formatted document. - pub source_map_generation: SourceMapGeneration, } impl<'a, O> From<&'a O> for PrinterOptions diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/end_of_file.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/end_of_file.py new file mode 100644 index 0000000000000..7ddef8019f5cd --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/end_of_file.py @@ -0,0 +1 @@ +a + b \ No newline at end of file diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/parentheses.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/parentheses.py new file mode 100644 index 0000000000000..588dbe069fa77 --- /dev/null +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/parentheses.py @@ -0,0 +1,13 @@ +def needs_parentheses( ) -> bool: + return item.sizing_mode is None and item.width_policy == "auto" and item.height_policy == "automatic" + +def no_longer_needs_parentheses( ) -> bool: + return ( + item.width_policy == "auto" + and item.height_policy == "automatic" + ) + + +def format_range_after_inserted_parens (): + a and item.sizing_mode is None and item.width_policy == "auto" and item.height_policy == "automatic" + print("Format this" ) diff --git a/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/regressions.py b/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/regressions.py index 96f21de80dc09..3da2215eac359 100644 --- a/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/regressions.py +++ b/crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/regressions.py @@ -67,3 +67,19 @@ def convert_str(value: str) -> str: # Trailing comment def test (): pass + + +def test_comment_indent(): +# A misaligned comment + print("test") + + +# This demonstrates the use case where a user inserts a new function or class after an existing function. +# In this case, we should avoid formatting the node that directly precedes the new function/class. +# However, the problem is that the preceding node **must** be formatted to determine the whitespace between the two statements. +def test_start (): + print("Ideally this gets not reformatted" ) + + +def new_function_inserted_after_test_start (): + print("This should get formatted" ) diff --git a/crates/ruff_python_formatter/src/comments/format.rs b/crates/ruff_python_formatter/src/comments/format.rs index 97e8496bb69af..13f1cdf2880f1 100644 --- a/crates/ruff_python_formatter/src/comments/format.rs +++ b/crates/ruff_python_formatter/src/comments/format.rs @@ -430,31 +430,25 @@ pub(crate) struct FormatNormalizedComment<'a> { impl Format> for FormatNormalizedComment<'_> { fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + let write_sourcemap = f.options().source_map_generation().is_enabled(); + + write_sourcemap + .then_some(source_position(self.range.start())) + .fmt(f)?; + match self.comment { Cow::Borrowed(borrowed) => { source_text_slice(TextRange::at(self.range.start(), borrowed.text_len())).fmt(f)?; - - // Write the end position if the borrowed comment is shorter than the original comment - // So that we still can map back the end of a comment to the formatted code. - if f.options().source_map_generation().is_enabled() - && self.range.len() != borrowed.text_len() - { - source_position(self.range.end()).fmt(f)?; - } - - Ok(()) } Cow::Owned(ref owned) => { - write!( - f, - [ - text(owned, Some(self.range.start())), - source_position(self.range.end()) - ] - ) + text(owned).fmt(f)?; } } + + write_sourcemap + .then_some(source_position(self.range.end())) + .fmt(f) } } diff --git a/crates/ruff_python_formatter/src/expression/expr_number_literal.rs b/crates/ruff_python_formatter/src/expression/expr_number_literal.rs index 014f2228e6b85..ab46ab3f0beaf 100644 --- a/crates/ruff_python_formatter/src/expression/expr_number_literal.rs +++ b/crates/ruff_python_formatter/src/expression/expr_number_literal.rs @@ -20,7 +20,7 @@ impl FormatNodeRule for FormatExprNumberLiteral { match normalized { Cow::Borrowed(_) => source_text_slice(range).fmt(f), - Cow::Owned(normalized) => text(&normalized, Some(range.start())).fmt(f), + Cow::Owned(normalized) => text(&normalized).fmt(f), } } Number::Float(_) => { @@ -30,7 +30,7 @@ impl FormatNodeRule for FormatExprNumberLiteral { match normalized { Cow::Borrowed(_) => source_text_slice(range).fmt(f), - Cow::Owned(normalized) => text(&normalized, Some(range.start())).fmt(f), + Cow::Owned(normalized) => text(&normalized).fmt(f), } } Number::Complex { .. } => { @@ -43,7 +43,7 @@ impl FormatNodeRule for FormatExprNumberLiteral { source_text_slice(range.sub_end(TextSize::from(1))).fmt(f)?; } Cow::Owned(normalized) => { - text(&normalized, Some(range.start())).fmt(f)?; + text(&normalized).fmt(f)?; } } diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs index 4deab2ee015cb..ab0deee155f67 100644 --- a/crates/ruff_python_formatter/src/expression/mod.rs +++ b/crates/ruff_python_formatter/src/expression/mod.rs @@ -304,30 +304,25 @@ fn format_with_parentheses_comments( // Custom FormatNodeRule::fmt variant that only formats the inner comments let format_node_rule_fmt = format_with(|f| { // No need to handle suppression comments, those are statement only - leading_comments(leading_inner).fmt(f)?; - - let is_source_map_enabled = f.options().source_map_generation().is_enabled(); - - if is_source_map_enabled { - source_position(expression.start()).fmt(f)?; - } - - fmt_fields.fmt(f)?; - - if is_source_map_enabled { - source_position(expression.end()).fmt(f)?; - } - - trailing_comments(trailing_inner).fmt(f) + write!( + f, + [ + leading_comments(leading_inner), + fmt_fields, + trailing_comments(trailing_inner) + ] + ) }); // The actual parenthesized formatting - parenthesized("(", &format_node_rule_fmt, ")") - .with_dangling_comments(parentheses_comment) - .fmt(f)?; - trailing_comments(trailing_outer).fmt(f)?; - - Ok(()) + write!( + f, + [ + parenthesized("(", &format_node_rule_fmt, ")") + .with_dangling_comments(parentheses_comment), + trailing_comments(trailing_outer) + ] + ) } /// Wraps an expression in an optional parentheses except if its [`NeedsParentheses::needs_parentheses`] implementation diff --git a/crates/ruff_python_formatter/src/lib.rs b/crates/ruff_python_formatter/src/lib.rs index 7e1b4c3024f43..69daf521090f9 100644 --- a/crates/ruff_python_formatter/src/lib.rs +++ b/crates/ruff_python_formatter/src/lib.rs @@ -3,7 +3,7 @@ use tracing::Level; pub use range::format_range; use ruff_formatter::prelude::*; -use ruff_formatter::{format, FormatError, Formatted, PrintError, Printed, SourceCode}; +use ruff_formatter::{format, write, FormatError, Formatted, PrintError, Printed, SourceCode}; use ruff_python_ast::AstNode; use ruff_python_ast::Mod; use ruff_python_index::tokens_and_ranges; @@ -19,6 +19,7 @@ pub use crate::options::{ DocstringCode, DocstringCodeLineWidth, MagicTrailingComma, PreviewMode, PyFormatOptions, PythonVersion, QuoteStyle, }; +use crate::range::is_logical_line; pub use crate::shared_traits::{AsFormat, FormattedIter, FormattedIterExt, IntoFormat}; use crate::verbatim::suppressed_node; @@ -59,19 +60,27 @@ where } else { leading_comments(node_comments.leading).fmt(f)?; - let is_source_map_enabled = f.options().source_map_generation().is_enabled(); + let node_ref = node.as_any_node_ref(); - if is_source_map_enabled { - source_position(node.start()).fmt(f)?; - } + // Emit source map information for nodes that are valid "narrowing" targets + // in range formatting. Never emit source map information if they're disabled + // for performance reasons. + let emit_source_position = (is_logical_line(node_ref) || node_ref.is_mod_module()) + && f.options().source_map_generation().is_enabled(); - self.fmt_fields(node, f)?; + emit_source_position + .then_some(source_position(node.start())) + .fmt(f)?; - if is_source_map_enabled { - source_position(node.end()).fmt(f)?; - } + self.fmt_fields(node, f)?; - trailing_comments(node_comments.trailing).fmt(f) + write!( + f, + [ + emit_source_position.then_some(source_position(node.end())), + trailing_comments(node_comments.trailing) + ] + ) } } @@ -249,15 +258,36 @@ def main() -> None: #[ignore] #[test] fn range_formatting_quick_test() { - let source = r#"def test2( a): print("body" ) - "#; + let source = r#"def convert_str(value: str) -> str: # Trailing comment + """Return a string as-is.""" + + + + return value # Trailing comment +"#; + + let mut source = source.to_string(); + + let start = TextSize::try_from( + source + .find("") + .expect("Start marker not found"), + ) + .unwrap(); + + source.replace_range( + start.to_usize()..start.to_usize() + "".len(), + "", + ); + + let end = + TextSize::try_from(source.find("").expect("End marker not found")).unwrap(); - let start = TextSize::new(20); - let end = TextSize::new(35); + source.replace_range(end.to_usize()..end.to_usize() + "".len(), ""); let source_type = PySourceType::Python; let options = PyFormatOptions::from_source_type(source_type); - let printed = format_range(source, TextRange::new(start, end), options).unwrap(); + let printed = format_range(&source, TextRange::new(start, end), options).unwrap(); let mut formatted = source.to_string(); formatted.replace_range( @@ -267,9 +297,11 @@ def main() -> None: assert_eq!( formatted, - r#"def test2(a): - print("body") - "# + r#"print ( "format me" ) +print("format me") +print("format me") +print ( "format me" ) +print ( "format me" )"# ); } @@ -300,7 +332,7 @@ def main() -> None: while let Some(word) = words.next() { let is_last = words.peek().is_none(); let format_word = format_with(|f| { - write!(f, [text(word, None)])?; + write!(f, [text(word)])?; if is_last { write!(f, [token("\"")])?; diff --git a/crates/ruff_python_formatter/src/options.rs b/crates/ruff_python_formatter/src/options.rs index ecabc30d63bad..46f84d332d25e 100644 --- a/crates/ruff_python_formatter/src/options.rs +++ b/crates/ruff_python_formatter/src/options.rs @@ -230,7 +230,6 @@ impl FormatOptions for PyFormatOptions { line_width: self.line_width, line_ending: self.line_ending, indent_style: self.indent_style, - source_map_generation: self.source_map_generation, } } } diff --git a/crates/ruff_python_formatter/src/other/identifier.rs b/crates/ruff_python_formatter/src/other/identifier.rs index cc62f25947fff..e32312efea1b3 100644 --- a/crates/ruff_python_formatter/src/other/identifier.rs +++ b/crates/ruff_python_formatter/src/other/identifier.rs @@ -74,7 +74,7 @@ impl Format> for DotDelimitedIdentifier<'_> { .chars() .filter(|c| !is_python_whitespace(*c) && !matches!(c, '\n' | '\r' | '\\')) .collect(); - text(&no_whitespace, Some(self.0.start())).fmt(f) + text(&no_whitespace).fmt(f) } else { source_text_slice(self.0.range()).fmt(f) } diff --git a/crates/ruff_python_formatter/src/range.rs b/crates/ruff_python_formatter/src/range.rs index c969c3d2c729b..88e66fe19eec6 100644 --- a/crates/ruff_python_formatter/src/range.rs +++ b/crates/ruff_python_formatter/src/range.rs @@ -118,7 +118,7 @@ pub fn format_range( )?; let printed = formatted.print_with_indent(base_indent)?; - Ok(printed.slice_range(narrowed_range)) + Ok(printed.slice_range(narrowed_range, source)) } /// Finds the node with the minimum covering range of `range`. @@ -420,8 +420,13 @@ impl PreorderVisitor<'_> for NarrowRange<'_> { // Find the end offset of the closest node to the end offset of the formatting range. // We do this by iterating over end positions that we know generate source map entries end pick the end // that ends closest or after the searched range's end. - let trailing_comments = self.context.comments().trailing(node); - self.narrow(trailing_comments); + self.narrow( + self.context + .comments() + .trailing(node) + .iter() + .filter(|comment| comment.line_position().is_own_line()), + ); } fn visit_body(&mut self, body: &'_ [Stmt]) { @@ -552,7 +557,7 @@ impl NarrowRange<'_> { } } -const fn is_logical_line(node: AnyNodeRef) -> bool { +pub(crate) const fn is_logical_line(node: AnyNodeRef) -> bool { // Make sure to update [`FormatEnclosingLine`] when changing this. node.is_statement() || node.is_decorator() diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 54e28a618e0e3..59a0030eb46b8 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -772,9 +772,17 @@ impl Format> for DocstringStmt<'_> { f, [ leading_comments(node_comments.leading), + f.options() + .source_map_generation() + .is_enabled() + .then_some(source_position(self.docstring.start())), string_literal .format() .with_options(ExprStringLiteralKind::Docstring), + f.options() + .source_map_generation() + .is_enabled() + .then_some(source_position(self.docstring.end())), ] )?; diff --git a/crates/ruff_python_formatter/src/string/docstring.rs b/crates/ruff_python_formatter/src/string/docstring.rs index c6669c776818d..1086889605f16 100644 --- a/crates/ruff_python_formatter/src/string/docstring.rs +++ b/crates/ruff_python_formatter/src/string/docstring.rs @@ -4,6 +4,7 @@ use std::{borrow::Cow, collections::VecDeque}; +use ruff_formatter::printer::SourceMapGeneration; use ruff_python_parser::ParseError; use {once_cell::sync::Lazy, regex::Regex}; use { @@ -116,14 +117,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form let mut lines = docstring.lines().peekable(); // Start the string - write!( - f, - [ - normalized.prefix, - normalized.quotes, - source_position(normalized.start()), - ] - )?; + write!(f, [normalized.prefix, normalized.quotes])?; // We track where in the source docstring we are (in source code byte offsets) let mut offset = normalized.start(); @@ -152,7 +146,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form if already_normalized { source_text_slice(trimmed_line_range).fmt(f)?; } else { - text(trim_both, Some(trimmed_line_range.start())).fmt(f)?; + text(trim_both).fmt(f)?; } } offset += first.text_len(); @@ -205,7 +199,7 @@ pub(crate) fn format(normalized: &NormalizedString, f: &mut PyFormatter) -> Form space().fmt(f)?; } - write!(f, [source_position(normalized.end()), normalized.quotes]) + write!(f, [normalized.quotes]) } fn contains_unescaped_newline(haystack: &str) -> bool { @@ -404,7 +398,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { // prepend the in-docstring indentation to the string. let indent_len = indentation_length(trim_end) - self.stripped_indentation_length; let in_docstring_indent = " ".repeat(usize::from(indent_len)) + trim_end.trim_start(); - text(&in_docstring_indent, Some(line.offset)).fmt(self.f)?; + text(&in_docstring_indent).fmt(self.f)?; } else { // Take the string with the trailing whitespace removed, then also // skip the leading whitespace. @@ -414,11 +408,7 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { source_text_slice(trimmed_line_range).fmt(self.f)?; } else { // All indents are ascii spaces, so the slicing is correct. - text( - &trim_end[usize::from(self.stripped_indentation_length)..], - Some(trimmed_line_range.start()), - ) - .fmt(self.f)?; + text(&trim_end[usize::from(self.stripped_indentation_length)..]).fmt(self.f)?; } } @@ -495,7 +485,8 @@ impl<'ast, 'buf, 'fmt, 'src> DocstringLinePrinter<'ast, 'buf, 'fmt, 'src> { // tabs will get erased anyway, we just clobber them here // instead of later, and as a result, get more consistent // results. - .with_indent_style(IndentStyle::Space); + .with_indent_style(IndentStyle::Space) + .with_source_map_generation(SourceMapGeneration::Disabled); let printed = match docstring_format_source(options, self.quote_char, &codeblob) { Ok(printed) => printed, Err(FormatModuleError::FormatError(err)) => return Err(err), diff --git a/crates/ruff_python_formatter/src/string/mod.rs b/crates/ruff_python_formatter/src/string/mod.rs index 70b2288575ecd..40385d55dfdca 100644 --- a/crates/ruff_python_formatter/src/string/mod.rs +++ b/crates/ruff_python_formatter/src/string/mod.rs @@ -417,7 +417,7 @@ impl Format> for NormalizedString<'_> { source_text_slice(self.range()).fmt(f)?; } Cow::Owned(normalized) => { - text(normalized, Some(self.start())).fmt(f)?; + text(normalized).fmt(f)?; } } self.quotes.fmt(f) diff --git a/crates/ruff_python_formatter/src/verbatim.rs b/crates/ruff_python_formatter/src/verbatim.rs index 2eb4967f57423..101f6b05f6e7b 100644 --- a/crates/ruff_python_formatter/src/verbatim.rs +++ b/crates/ruff_python_formatter/src/verbatim.rs @@ -752,7 +752,14 @@ impl Format> for FormatVerbatimStatementRange { } } else { // Non empty line, write the text of the line - verbatim_text(trimmed_line_range).fmt(f)?; + write!( + f, + [ + source_position(trimmed_line_range.start()), + verbatim_text(trimmed_line_range), + source_position(trimmed_line_range.end()) + ] + )?; // Write the line separator that terminates the line, except if it is the last line (that isn't separated by a hard line break). if logical_line.has_trailing_newline { @@ -892,13 +899,7 @@ impl Format> for VerbatimText { write!(f, [source_text_slice(self.verbatim_range)])?; } Cow::Owned(cleaned) => { - write!( - f, - [ - text(&cleaned, Some(self.verbatim_range.start())), - source_position(self.verbatim_range.end()) - ] - )?; + text(&cleaned).fmt(f)?; } } @@ -957,7 +958,9 @@ impl Format> for FormatSuppressedNode<'_> { f, [ leading_comments(node_comments.leading), + source_position(verbatim_range.start()), verbatim_text(verbatim_range), + source_position(verbatim_range.end()), trailing_comments(node_comments.trailing) ] ) @@ -969,8 +972,17 @@ pub(crate) fn write_suppressed_clause_header( header: ClauseHeader, f: &mut PyFormatter, ) -> FormatResult<()> { + let range = header.range(f.context().source())?; + // Write the outer comments and format the node as verbatim - write!(f, [verbatim_text(header.range(f.context().source())?)])?; + write!( + f, + [ + source_position(range.start()), + verbatim_text(range), + source_position(range.end()) + ] + )?; let comments = f.context().comments(); header.visit(&mut |child| { diff --git a/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__end_of_file.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__end_of_file.py.snap new file mode 100644 index 0000000000000..2ef6ce2bab207 --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__end_of_file.py.snap @@ -0,0 +1,14 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/end_of_file.py +--- +## Input +```python +a + b``` + +## Output +```python +a + b``` + + + diff --git a/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__parentheses.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__parentheses.py.snap new file mode 100644 index 0000000000000..09ead4c89d270 --- /dev/null +++ b/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__parentheses.py.snap @@ -0,0 +1,41 @@ +--- +source: crates/ruff_python_formatter/tests/fixtures.rs +input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/range_formatting/parentheses.py +--- +## Input +```python +def needs_parentheses( ) -> bool: + return item.sizing_mode is None and item.width_policy == "auto" and item.height_policy == "automatic" + +def no_longer_needs_parentheses( ) -> bool: + return ( + item.width_policy == "auto" + and item.height_policy == "automatic" + ) + + +def format_range_after_inserted_parens (): + a and item.sizing_mode is None and item.width_policy == "auto" and item.height_policy == "automatic" + print("Format this" ) +``` + +## Output +```python +def needs_parentheses( ) -> bool: + return ( + item.sizing_mode is None + and item.width_policy == "auto" + and item.height_policy == "automatic" + ) + +def no_longer_needs_parentheses( ) -> bool: + return item.width_policy == "auto" and item.height_policy == "automatic" + + +def format_range_after_inserted_parens (): + a and item.sizing_mode is None and item.width_policy == "auto" and item.height_policy == "automatic" + print("Format this") +``` + + + diff --git a/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__regressions.py.snap b/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__regressions.py.snap index c104acef5a9fa..914a83b6e59b9 100644 --- a/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__regressions.py.snap +++ b/crates/ruff_python_formatter/tests/snapshots/format@range_formatting__regressions.py.snap @@ -73,6 +73,22 @@ def convert_str(value: str) -> str: # Trailing comment def test (): pass + + +def test_comment_indent(): +# A misaligned comment + print("test") + + +# This demonstrates the use case where a user inserts a new function or class after an existing function. +# In this case, we should avoid formatting the node that directly precedes the new function/class. +# However, the problem is that the preceding node **must** be formatted to determine the whitespace between the two statements. +def test_start (): + print("Ideally this gets not reformatted" ) + + +def new_function_inserted_after_test_start (): + print("This should get formatted" ) ``` ## Output @@ -133,6 +149,22 @@ def convert_str(value: str) -> str: # Trailing comment def test (): pass + + +def test_comment_indent(): + # A misaligned comment + print("test") + + +# This demonstrates the use case where a user inserts a new function or class after an existing function. +# In this case, we should avoid formatting the node that directly precedes the new function/class. +# However, the problem is that the preceding node **must** be formatted to determine the whitespace between the two statements. +def test_start (): + print("Ideally this gets not reformatted" ) + + +def new_function_inserted_after_test_start(): + print("This should get formatted") ``` diff --git a/crates/ruff_text_size/src/size.rs b/crates/ruff_text_size/src/size.rs index 7002d5a9c430a..e6d9a2dcbe1f9 100644 --- a/crates/ruff_text_size/src/size.rs +++ b/crates/ruff_text_size/src/size.rs @@ -72,7 +72,7 @@ impl TextSize { /// # use ruff_text_size::*; /// assert_eq!(TextSize::from(4).to_u32(), 4); /// ``` - pub fn to_u32(&self) -> u32 { + pub const fn to_u32(&self) -> u32 { self.raw } @@ -84,7 +84,7 @@ impl TextSize { /// # use ruff_text_size::*; /// assert_eq!(TextSize::from(4).to_usize(), 4); /// ``` - pub fn to_usize(&self) -> usize { + pub const fn to_usize(&self) -> usize { self.raw as usize } }