Skip to content

Commit

Permalink
Correctly handle implicit concatenated strings in docstrings positions
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Oct 21, 2024
1 parent ead9c1a commit c49f8c7
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 26 deletions.
34 changes: 33 additions & 1 deletion crates/ruff_python_ast/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ pub enum StringLikePart<'a> {
FString(&'a ast::FString),
}

impl StringLikePart<'_> {
impl<'a> StringLikePart<'a> {
/// Returns the [`AnyStringFlags`] for the current string-like part.
pub fn flags(&self) -> AnyStringFlags {
match self {
Expand All @@ -524,6 +524,17 @@ impl StringLikePart<'_> {
self.end() - kind.closer_len(),
)
}

pub const fn is_string_literal(self) -> bool {
matches!(self, Self::String(_))
}

pub const fn as_string_literal(self) -> Option<&'a ast::StringLiteral> {
match self {
StringLikePart::String(value) => Some(value),
_ => None,
}
}
}

impl<'a> From<&'a ast::StringLiteral> for StringLikePart<'a> {
Expand Down Expand Up @@ -567,6 +578,7 @@ impl Ranged for StringLikePart<'_> {
/// An iterator over all the [`StringLikePart`] of a string-like expression.
///
/// This is created by the [`StringLike::parts`] method.
#[derive(Clone)]
pub enum StringLikePartIter<'a> {
String(std::slice::Iter<'a, ast::StringLiteral>),
Bytes(std::slice::Iter<'a, ast::BytesLiteral>),
Expand Down Expand Up @@ -603,5 +615,25 @@ impl<'a> Iterator for StringLikePartIter<'a> {
}
}

impl DoubleEndedIterator for StringLikePartIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let part = match self {
StringLikePartIter::String(inner) => StringLikePart::String(inner.next_back()?),
StringLikePartIter::Bytes(inner) => StringLikePart::Bytes(inner.next_back()?),
StringLikePartIter::FString(inner) => {
let part = inner.next_back()?;
match part {
ast::FStringPart::Literal(string_literal) => {
StringLikePart::String(string_literal)
}
ast::FStringPart::FString(f_string) => StringLikePart::FString(f_string),
}
}
};

Some(part)
}
}

impl FusedIterator for StringLikePartIter<'_> {}
impl ExactSizeIterator for StringLikePartIter<'_> {}
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,35 @@
ddddddddddddddddddddddddddd,
]:
pass


##############################################################################
# In docstring positions
##############################################################################

def short_docstring():
"Implicit" "concatenated" "docstring"

def long_docstring():
"Loooooooooooooooooooooong" "doooooooooooooooooooocstriiiiiiiiiiiiiiiiiiiiiiiiiiiiiiing" "exceding the line width" "but it should be concatenated anyways because it is single line"

def docstring_with_leading_whitespace():
" This is a " "implicit" "concatenated" "docstring"

def docstring_with_trailing_whitespace():
"This is a " "implicit" "concatenated" "docstring "

def docstring_with_leading_empty_parts():
" " " " "" "This is a " "implicit" "concatenated" "docstring"

def docstring_with_trailing_empty_parts():
"This is a " "implicit" "concatenated" "docstring" "" " " " "

def all_empty():
" " " " " "

def byte_string_in_docstring_position():
b" don't trim the" b"bytes literal "

def f_string_in_docstring_position():
f" don't trim the" "f-string literal "
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,12 @@ impl FormatNodeRule<ExprStringLiteral> for FormatExprStringLiteral {
match value.as_slice() {
[string_literal] => string_literal.format().with_options(self.kind).fmt(f),
_ => {
// This is just a sanity check because [`DocstringStmt::try_from_statement`]
// ensures that the docstring is a *single* string literal.
assert!(!self.kind.is_docstring());

// Always join strings that aren't parenthesized and thus, always on a single line.
if !f.context().node_level().is_parenthesized() {
if let Some(format_flat) =
if let Some(mut format_flat) =
FormatImplicitConcatenatedStringFlat::new(item.into(), f.context())
{
format_flat.set_docstring(self.kind.is_docstring());
return format_flat.fmt(f);
}
}
Expand Down
23 changes: 6 additions & 17 deletions crates/ruff_python_formatter/src/statement/suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use ruff_formatter::{
write, FormatContext, FormatOwnedWithRule, FormatRefWithRule, FormatRuleWithOptions,
};
use ruff_python_ast::helpers::is_compound_statement;
use ruff_python_ast::{self as ast, Expr, PySourceType, Stmt, Suite};
use ruff_python_ast::{self as ast, PySourceType, Stmt, Suite};
use ruff_python_ast::{AnyNodeRef, StmtExpr};
use ruff_python_trivia::{lines_after, lines_after_ignoring_end_of_line_trivia, lines_before};
use ruff_text_size::{Ranged, TextRange};
Expand Down Expand Up @@ -796,29 +796,18 @@ impl<'a> DocstringStmt<'a> {
return None;
};

match value.as_ref() {
Expr::StringLiteral(ast::ExprStringLiteral { value, .. })
if !value.is_implicit_concatenated() =>
{
Some(DocstringStmt {
docstring: stmt,
suite_kind,
})
}
_ => None,
}
value.is_string_literal_expr().then_some(DocstringStmt {
docstring: stmt,
suite_kind,
})
}

pub(crate) fn is_docstring_statement(stmt: &StmtExpr, source_type: PySourceType) -> bool {
if source_type.is_ipynb() {
return false;
}

if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = stmt.value.as_ref() {
!value.is_implicit_concatenated()
} else {
false
}
stmt.value.is_string_literal_expr()
}
}

Expand Down
69 changes: 66 additions & 3 deletions crates/ruff_python_formatter/src/string/implicit.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use memchr::memchr2;
use ruff_formatter::{format_args, write};
use ruff_python_ast::str::Quote;
use ruff_python_ast::str_prefix::{
Expand Down Expand Up @@ -103,6 +102,7 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringExpanded<'_
pub(crate) struct FormatImplicitConcatenatedStringFlat<'a> {
string: StringLike<'a>,
flags: AnyStringFlags,
docstring: bool,
}

impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
Expand Down Expand Up @@ -222,9 +222,14 @@ impl<'a> FormatImplicitConcatenatedStringFlat<'a> {
Some(Self {
flags: merge_flags(string, context)?,
string,
docstring: false,
})
}

pub(crate) fn set_docstring(&mut self, is_docstring: bool) {
self.docstring = is_docstring;
}

pub(crate) fn string(&self) -> StringLike<'a> {
self.string
}
Expand All @@ -239,15 +244,51 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringFlat<'_> {

// TODO: FStrings when the f-string preview style is enabled???

for part in self.string.parts() {
let mut parts = self.string.parts().peekable();

// Trim implicit concatenated strings in docstring positions.
// Skip over any trailing parts that are all whitespace.
// Leading parts are handled as part of the formatting loop below.
if self.docstring {
for part in self.string.parts().rev() {
assert!(part.is_string_literal());

if f.context()
.locator()
.slice(part.content_range())
.trim()
.is_empty()
{
// Don't format the part.
parts.next_back();
} else {
break;
}
}
}

let mut first_non_empty = self.docstring;

while let Some(part) = parts.next() {
match part {
StringLikePart::String(_) | StringLikePart::Bytes(_) => {
FormatLiteralContent {
range: part.content_range(),
flags: self.flags,
is_fstring: false,
trim_start: first_non_empty && self.docstring,
trim_end: self.docstring && parts.peek().is_none(),
}
.fmt(f)?;

if first_non_empty {
first_non_empty = f
.context()
.locator()
.slice(part.content_range())
.trim_start()
.is_empty();
}
}

StringLikePart::FString(f_string) => {
Expand All @@ -259,6 +300,8 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringFlat<'_> {
range: literal.range(),
flags: self.flags,
is_fstring: true,
trim_end: false,
trim_start: false,
}
.fmt(f)?;
}
Expand All @@ -283,6 +326,8 @@ impl Format<PyFormatContext<'_>> for FormatImplicitConcatenatedStringFlat<'_> {
range: part.content_range(),
flags: self.flags,
is_fstring: true,
trim_end: false,
trim_start: false,
}
.fmt(f)?;
}
Expand All @@ -298,12 +343,14 @@ struct FormatLiteralContent {
range: TextRange,
flags: AnyStringFlags,
is_fstring: bool,
trim_start: bool,
trim_end: bool,
}

impl Format<PyFormatContext<'_>> for FormatLiteralContent {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let content = f.context().locator().slice(self.range);
let normalized = normalize_string(
let mut normalized = normalize_string(
content,
0,
self.flags,
Expand All @@ -312,6 +359,22 @@ impl Format<PyFormatContext<'_>> for FormatLiteralContent {
false,
);

// Trim the start and end of the string if it's the first or last part of a docstring.
// This is rare, so don't bother with optimizing to use `Cow`.
if self.trim_start {
let trimmed = normalized.trim_start();
if trimmed.len() < normalized.len() {
normalized = trimmed.to_string().into();
}
}

if self.trim_end {
let trimmed = normalized.trim_end();
if trimmed.len() < normalized.len() {
normalized = trimmed.to_string().into();
}
}

match normalized {
Cow::Borrowed(_) => source_text_slice(self.range).fmt(f),
Cow::Owned(normalized) => text(&normalized).fmt(f),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,38 @@ match x:
ddddddddddddddddddddddddddd,
]:
pass
##############################################################################
# In docstring positions
##############################################################################
def short_docstring():
"Implicit" "concatenated" "docstring"
def long_docstring():
"Loooooooooooooooooooooong" "doooooooooooooooooooocstriiiiiiiiiiiiiiiiiiiiiiiiiiiiiiing" "exceding the line width" "but it should be concatenated anyways because it is single line"
def docstring_with_leading_whitespace():
" This is a " "implicit" "concatenated" "docstring"
def docstring_with_trailing_whitespace():
"This is a " "implicit" "concatenated" "docstring "
def docstring_with_leading_empty_parts():
" " " " "" "This is a " "implicit" "concatenated" "docstring"
def docstring_with_trailing_empty_parts():
"This is a " "implicit" "concatenated" "docstring" "" " " " "
def all_empty():
" " " " " "
def byte_string_in_docstring_position():
b" don't trim the" b"bytes literal "
def f_string_in_docstring_position():
f" don't trim the" "f-string literal "
```

## Outputs
Expand Down Expand Up @@ -536,4 +568,45 @@ match x:
]
):
pass
##############################################################################
# In docstring positions
##############################################################################
def short_docstring():
"Implicitconcatenateddocstring"
def long_docstring():
"Loooooooooooooooooooooongdoooooooooooooooooooocstriiiiiiiiiiiiiiiiiiiiiiiiiiiiiiingexceding the line widthbut it should be concatenated anyways because it is single line"
def docstring_with_leading_whitespace():
"This is a implicitconcatenateddocstring"
def docstring_with_trailing_whitespace():
"This is a implicitconcatenateddocstring"
def docstring_with_leading_empty_parts():
"This is a implicitconcatenateddocstring"
def docstring_with_trailing_empty_parts():
"This is a implicitconcatenateddocstring"
def all_empty():
""
def byte_string_in_docstring_position():
b" don't trim thebytes literal "
def f_string_in_docstring_position():
f" don't trim thef-string literal "
```

0 comments on commit c49f8c7

Please sign in to comment.