Skip to content

Commit

Permalink
implement string normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
faultyserver committed Dec 31, 2023
1 parent 805d6d5 commit a0c6e2d
Show file tree
Hide file tree
Showing 8 changed files with 462 additions and 47 deletions.
4 changes: 4 additions & 0 deletions crates/biome_css_formatter/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ impl CssFormatOptions {
/// Replaces the quote style stored in these options with `quote_style`.
pub fn set_quote_style(&mut self, quote_style: QuoteStyle) {
self.quote_style = quote_style;
}

/// Returns the quote style currently stored in these options.
pub fn quote_style(&self) -> QuoteStyle {
self.quote_style
}
}

impl FormatOptions for CssFormatOptions {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
use std::borrow::Cow;

use crate::prelude::*;
use crate::{prelude::*, utils::string_utils::FormatLiteralStringToken};
use biome_css_syntax::{
AnyCssAttributeMatcherValue, CssAttributeMatcherValue, CssAttributeMatcherValueFields,
};
Expand Down Expand Up @@ -28,26 +26,16 @@ impl FormatNodeRule<CssAttributeMatcherValue> for FormatCssAttributeMatcherValue
return write!(f, [ident.format()]);
}

// Unlike almost all other usages of regular identifiers,
// attribute values are case-sensitive, so the identifier here
// does not get converted to lowercase. Once it's quoted, it
// will be parsed as a CssString on the next pass, at which
// point casing is preserved no matter what.
let value = ident.value_token()?;
let quoted = std::format!("\"{}\"", value.text_trimmed());

write!(
f,
[
format_leading_comments(ident.syntax()),
format_replaced(
&value,
&syntax_token_cow_slice(
Cow::Owned(quoted),
&value,
value.text_trimmed_range().start()
)
),
// Unlike almost all other usages of regular identifiers,
// attribute values are case-sensitive, so the identifier here
// does not get converted to lowercase. Once it's quoted, it
// will be parsed as a CssString on the next pass, at which
// point casing is preserved no matter what.
FormatLiteralStringToken::new(&ident.value_token()?),
format_trailing_comments(ident.syntax()),
format_dangling_comments(ident.syntax())
]
Expand Down
4 changes: 2 additions & 2 deletions crates/biome_css_formatter/src/css/value/string.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::prelude::*;
use crate::{prelude::*, utils::string_utils::FormatLiteralStringToken};
use biome_css_syntax::{CssString, CssStringFields};
use biome_formatter::write;

Expand All @@ -8,6 +8,6 @@ impl FormatNodeRule<CssString> for FormatCssString {
fn fmt_fields(&self, node: &CssString, f: &mut CssFormatter) -> FormatResult<()> {
let CssStringFields { value_token } = node.as_fields();

write!(f, [value_token.format()])
write!(f, [FormatLiteralStringToken::new(&value_token?)])
}
}
290 changes: 290 additions & 0 deletions crates/biome_css_formatter/src/utils/string_utils.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
use std::borrow::Cow;

use crate::context::CssFormatOptions;
use crate::prelude::*;
use biome_css_syntax::CssLanguage;
use biome_css_syntax::CssSyntaxKind::{CSS_STRING_LITERAL, CSS_URL_VALUE_RAW_LITERAL, IDENT};
use biome_css_syntax::CssSyntaxToken;
use biome_formatter::token::string::normalize_string;
use biome_formatter::QuoteStyle;
use biome_formatter::{
prelude::{dynamic_text, write},
token::string::ToAsciiLowercaseCow,
Expand Down Expand Up @@ -37,3 +42,288 @@ impl Format<CssFormatContext> for FormatTokenAsLowercase {
}
}
}

/// Convenience wrapper that formats a token as a normalised string literal.
///
/// This is copied from the JS formatter, but the logic should eventually be
/// made generic and reusable, since many languages have the same needs.
pub(crate) struct FormatLiteralStringToken<'token> {
/// The token being formatted. `clean_text` debug-asserts that it is a
/// string literal, a raw URL value literal, or an identifier.
token: &'token CssSyntaxToken,
}

impl<'token> FormatLiteralStringToken<'token> {
    /// Wraps `token` so that it can be formatted as a normalised string.
    pub fn new(token: &'token CssSyntaxToken) -> Self {
        Self { token }
    }

    /// Returns the wrapped token.
    fn token(&self) -> &'token CssSyntaxToken {
        self.token
    }

    /// Normalises the token's text using the quote style from `options`.
    ///
    /// The result pairs the normalised text with the original token so that
    /// the text can later be emitted in place of that token.
    ///
    /// In debug builds this asserts that the token is a string literal, a raw
    /// URL value literal, or an identifier.
    pub fn clean_text(&self, options: &CssFormatOptions) -> CleanedStringLiteralText {
        let token = self.token();
        let kind = token.kind();
        debug_assert!(
            matches!(kind, CSS_STRING_LITERAL | CSS_URL_VALUE_RAW_LITERAL | IDENT),
            "Found kind {:?}",
            kind
        );

        // The normaliser is only needed for this one call, so it is used as a
        // temporary rather than being bound to a local.
        let text = LiteralStringNormaliser::new(self, options.quote_style()).normalise_text();

        CleanedStringLiteralText { token, text }
    }
}

/// Normalised text for a token, paired with the token it was computed from.
///
/// Formatting this value emits `text` as a replacement for `token`.
pub(crate) struct CleanedStringLiteralText<'a> {
/// The original token that the text replaces.
token: &'a CssSyntaxToken,
/// The normalised text; borrowed from the token when nothing had to change.
text: Cow<'a, str>,
}

impl Format<CssFormatContext> for CleanedStringLiteralText<'_> {
    /// Writes the cleaned text as a replacement for the original token,
    /// positioned at the start of the token's trimmed range.
    fn fmt(&self, f: &mut Formatter<CssFormatContext>) -> FormatResult<()> {
        let original = self.token;
        let start = original.text_trimmed_range().start();
        let replacement = syntax_token_cow_slice(self.text.clone(), original, start);

        format_replaced(original, &replacement).fmt(f)
    }
}

impl Format<CssFormatContext> for FormatLiteralStringToken<'_> {
    /// Normalises the token text with the formatter's options and writes the
    /// result in place of the token.
    fn fmt(&self, f: &mut CssFormatter) -> FormatResult<()> {
        self.clean_text(f.options()).fmt(f)
    }
}

/// Convenience data structure holding what was learned about a string while
/// inspecting it.
struct StringInformation {
/// The quote that was computed for the string and is eventually used to
/// enclose it. It can differ from the one set in the formatter options.
preferred_quote: QuoteStyle,
/// Whether the raw content contains quotes (single or double). The raw
/// content is the content of a string literal without its enclosing quotes.
raw_content_has_quotes: bool,
}

impl FormatLiteralStringToken<'_> {
    /// Determines which quote should enclose the string and whether its
    /// content contains any quotes.
    ///
    /// The token's trimmed text is inspected with its first and last
    /// characters (the enclosing quotes) left out.
    ///
    /// # How it works
    ///
    /// The preferred quote is `chosen_quote`, which comes from the formatter
    /// options; the alternate quote is the other one. Both are counted inside
    /// the content. If the preferred quote occurs more often than the
    /// alternate quote, the two are swapped so that fewer quotes need
    /// escaping.
    ///
    /// For example, suppose the preferred quote is double and the string is:
    /// ```css
    /// " content \"\"\" don't "
    /// ```
    /// Leaving out the enclosing quotes, there are three double quotes and one
    /// single quote. Keeping double quotes would mean three escaped quotes.
    ///
    /// Choosing the single quote instead leaves only one quote to escape:
    /// ```css
    /// ' content """ don\'t '
    /// ```
    ///
    /// Tokens that are not string literals have no enclosing quotes to begin
    /// with, so `chosen_quote` is used for them without looking at the
    /// content.
    fn compute_string_information(&self, chosen_quote: QuoteStyle) -> StringInformation {
        if !matches!(self.token().kind(), CSS_STRING_LITERAL) {
            return StringInformation {
                raw_content_has_quotes: false,
                preferred_quote: chosen_quote,
            };
        }

        let alternate = chosen_quote.other();
        let preferred_char = chosen_quote.as_char();
        let alternate_char = alternate.as_char();

        // Drop the first and the last character so that the enclosing quotes
        // do not take part in the tally.
        let mut inner = self.token().text_trimmed().chars();
        inner.next();
        inner.next_back();

        let mut preferred_quotes_count = 0;
        let mut alternate_quotes_count = 0;
        for character in inner {
            if character == preferred_char {
                preferred_quotes_count += 1;
            } else if character == alternate_char {
                alternate_quotes_count += 1;
            }
        }

        let preferred_quote = if preferred_quotes_count > alternate_quotes_count {
            alternate
        } else {
            chosen_quote
        };

        StringInformation {
            raw_content_has_quotes: preferred_quotes_count > 0 || alternate_quotes_count > 0,
            preferred_quote,
        }
    }
}

/// Convenience struct used to manipulate the string. It keeps the state
/// needed to apply the normalisation process.
struct LiteralStringNormaliser<'token> {
/// The wrapper around the token that is being normalised
token: &'token FormatLiteralStringToken<'token>,
/// The quote that was set inside the configuration
chosen_quote_style: QuoteStyle,
}

impl<'token> LiteralStringNormaliser<'token> {
    /// Creates a normaliser for `token` that favours `chosen_quote_style`.
    pub fn new(
        token: &'token FormatLiteralStringToken<'_>,
        chosen_quote_style: QuoteStyle,
    ) -> Self {
        Self {
            token,
            chosen_quote_style,
        }
    }

    /// Produces the normalised, quoted text for the token.
    ///
    /// The result borrows from the token when its text can be used unchanged
    /// and is owned whenever new text had to be built.
    ///
    /// # Panics
    ///
    /// Panics if the token is a string literal whose trimmed text is shorter
    /// than two bytes, because the enclosing quotes cannot be sliced off.
    fn normalise_text(&mut self) -> Cow<'token, str> {
        let string_information = self
            .token
            .compute_string_information(self.chosen_quote_style);

        match self.get_token().kind() {
            CSS_STRING_LITERAL => self.normalise_string_literal(string_information),
            _ => self.normalise_non_string_token(string_information),
        }
    }

    /// Returns the underlying syntax token.
    fn get_token(&self) -> &'token CssSyntaxToken {
        self.token.token()
    }

    /// Normalises a token that already is a string literal.
    fn normalise_string_literal(&self, string_information: StringInformation) -> Cow<'token, str> {
        self.quote_normalised_content(&string_information)
    }

    /// Add the chosen quotes to any other kind of token to normalize it into a string.
    ///
    /// CSS has various places where "string-like" tokens can be used without quotes, but the
    /// semantics aren't affected by whether they are present or not. This function lets those
    /// tokens become string literals by safely adding quotes around them.
    fn normalise_non_string_token(
        &self,
        string_information: StringInformation,
    ) -> Cow<'token, str> {
        self.quote_normalised_content(&string_information)
    }

    /// Shared implementation behind both `normalise_*` entry points:
    /// normalises the raw content and encloses it in the preferred quote.
    fn quote_normalised_content(&self, string_information: &StringInformation) -> Cow<'token, str> {
        match self.normalize_string(string_information) {
            // Nothing inside the content changed, so the original text can be
            // reused unless only the enclosing quotes have to be replaced.
            Cow::Borrowed(raw_content) => self.swap_quotes(raw_content, string_information),
            Cow::Owned(normalised) => {
                // content is owned, meaning we allocated a new string,
                // so we force replacing quotes, regardless
                let quote = string_information.preferred_quote.as_char();
                Cow::Owned(std::format!("{}{}{}", quote, normalised, quote))
            }
        }
    }

    /// Runs the shared `normalize_string` routine over the raw content, using
    /// the preferred quote.
    fn normalize_string(&self, string_information: &StringInformation) -> Cow<'token, str> {
        // NOTE(review): the meaning of the trailing `true` flag is defined by
        // `biome_formatter::token::string::normalize_string` — confirm there.
        normalize_string(
            self.raw_content(),
            string_information.preferred_quote.into(),
            true,
        )
    }

    /// Returns the token text without enclosing quotes.
    ///
    /// For string literals the first and the last byte of the trimmed text
    /// are sliced off; every other token kind is returned as is.
    fn raw_content(&self) -> &'token str {
        let token = self.get_token();
        match token.kind() {
            CSS_STRING_LITERAL => {
                // Assumes the literal is enclosed by a one-byte quote on both
                // ends — TODO confirm for unterminated strings.
                let content = token.text_trimmed();
                &content[1..content.len() - 1]
            }
            _ => token.text_trimmed(),
        }
    }

    /// Decides whether the original token text can be kept or whether
    /// `content_to_use` has to be enclosed in the preferred quote.
    ///
    /// The original text is kept when the raw content contains quotes or when
    /// the text already starts with the preferred quote. Otherwise a new
    /// string is built from `content_to_use` and the preferred quote.
    fn swap_quotes(
        &self,
        content_to_use: &'token str,
        string_information: &StringInformation,
    ) -> Cow<'token, str> {
        let original_content = self.get_token().text_trimmed();
        let quote = string_information.preferred_quote.as_char();

        if string_information.raw_content_has_quotes || original_content.starts_with(quote) {
            Cow::Borrowed(original_content)
        } else {
            Cow::Owned(std::format!("{}{}{}", quote, content_to_use, quote))
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[attr="double"] {
background: url("/double/quoted/path");
}

[attr='single'] {
background: url('/single/quoted/path');
}

[attr=no-quotes] {
/* this url-token should stay unquoted */
background: url(/unquoted/path);
}

[attr=\eescaped] {
}

div {
width: 0\eestays-unquoted;
--\eeunquoted: green;
color: var(--\eeunquoted);
}
Loading

0 comments on commit a0c6e2d

Please sign in to comment.