From a0c6e2d169361917fcc9f9b70068e403d79c5465 Mon Sep 17 00:00:00 2001 From: Jon Egeland Date: Sun, 31 Dec 2023 06:33:14 +0000 Subject: [PATCH] implement string normalization --- crates/biome_css_formatter/src/context.rs | 4 + .../css/auxiliary/attribute_matcher_value.rs | 26 +- .../src/css/value/string.rs | 4 +- .../src/utils/string_utils.rs | 290 ++++++++++++++++++ .../css/quote_style/normalize_quotes.css | 21 ++ .../css/quote_style/normalize_quotes.css.snap | 105 +++++++ .../tests/specs/css/quote_style/options.json | 7 + .../css/selectors/attribute_selector.css.snap | 52 ++-- 8 files changed, 462 insertions(+), 47 deletions(-) create mode 100644 crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css create mode 100644 crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css.snap create mode 100644 crates/biome_css_formatter/tests/specs/css/quote_style/options.json diff --git a/crates/biome_css_formatter/src/context.rs b/crates/biome_css_formatter/src/context.rs index 56efb4a666e9..2b94b21a70d3 100644 --- a/crates/biome_css_formatter/src/context.rs +++ b/crates/biome_css_formatter/src/context.rs @@ -121,6 +121,10 @@ impl CssFormatOptions { pub fn set_quote_style(&mut self, quote_style: QuoteStyle) { self.quote_style = quote_style; } + + pub fn quote_style(&self) -> QuoteStyle { + self.quote_style + } } impl FormatOptions for CssFormatOptions { diff --git a/crates/biome_css_formatter/src/css/auxiliary/attribute_matcher_value.rs b/crates/biome_css_formatter/src/css/auxiliary/attribute_matcher_value.rs index 429f7a74799e..feb40379c8ad 100644 --- a/crates/biome_css_formatter/src/css/auxiliary/attribute_matcher_value.rs +++ b/crates/biome_css_formatter/src/css/auxiliary/attribute_matcher_value.rs @@ -1,6 +1,4 @@ -use std::borrow::Cow; - -use crate::prelude::*; +use crate::{prelude::*, utils::string_utils::FormatLiteralStringToken}; use biome_css_syntax::{ AnyCssAttributeMatcherValue, CssAttributeMatcherValue, 
CssAttributeMatcherValueFields, }; @@ -28,26 +26,16 @@ impl FormatNodeRule for FormatCssAttributeMatcherValue return write!(f, [ident.format()]); } - // Unlike almost all other usages of regular identifiers, - // attribute values are case-sensitive, so the identifier here - // does not get converted to lowercase. Once it's quoted, it - // will be parsed as a CssString on the next pass, at which - // point casing is preserved no matter what. - let value = ident.value_token()?; - let quoted = std::format!("\"{}\"", value.text_trimmed()); - write!( f, [ format_leading_comments(ident.syntax()), - format_replaced( - &value, - &syntax_token_cow_slice( - Cow::Owned(quoted), - &value, - value.text_trimmed_range().start() - ) - ), + // Unlike almost all other usages of regular identifiers, + // attribute values are case-sensitive, so the identifier here + // does not get converted to lowercase. Once it's quoted, it + // will be parsed as a CssString on the next pass, at which + // point casing is preserved no matter what. 
+ FormatLiteralStringToken::new(&ident.value_token()?), format_trailing_comments(ident.syntax()), format_dangling_comments(ident.syntax()) ] diff --git a/crates/biome_css_formatter/src/css/value/string.rs b/crates/biome_css_formatter/src/css/value/string.rs index 326913c4be40..943cfd533732 100644 --- a/crates/biome_css_formatter/src/css/value/string.rs +++ b/crates/biome_css_formatter/src/css/value/string.rs @@ -1,4 +1,4 @@ -use crate::prelude::*; +use crate::{prelude::*, utils::string_utils::FormatLiteralStringToken}; use biome_css_syntax::{CssString, CssStringFields}; use biome_formatter::write; @@ -8,6 +8,6 @@ impl FormatNodeRule for FormatCssString { fn fmt_fields(&self, node: &CssString, f: &mut CssFormatter) -> FormatResult<()> { let CssStringFields { value_token } = node.as_fields(); - write!(f, [value_token.format()]) + write!(f, [FormatLiteralStringToken::new(&value_token?)]) } } diff --git a/crates/biome_css_formatter/src/utils/string_utils.rs b/crates/biome_css_formatter/src/utils/string_utils.rs index 4f0405d9915a..a374292d0a0f 100644 --- a/crates/biome_css_formatter/src/utils/string_utils.rs +++ b/crates/biome_css_formatter/src/utils/string_utils.rs @@ -1,7 +1,12 @@ use std::borrow::Cow; +use crate::context::CssFormatOptions; use crate::prelude::*; use biome_css_syntax::CssLanguage; +use biome_css_syntax::CssSyntaxKind::{CSS_STRING_LITERAL, CSS_URL_VALUE_RAW_LITERAL, IDENT}; +use biome_css_syntax::CssSyntaxToken; +use biome_formatter::token::string::normalize_string; +use biome_formatter::QuoteStyle; use biome_formatter::{ prelude::{dynamic_text, write}, token::string::ToAsciiLowercaseCow, @@ -37,3 +42,288 @@ impl Format for FormatTokenAsLowercase { } } } + +/// Data structure of convenience to format string literals. This is copied +/// from the JS formatter, but should eventually have the logic made generic +/// and reusable since many languages will have the same needs. 
+pub(crate) struct FormatLiteralStringToken<'token> { + /// The current token + token: &'token CssSyntaxToken, +} + +impl<'token> FormatLiteralStringToken<'token> { + pub fn new(token: &'token CssSyntaxToken) -> Self { + Self { token } + } + + fn token(&self) -> &'token CssSyntaxToken { + self.token + } + + pub fn clean_text(&self, options: &CssFormatOptions) -> CleanedStringLiteralText { + let token = self.token(); + debug_assert!( + matches!( + token.kind(), + CSS_STRING_LITERAL | CSS_URL_VALUE_RAW_LITERAL | IDENT + ), + "Found kind {:?}", + token.kind() + ); + + let chosen_quote_style = options.quote_style(); + let mut string_cleaner = LiteralStringNormaliser::new(self, chosen_quote_style); + + let content = string_cleaner.normalise_text(); + + CleanedStringLiteralText { + text: content, + token, + } + } +} + +pub(crate) struct CleanedStringLiteralText<'a> { + token: &'a CssSyntaxToken, + text: Cow<'a, str>, +} + +impl Format for CleanedStringLiteralText<'_> { + fn fmt(&self, f: &mut Formatter) -> FormatResult<()> { + format_replaced( + self.token, + &syntax_token_cow_slice( + self.text.clone(), + self.token, + self.token.text_trimmed_range().start(), + ), + ) + .fmt(f) + } +} + +impl Format for FormatLiteralStringToken<'_> { + fn fmt(&self, f: &mut CssFormatter) -> FormatResult<()> { + let cleaned = self.clean_text(f.options()); + + cleaned.fmt(f) + } +} + +/// Data structure of convenience to store some information about the +/// string that has been processed +struct StringInformation { + /// This is the quote that is calculated and eventually used inside the string. + /// It could be different from the one inside the formatter options + preferred_quote: QuoteStyle, + /// It flags if the raw content has quotes (single or double). 
The raw content is the + /// content of a string literal without the quotes + raw_content_has_quotes: bool, +} + +impl FormatLiteralStringToken<'_> { + /// This function determines which quotes should be used to enclose the string. + /// The function takes as input the string **without quotes**. + /// + /// # How it works + /// + /// The function determines the preferred quote and alternate quote. + /// The preferred quote is the one that comes from the formatter options. The alternate quote is the other one. + /// + /// We check how many preferred quotes we have inside the content. If this number is greater than the + /// number of alternate quotes that we have inside the content, + /// then we swap them, so we can reduce the number of escaped quotes. + /// + /// For example, let's suppose that the preferred quote is double, and we have a string like this: + /// ```js + /// (" content \"\"\" don't ") + /// ``` + /// Excluding the quotes at the start and end, we have three double quotes and one single quote. + /// If we decided to keep them like this, we would have three escaped quotes. + /// + /// But then, we choose the single quote as preferred quote and we would have only one quote that is escaped, + /// resulting in a string like this: + /// ```js + /// (' content """ don\'t ') + /// ``` + /// Like this, we reduced the number of escaped quotes. + fn compute_string_information(&self, chosen_quote: QuoteStyle) -> StringInformation { + // For anything other than string literals, the token won't have + // pre-existing quotes, so we can just immediately, safely use the + // preferred quote style without having to check the content. 
+ if !matches!(self.token().kind(), CSS_STRING_LITERAL) { + return StringInformation { + raw_content_has_quotes: false, + preferred_quote: chosen_quote, + }; + } + + let literal = self.token().text_trimmed(); + let alternate = chosen_quote.other(); + + let char_count = literal.chars().count(); + + let (preferred_quotes_count, alternate_quotes_count) = literal.chars().enumerate().fold( + (0, 0), + |(preferred_quotes_counter, alternate_quotes_counter), (index, current_character)| { + if index == 0 || index == char_count - 1 { + (preferred_quotes_counter, alternate_quotes_counter) + } else if current_character == chosen_quote.as_char() { + (preferred_quotes_counter + 1, alternate_quotes_counter) + } else if current_character == alternate.as_char() { + (preferred_quotes_counter, alternate_quotes_counter + 1) + } else { + (preferred_quotes_counter, alternate_quotes_counter) + } + }, + ); + + StringInformation { + raw_content_has_quotes: preferred_quotes_count > 0 || alternate_quotes_count > 0, + preferred_quote: if preferred_quotes_count > alternate_quotes_count { + alternate + } else { + chosen_quote + }, + } + } +} + +/// Struct of convenience used to manipulate the string. It saves some state in order to apply +/// the normalise process. 
+struct LiteralStringNormaliser<'token> { + /// The current token + token: &'token FormatLiteralStringToken<'token>, + /// The quote that was set inside the configuration + chosen_quote_style: QuoteStyle, +} + +impl<'token> LiteralStringNormaliser<'token> { + pub fn new( + token: &'token FormatLiteralStringToken<'_>, + chosen_quote_style: QuoteStyle, + ) -> Self { + Self { + token, + chosen_quote_style, + } + } + + fn normalise_text(&mut self) -> Cow<'token, str> { + let string_information = self + .token + .compute_string_information(self.chosen_quote_style); + + match self.token.token.kind() { + CSS_STRING_LITERAL => self.normalise_string_literal(string_information), + _ => self.normalise_non_string_token(string_information), + } + } + + fn get_token(&self) -> &'token CssSyntaxToken { + self.token.token() + } + + fn normalise_string_literal(&self, string_information: StringInformation) -> Cow<'token, str> { + let preferred_quote = string_information.preferred_quote; + let polished_raw_content = self.normalize_string(&string_information); + + match polished_raw_content { + Cow::Borrowed(raw_content) => { + let final_content = self.swap_quotes(raw_content, &string_information); + match final_content { + Cow::Borrowed(final_content) => Cow::Borrowed(final_content), + Cow::Owned(final_content) => Cow::Owned(final_content), + } + } + Cow::Owned(s) => { + // content is owned, meaning we allocated a new string, + // so we force replacing quotes, regardless + let final_content = std::format!( + "{}{}{}", + preferred_quote.as_char(), + s.as_str(), + preferred_quote.as_char() + ); + + Cow::Owned(final_content) + } + } + } + + /// Add the chosen quotes to any other kind of token to normalize it into a string. + /// + /// CSS has various places where "string-like" tokens can be used without quotes, but the + /// semantics aren't affected by whether they are present or not. This function lets those + /// tokens become string literals by safely adding quotes around them. 
+ fn normalise_non_string_token( + &self, + string_information: StringInformation, + ) -> Cow<'token, str> { + let preferred_quote = string_information.preferred_quote; + let polished_raw_content = self.normalize_string(&string_information); + + match polished_raw_content { + Cow::Borrowed(raw_content) => { + let final_content = self.swap_quotes(raw_content, &string_information); + match final_content { + Cow::Borrowed(final_content) => Cow::Borrowed(final_content), + Cow::Owned(final_content) => Cow::Owned(final_content), + } + } + Cow::Owned(s) => { + // content is owned, meaning we allocated a new string, + // so we force replacing quotes, regardless + let final_content = std::format!( + "{}{}{}", + preferred_quote.as_char(), + s.as_str(), + preferred_quote.as_char() + ); + + Cow::Owned(final_content) + } + } + } + + fn normalize_string(&self, string_information: &StringInformation) -> Cow<'token, str> { + let raw_content = self.raw_content(); + + normalize_string(raw_content, string_information.preferred_quote.into(), true) + } + + fn raw_content(&self) -> &'token str { + let token = self.get_token(); + match token.kind() { + CSS_STRING_LITERAL => { + let content = token.text_trimmed(); + &content[1..content.len() - 1] + } + _ => token.text_trimmed(), + } + } + + fn swap_quotes( + &self, + content_to_use: &'token str, + string_information: &StringInformation, + ) -> Cow<'token, str> { + let original_content = self.get_token().text_trimmed(); + let preferred_quote = string_information.preferred_quote; + + let raw_content_has_quotes = string_information.raw_content_has_quotes; + + if raw_content_has_quotes { + Cow::Borrowed(original_content) + } else if !original_content.starts_with(preferred_quote.as_char()) { + Cow::Owned(std::format!( + "{}{}{}", + preferred_quote.as_char(), + content_to_use, + preferred_quote.as_char() + )) + } else { + Cow::Borrowed(original_content) + } + } +} diff --git 
a/crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css b/crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css new file mode 100644 index 000000000000..2e54284ea980 --- /dev/null +++ b/crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css @@ -0,0 +1,21 @@ +[attr="double"] { + background: url("/double/quoted/path"); +} + +[attr='single'] { + background: url('/single/quoted/path'); +} + +[attr=no-quotes] { + /* this url-token should stay unquoted */ + background: url(/unquoted/path); +} + +[attr=\eescaped] { +} + +div { + width: 0\eestays-unquoted; + --\eeunquoted: green; + color: var(--\eeunquoted); +} \ No newline at end of file diff --git a/crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css.snap b/crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css.snap new file mode 100644 index 000000000000..277989ec1d22 --- /dev/null +++ b/crates/biome_css_formatter/tests/specs/css/quote_style/normalize_quotes.css.snap @@ -0,0 +1,105 @@ +--- +source: crates/biome_formatter_test/src/snapshot_builder.rs +info: css/quote_style/normalize_quotes.css +--- + +# Input + +```css +[attr="double"] { + background: url("/double/quoted/path"); +} + +[attr='single'] { + background: url('/single/quoted/path'); +} + +[attr=no-quotes] { + /* this url-token should stay unquoted */ + background: url(/unquoted/path); +} + +[attr=\eescaped] { +} + +div { + width: 0\eestays-unquoted; + --\eeunquoted: green; + color: var(--\eeunquoted); +} +``` + + +============================= + +# Outputs + +## Output 1 + +----- +Indent style: Tab +Indent width: 2 +Line ending: LF +Line width: 80 +Quote style: Double Quotes +----- + +```css +[attr="double"] { + background: url("/double/quoted/path"); +} + +[attr="single"] { + background: url("/single/quoted/path"); +} + +[attr="no-quotes"] { + /* this url-token should stay unquoted */ + background: url(/unquoted/path); +} + +[attr="\eescaped"] { +} + 
+div { + width: 0\eestays-unquoted; + --\eeunquoted: green; + color: var(--\eeunquoted); +} +``` + +## Output 2 + +----- +Indent style: Tab +Indent width: 2 +Line ending: LF +Line width: 80 +Quote style: Single Quotes +----- + +```css +[attr='double'] { + background: url('/double/quoted/path'); +} + +[attr='single'] { + background: url('/single/quoted/path'); +} + +[attr='no-quotes'] { + /* this url-token should stay unquoted */ + background: url(/unquoted/path); +} + +[attr='\eescaped'] { +} + +div { + width: 0\eestays-unquoted; + --\eeunquoted: green; + color: var(--\eeunquoted); +} +``` + + diff --git a/crates/biome_css_formatter/tests/specs/css/quote_style/options.json b/crates/biome_css_formatter/tests/specs/css/quote_style/options.json new file mode 100644 index 000000000000..6edcc0d45946 --- /dev/null +++ b/crates/biome_css_formatter/tests/specs/css/quote_style/options.json @@ -0,0 +1,7 @@ +{ + "cases": [ + { + "quote_style": "Single" + } + ] +} \ No newline at end of file diff --git a/crates/biome_css_formatter/tests/specs/css/selectors/attribute_selector.css.snap b/crates/biome_css_formatter/tests/specs/css/selectors/attribute_selector.css.snap index ecdd6db77443..3851b9aedbbb 100644 --- a/crates/biome_css_formatter/tests/specs/css/selectors/attribute_selector.css.snap +++ b/crates/biome_css_formatter/tests/specs/css/selectors/attribute_selector.css.snap @@ -169,7 +169,7 @@ a[id="test"] { } a[id="test"] { } -a[id='test'] { +a[id="test"] { } a[id= func("foo")] {} a[class="(╯°□°)╯︵ ┻━┻"] { @@ -197,55 +197,55 @@ span[lang] { } span[lang] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang='pt'] { +span[lang="pt"] { } -span[lang~='en-us'] { +span[lang~="en-us"] { } -span[lang~='en-us'] { +span[lang~="en-us"] { } 
-span[lang|='zh'] { +span[lang|="zh"] { } -span[lang~='en-us'] { +span[lang~="en-us"] { } -a[href^='#'] { +a[href^="#"] { } -a[href*='example'] { +a[href*="example"] { } -a[href*='example'] { +a[href*="example"] { } -input[type='radio' i] { +input[type="radio" i] { } -input[type='radio' i] { +input[type="radio" i] { } -input[type~='radio' i] { +input[type~="radio" i] { } -input[type~='radio' i] { +input[type~="radio" i] { } -input[type~='radio' i] { +input[type~="radio" i] { } -img[alt='person'][src='lorem'] { +img[alt="person"][src="lorem"] { } -img[alt='person'][src='lorem'] { +img[alt="person"][src="lorem"] { } -img[alt~='person'][src*='lorem'] { +img[alt~="person"][src*="lorem"] { } -img[alt~='person'][src*='lorem'] { +img[alt~="person"][src*="lorem"] { } -img[alt~='person'][src*='lorem'] { +img[alt~="person"][src*="lorem"] { } [foo|att="val"] {