perf: iterate over bytes instead of chars where possible (#4081)

biomejs · Sep 25, 2024 · cc6d34f · cc6d34f
1 parent e184d45
commit cc6d34f
Show file tree

Hide file tree

Showing 33 changed files with 238 additions and 235 deletions.
diff --git a/crates/biome_configuration/src/editorconfig.rs b/crates/biome_configuration/src/editorconfig.rs
@@ -279,18 +279,18 @@ fn expand_unknown_glob_patterns(pattern: &str) -> Result<Vec<String>, EditorConf
 
     let mut all_variants = vec![];
     let mut current_variants = None;
-    for (i, c) in pattern.chars().enumerate() {
-        match c {
-            '{' => {
+    for (index, byte) in pattern.bytes().enumerate() {
+        match byte {
+            b'{' => {
                 if current_variants.is_none() {
-                    current_variants = Some(Variants::new(i));
+                    current_variants = Some(Variants::new(index));
                 } else {
                     // TODO: error, recursive brace expansion is not supported
                 }
             }
-            '}' => {
+            b'}' => {
                 if let Some(mut v) = current_variants.take() {
-                    v.end = i;
+                    v.end = index;
                     v.parse_to_variants(&pattern[v.start..=v.end])?;
                     all_variants.push(v);
                 }

diff --git a/crates/biome_css_analyze/src/lint/correctness/no_invalid_direction_in_linear_gradient.rs b/crates/biome_css_analyze/src/lint/correctness/no_invalid_direction_in_linear_gradient.rs
@@ -92,8 +92,8 @@ impl Rule for NoInvalidDirectionInLinearGradient {
         if IN_KEYWORD.is_match(&first_css_parameter_text) {
             return None;
         }
-        if let Some(first_char) = first_css_parameter_text.chars().next() {
-            if first_char.is_ascii_digit() {
+        if let Some(first_byte) = first_css_parameter_text.bytes().next() {
+            if first_byte.is_ascii_digit() {
                 if ANGLE.is_match(&first_css_parameter_text) {
                     return None;
                 }

diff --git a/crates/biome_css_analyze/src/lint/correctness/no_invalid_grid_areas.rs b/crates/biome_css_analyze/src/lint/correctness/no_invalid_grid_areas.rs
@@ -111,7 +111,7 @@ impl Rule for NoInvalidGridAreas {
             // Need to remove `"` with escaping slash from the grid area
             // Ex: "\"a a a\""
             .map(|x| {
-                let trimmed_text = x.token_text();
+                let trimmed_text = x.token_text_trimmed();
                 let text_range = x.text_range();
                 (trimmed_text, text_range)
             })
@@ -168,12 +168,12 @@ impl Rule for NoInvalidGridAreas {
 
 // Check if the grid areas are consistent
 fn is_consistent_grids(grid_areas_props: GridAreasProps) -> Option<UseConsistentGridAreasState> {
-    let first_prop = clean_text(&grid_areas_props[0].0);
+    let first_prop = inner_string_text(&grid_areas_props[0].0);
     let first_len = first_prop.len();
     let mut shortest = &grid_areas_props[0];
 
     for grid_areas_prop in &grid_areas_props {
-        let cleaned_text = clean_text(&grid_areas_prop.0);
+        let cleaned_text = inner_string_text(&grid_areas_prop.0);
         // Check if the grid areas are empty
         if cleaned_text.is_empty() {
             return Some(UseConsistentGridAreasState {
@@ -184,7 +184,7 @@ fn is_consistent_grids(grid_areas_props: GridAreasProps) -> Option<UseConsistent
         }
         // Check if all elements have the same length
         if cleaned_text.len() != first_len {
-            if cleaned_text.len() < clean_text(&shortest.0).len() {
+            if cleaned_text.len() < inner_string_text(&shortest.0).len() {
                 shortest = grid_areas_prop;
             }
             return Some(UseConsistentGridAreasState {
@@ -223,17 +223,19 @@ fn is_consistent_grids(grid_areas_props: GridAreasProps) -> Option<UseConsistent
 
 // Check if all non-whitespace characters in a string are the same
 fn is_all_same(token_text: TokenText) -> bool {
-    let prop = clean_text(&token_text);
-    let chars: Vec<char> = prop.chars().filter(|c| !c.is_whitespace()).collect();
-    let head = chars[0];
-    chars.iter().all(|&c| c == head)
+    let prop = inner_string_text(&token_text);
+    let mut iter = prop.chars().filter(|c| !c.is_whitespace()); // lazy: no intermediate `Vec`
+    let Some(head) = iter.next() else {
+        return true; // only whitespace (or empty): trivially "all the same"; the old `chars[0]` panicked here
+    };
+    iter.all(|c| c == head)
 }
 
 fn has_partial_match(grid_areas_props: &GridAreasProps) -> Option<GridAreasProp> {
     let mut seen_parts = FxHashSet::default();
 
     for (text, range) in grid_areas_props {
-        let prop = clean_text(text);
+        let prop = inner_string_text(text);
         let parts: FxHashSet<String> = prop
             .split_whitespace()
             .map(|part| part.to_string())
@@ -248,6 +250,15 @@ fn has_partial_match(grid_areas_props: &GridAreasProps) -> Option<GridAreasProp>
     None
 }
 
-fn clean_text(text: &TokenText) -> String {
-    text.replace('"', "").trim().to_string()
+/// Returns the content of a quoted string token without its surrounding quotes,
+/// trimmed of leading and trailing whitespace.
+///
+/// Text that is not wrapped in a matching pair of `"` or `'` is returned unchanged
+/// (and trips a `debug_assert!` in debug builds when it is two or more bytes long).
+fn inner_string_text(text: &TokenText) -> &str {
+    let result = text.text();
+    for quote in ['"', '\''] {
+        // `strip_prefix`/`strip_suffix` only cut whole characters, so this cannot panic on a
+        // char boundary in release builds, where a `debug_assert!` guard is compiled out.
+        let inner = result
+            .strip_prefix(quote)
+            .and_then(|rest| rest.strip_suffix(quote));
+        if let Some(inner) = inner {
+            return inner.trim();
+        }
+    }
+    debug_assert!(result.len() < 2, "expected a quoted string, got {result:?}");
+    result
 }
diff --git a/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_font_names.rs b/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_font_names.rs
@@ -115,6 +115,7 @@ impl Rule for NoDuplicateFontNames {
                 }
                 // A font family name. e.g "Lucida Grande", "Arial".
                 AnyCssValue::CssString(val) => {
+                    // FIXME: avoid String allocation
                     let normalized_font_name: String = val
                         .text()
                         .chars()

diff --git a/crates/biome_diagnostics/src/display/frame.rs b/crates/biome_diagnostics/src/display/frame.rs
@@ -393,16 +393,11 @@ pub(super) fn print_invisibles(
 
     // Get the first trailing whitespace character in the string
     let trailing_whitespace_index = input
-        .char_indices()
+        .bytes()
+        .enumerate()
         .rev()
-        .find_map(|(index, char)| {
-            if !char.is_ascii_whitespace() {
-                Some(index)
-            } else {
-                None
-            }
-        })
-        .unwrap_or(input.len());
+        .find(|(_, byte)| !byte.is_ascii_whitespace())
+        .map_or(input.len(), |(index, _)| index);
 
     let mut iter = input.char_indices().peekable();
     let mut prev_char_was_whitespace = false;

diff --git a/crates/biome_formatter/src/token/number.rs b/crates/biome_formatter/src/token/number.rs
@@ -60,18 +60,18 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
 
     let text = text.to_ascii_lowercase_cow();
     let mut copied_or_ignored_chars = 0usize;
-    let mut iter = text.chars().enumerate();
+    let mut iter = text.bytes().enumerate();
     let mut curr = iter.next();
     let mut state = IntegerPart;
 
     // Will be filled only if and when the first place that needs reformatting is detected.
     let mut cleaned_text = String::new();
 
     // Look at only the start of the text, ignore any sign, and make sure numbers always start with a digit. Add 0 if missing.
-    if let Some((_, '+' | '-')) = curr {
+    if let Some((_, b'+' | b'-')) = curr {
         curr = iter.next();
     }
-    if let Some((curr_index, '.')) = curr {
+    if let Some((curr_index, b'.')) = curr {
         cleaned_text.push_str(&text[copied_or_ignored_chars..curr_index]);
         copied_or_ignored_chars = curr_index;
         cleaned_text.push('0');
@@ -91,7 +91,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
                     dot_index,
                     last_non_zero_index: None,
                 }),
-                (curr_index, Some('e') | None),
+                (curr_index, Some(b'e') | None),
             ) => {
                 // The decimal part equals zero, ignore it completely.
                 // Caveat: Prettier still prints a single `.0` unless there was *only* a trailing dot.
@@ -108,7 +108,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
                     last_non_zero_index: Some(last_non_zero_index),
                     ..
                 }),
-                (curr_index, Some('e') | None),
+                (curr_index, Some(b'e') | None),
             ) if last_non_zero_index.get() < curr_index - 1 => {
                 // The decimal part ends with at least one zero, ignore them but copy the part from the dot until the last non-zero.
                 cleaned_text.push_str(&text[copied_or_ignored_chars..=last_non_zero_index.get()]);
@@ -151,13 +151,13 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
         // Update state after the current char
         match (&state, curr) {
             // Cases entering or remaining in decimal part
-            (_, Some((curr_index, '.'))) => {
+            (_, Some((curr_index, b'.'))) => {
                 state = DecimalPart(FormatNumberLiteralDecimalPart {
                     dot_index: curr_index,
                     last_non_zero_index: None,
                 });
             }
-            (DecimalPart(decimal_part), Some((curr_index, '1'..='9'))) => {
+            (DecimalPart(decimal_part), Some((curr_index, b'1'..=b'9'))) => {
                 state = DecimalPart(FormatNumberLiteralDecimalPart {
                     last_non_zero_index: Some(unsafe {
                         // We've already entered InDecimalPart, so curr_index must be >0
@@ -167,15 +167,15 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
                 });
             }
             // Cases entering or remaining in exponent
-            (_, Some((curr_index, 'e'))) => {
+            (_, Some((curr_index, b'e'))) => {
                 state = Exponent(FormatNumberLiteralExponent {
                     e_index: curr_index,
                     is_negative: false,
                     first_digit_index: None,
                     first_non_zero_index: None,
                 });
             }
-            (Exponent(exponent), Some((_, '-'))) => {
+            (Exponent(exponent), Some((_, b'-'))) => {
                 state = Exponent(FormatNumberLiteralExponent {
                     is_negative: true,
                     ..*exponent
@@ -188,14 +188,14 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
                         ..
                     },
                 ),
-                Some((curr_index, curr_char @ '0'..='9')),
+                Some((curr_index, curr_char @ b'0'..=b'9')),
             ) => {
                 state = Exponent(FormatNumberLiteralExponent {
                     first_digit_index: Some(unsafe {
                         // We've already entered InExponent, so curr_index must be >0
                         NonZeroUsize::new_unchecked(curr_index)
                     }),
-                    first_non_zero_index: if curr_char != '0' {
+                    first_non_zero_index: if curr_char != b'0' {
                         Some(unsafe {
                             // We've already entered InExponent, so curr_index must be >0
                             NonZeroUsize::new_unchecked(curr_index)
@@ -213,7 +213,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
                         ..
                     },
                 ),
-                Some((curr_index, '1'..='9')),
+                Some((curr_index, b'1'..=b'9')),
             ) => {
                 state = Exponent(FormatNumberLiteralExponent {
                     first_non_zero_index: Some(unsafe { NonZeroUsize::new_unchecked(curr_index) }),
@@ -225,7 +225,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
 
         // Repeat or exit
         match curr {
-            None | Some((_, 'x') /* hex bailout */) => break,
+            None | Some((_, b'x') /* hex bailout */) => break,
             Some(_) => curr = iter.next(),
         }
     }

diff --git a/crates/biome_graphql_formatter/src/graphql/value/string_value.rs b/crates/biome_graphql_formatter/src/graphql/value/string_value.rs
@@ -28,7 +28,7 @@ impl FormatNodeRule<GraphqlStringValue> for FormatGraphqlStringValue {
             let min_indent = trimmed_content
                 .lines()
                 .filter(|line| !line.trim().is_empty()) // Ignore empty lines
-                .map(|line| line.chars().take_while(|&c| c.is_whitespace()).count())
+                .map(|line| line.bytes().take_while(|b| b.is_ascii_whitespace()).count())
                 .min()
                 .unwrap_or(0);
 
@@ -73,5 +73,5 @@ impl FormatNodeRule<GraphqlStringValue> for FormatGraphqlStringValue {
 }
 
 fn is_blank(line: &str) -> bool {
-    line.chars().all(|c| c.is_whitespace())
+    line.bytes().all(|byte| byte.is_ascii_whitespace()) // ASCII-only: Unicode whitespace such as U+00A0 no longer counts as blank
 }
diff --git a/crates/biome_grit_patterns/src/grit_built_in_functions.rs b/crates/biome_grit_patterns/src/grit_built_in_functions.rs
@@ -116,7 +116,9 @@ fn capitalize_fn<'a>(
     };
 
     let string = arg1.text(&state.files, context.language())?;
-    Ok(ResolvedPattern::from_string(capitalize(&string)))
+    Ok(ResolvedPattern::from_string(
+        capitalize(&string).to_string(),
+    ))
 }
 
 fn distinct_fn<'a>(
@@ -379,12 +381,14 @@ fn uppercase_fn<'a>(
     Ok(ResolvedPattern::from_string(string.to_uppercase()))
 }
 
-fn capitalize(s: &str) -> String {
-    let mut chars = s.chars();
-    match chars.next() {
-        None => String::new(),
-        Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
+/// Returns `s` with its first character converted to uppercase.
+///
+/// Uses the full Unicode mapping (`char::to_uppercase`), so non-ASCII letters such as `é`
+/// are capitalized too, and a single character may expand to several (`ß` -> `SS`).
+/// The input is returned borrowed, without allocating, when it is empty or when
+/// uppercasing leaves its first character unchanged.
+fn capitalize(s: &str) -> Cow<str> {
+    if let Some(first_char) = s.chars().next() {
+        // Allocate only when uppercasing actually changes the first character; digits,
+        // punctuation and already-uppercase letters take the borrowed path below.
+        if !first_char.to_uppercase().eq(std::iter::once(first_char)) {
+            let mut capitalized = String::with_capacity(s.len());
+            capitalized.extend(first_char.to_uppercase());
+            capitalized.push_str(&s[first_char.len_utf8()..]);
+            return Cow::Owned(capitalized);
+        }
     }
+    Cow::Borrowed(s)
 }
 
 fn resolve<'a>(target_path: Cow<'a, str>, from_file: Cow<'a, str>) -> Result<String> {

diff --git a/crates/biome_html_formatter/src/utils/children.rs b/crates/biome_html_formatter/src/utils/children.rs
@@ -11,7 +11,7 @@ use biome_rowan::{SyntaxResult, TextLen, TextRange, TextSize, TokenText};
 
 use crate::{comments::HtmlComments, context::HtmlFormatContext, HtmlFormatter};
 
-pub(crate) static HTML_WHITESPACE_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];
+pub(crate) static HTML_WHITESPACE_CHARS: [u8; 4] = [b' ', b'\n', b'\t', b'\r'];
 
 /// Meaningful HTML text is defined to be text that has either non-whitespace
 /// characters, or does not contain a newline. Whitespace is defined as ASCII
@@ -29,11 +29,11 @@ pub(crate) static HTML_WHITESPACE_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];
 /// ```
 pub fn is_meaningful_html_text(text: &str) -> bool {
     let mut has_newline = false;
-    for c in text.chars() {
+    for byte in text.bytes() {
         // If there is a non-whitespace character
-        if !HTML_WHITESPACE_CHARS.contains(&c) {
+        if !HTML_WHITESPACE_CHARS.contains(&byte) {
             return true;
-        } else if c == '\n' {
+        } else if byte == b'\n' {
             has_newline = true;
         }
     }
@@ -191,7 +191,7 @@ where
                                     // A text only consisting of whitespace that also contains a new line isn't considered meaningful text.
                                     // It can be entirely removed from the content without changing the semantics.
                                     let newlines =
-                                        whitespace.chars().filter(|c| *c == '\n').count();
+                                        whitespace.bytes().filter(|b| *b == b'\n').count();
 
                                     // Keep up to one blank line between tags.
                                     // ```html

diff --git a/...biome_js_analyze/src/lint/complexity/no_multiple_spaces_in_regular_expression_literals.rs b/...biome_js_analyze/src/lint/complexity/no_multiple_spaces_in_regular_expression_literals.rs
@@ -67,7 +67,6 @@ impl Rule for NoMultipleSpacesInRegularExpressionLiterals {
         let mut range_list = vec![];
         let mut previous_is_space = false;
         let mut first_consecutive_space_index = 0;
-        // We use `char_indices` to get the byte index of every character
         for (i, ch) in trimmed_text.bytes().enumerate() {
             if ch == b' ' {
                 if !previous_is_space {