Skip to content

Commit

Permalink
perf: iterate over bytes instead of chars where possible (#4081)
Browse files Browse the repository at this point in the history
  • Loading branch information
Conaclos authored Sep 25, 2024
1 parent e184d45 commit cc6d34f
Show file tree
Hide file tree
Showing 33 changed files with 238 additions and 235 deletions.
12 changes: 6 additions & 6 deletions crates/biome_configuration/src/editorconfig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,18 +279,18 @@ fn expand_unknown_glob_patterns(pattern: &str) -> Result<Vec<String>, EditorConf

let mut all_variants = vec![];
let mut current_variants = None;
for (i, c) in pattern.chars().enumerate() {
match c {
'{' => {
for (index, byte) in pattern.bytes().enumerate() {
match byte {
b'{' => {
if current_variants.is_none() {
current_variants = Some(Variants::new(i));
current_variants = Some(Variants::new(index));
} else {
// TODO: error, recursive brace expansion is not supported
}
}
'}' => {
b'}' => {
if let Some(mut v) = current_variants.take() {
v.end = i;
v.end = index;
v.parse_to_variants(&pattern[v.start..=v.end])?;
all_variants.push(v);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ impl Rule for NoInvalidDirectionInLinearGradient {
if IN_KEYWORD.is_match(&first_css_parameter_text) {
return None;
}
if let Some(first_char) = first_css_parameter_text.chars().next() {
if first_char.is_ascii_digit() {
if let Some(first_byte) = first_css_parameter_text.bytes().next() {
if first_byte.is_ascii_digit() {
if ANGLE.is_match(&first_css_parameter_text) {
return None;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ impl Rule for NoInvalidGridAreas {
// Need to remove `"` with escaping slash from the grid area
// Ex: "\"a a a\""
.map(|x| {
let trimmed_text = x.token_text();
let trimmed_text = x.token_text_trimmed();
let text_range = x.text_range();
(trimmed_text, text_range)
})
Expand Down Expand Up @@ -168,12 +168,12 @@ impl Rule for NoInvalidGridAreas {

// Check if the grid areas are consistent
fn is_consistent_grids(grid_areas_props: GridAreasProps) -> Option<UseConsistentGridAreasState> {
let first_prop = clean_text(&grid_areas_props[0].0);
let first_prop = inner_string_text(&grid_areas_props[0].0);
let first_len = first_prop.len();
let mut shortest = &grid_areas_props[0];

for grid_areas_prop in &grid_areas_props {
let cleaned_text = clean_text(&grid_areas_prop.0);
let cleaned_text = inner_string_text(&grid_areas_prop.0);
// Check if the grid areas are empty
if cleaned_text.is_empty() {
return Some(UseConsistentGridAreasState {
Expand All @@ -184,7 +184,7 @@ fn is_consistent_grids(grid_areas_props: GridAreasProps) -> Option<UseConsistent
}
// Check if all elements have the same length
if cleaned_text.len() != first_len {
if cleaned_text.len() < clean_text(&shortest.0).len() {
if cleaned_text.len() < inner_string_text(&shortest.0).len() {
shortest = grid_areas_prop;
}
return Some(UseConsistentGridAreasState {
Expand Down Expand Up @@ -223,17 +223,19 @@ fn is_consistent_grids(grid_areas_props: GridAreasProps) -> Option<UseConsistent

/// Checks whether every non-whitespace character of a grid-area string is the same.
///
/// The surrounding quotes are removed with `inner_string_text` first. A string
/// that has no non-whitespace character at all is reported as "all the same"
/// (`true`) instead of panicking on an empty collection.
fn is_all_same(token_text: TokenText) -> bool {
    let prop = inner_string_text(&token_text);
    // Walk the characters lazily: no intermediate `Vec` is needed to compare
    // every character against the first one.
    let mut non_whitespace = prop.chars().filter(|c| !c.is_whitespace());
    let Some(head) = non_whitespace.next() else {
        return true;
    };
    non_whitespace.all(|c| c == head)
}

fn has_partial_match(grid_areas_props: &GridAreasProps) -> Option<GridAreasProp> {
let mut seen_parts = FxHashSet::default();

for (text, range) in grid_areas_props {
let prop = clean_text(text);
let prop = inner_string_text(text);
let parts: FxHashSet<String> = prop
.split_whitespace()
.map(|part| part.to_string())
Expand All @@ -248,6 +250,15 @@ fn has_partial_match(grid_areas_props: &GridAreasProps) -> Option<GridAreasProp>
None
}

fn clean_text(text: &TokenText) -> String {
text.replace('"', "").trim().to_string()
fn inner_string_text(text: &TokenText) -> &str {
let result = text.text();
if result.len() >= 2 {
debug_assert!(
(result.starts_with('"') && result.len() >= 2 && result.ends_with('"'))
|| (result.starts_with('\'') && result.len() >= 2 && result.ends_with('\''))
);
result[1..result.len() - 1].trim()
} else {
result
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ impl Rule for NoDuplicateFontNames {
}
// A font family name. e.g "Lucida Grande", "Arial".
AnyCssValue::CssString(val) => {
// FIXME: avoid String allocation
let normalized_font_name: String = val
.text()
.chars()
Expand Down
13 changes: 4 additions & 9 deletions crates/biome_diagnostics/src/display/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,11 @@ pub(super) fn print_invisibles(

// Get the index of the last non-whitespace byte in the string, or `input.len()` if there is none
let trailing_whitespace_index = input
.char_indices()
.bytes()
.enumerate()
.rev()
.find_map(|(index, char)| {
if !char.is_ascii_whitespace() {
Some(index)
} else {
None
}
})
.unwrap_or(input.len());
.find(|(_, byte)| !byte.is_ascii_whitespace())
.map_or(input.len(), |(index, _)| index);

let mut iter = input.char_indices().peekable();
let mut prev_char_was_whitespace = false;
Expand Down
26 changes: 13 additions & 13 deletions crates/biome_formatter/src/token/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,18 @@ fn format_trimmed_number(text: &str) -> Cow<str> {

let text = text.to_ascii_lowercase_cow();
let mut copied_or_ignored_chars = 0usize;
let mut iter = text.chars().enumerate();
let mut iter = text.bytes().enumerate();
let mut curr = iter.next();
let mut state = IntegerPart;

// Will be filled only if and when the first place that needs reformatting is detected.
let mut cleaned_text = String::new();

// Look at only the start of the text, ignore any sign, and make sure numbers always start with a digit. Add 0 if missing.
if let Some((_, '+' | '-')) = curr {
if let Some((_, b'+' | b'-')) = curr {
curr = iter.next();
}
if let Some((curr_index, '.')) = curr {
if let Some((curr_index, b'.')) = curr {
cleaned_text.push_str(&text[copied_or_ignored_chars..curr_index]);
copied_or_ignored_chars = curr_index;
cleaned_text.push('0');
Expand All @@ -91,7 +91,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
dot_index,
last_non_zero_index: None,
}),
(curr_index, Some('e') | None),
(curr_index, Some(b'e') | None),
) => {
// The decimal part equals zero, ignore it completely.
// Caveat: Prettier still prints a single `.0` unless there was *only* a trailing dot.
Expand All @@ -108,7 +108,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
last_non_zero_index: Some(last_non_zero_index),
..
}),
(curr_index, Some('e') | None),
(curr_index, Some(b'e') | None),
) if last_non_zero_index.get() < curr_index - 1 => {
// The decimal part ends with at least one zero, ignore them but copy the part from the dot until the last non-zero.
cleaned_text.push_str(&text[copied_or_ignored_chars..=last_non_zero_index.get()]);
Expand Down Expand Up @@ -151,13 +151,13 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
// Update state after the current char
match (&state, curr) {
// Cases entering or remaining in decimal part
(_, Some((curr_index, '.'))) => {
(_, Some((curr_index, b'.'))) => {
state = DecimalPart(FormatNumberLiteralDecimalPart {
dot_index: curr_index,
last_non_zero_index: None,
});
}
(DecimalPart(decimal_part), Some((curr_index, '1'..='9'))) => {
(DecimalPart(decimal_part), Some((curr_index, b'1'..=b'9'))) => {
state = DecimalPart(FormatNumberLiteralDecimalPart {
last_non_zero_index: Some(unsafe {
// SAFETY: we are already in `DecimalPart`, so the dot precedes this byte and `curr_index` must be > 0
Expand All @@ -167,15 +167,15 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
});
}
// Cases entering or remaining in exponent
(_, Some((curr_index, 'e'))) => {
(_, Some((curr_index, b'e'))) => {
state = Exponent(FormatNumberLiteralExponent {
e_index: curr_index,
is_negative: false,
first_digit_index: None,
first_non_zero_index: None,
});
}
(Exponent(exponent), Some((_, '-'))) => {
(Exponent(exponent), Some((_, b'-'))) => {
state = Exponent(FormatNumberLiteralExponent {
is_negative: true,
..*exponent
Expand All @@ -188,14 +188,14 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
..
},
),
Some((curr_index, curr_char @ '0'..='9')),
Some((curr_index, curr_char @ b'0'..=b'9')),
) => {
state = Exponent(FormatNumberLiteralExponent {
first_digit_index: Some(unsafe {
// SAFETY: we are already in `Exponent`, so the `e` precedes this byte and `curr_index` must be > 0
NonZeroUsize::new_unchecked(curr_index)
}),
first_non_zero_index: if curr_char != '0' {
first_non_zero_index: if curr_char != b'0' {
Some(unsafe {
// SAFETY: we are already in `Exponent`, so the `e` precedes this byte and `curr_index` must be > 0
NonZeroUsize::new_unchecked(curr_index)
Expand All @@ -213,7 +213,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {
..
},
),
Some((curr_index, '1'..='9')),
Some((curr_index, b'1'..=b'9')),
) => {
state = Exponent(FormatNumberLiteralExponent {
first_non_zero_index: Some(unsafe { NonZeroUsize::new_unchecked(curr_index) }),
Expand All @@ -225,7 +225,7 @@ fn format_trimmed_number(text: &str) -> Cow<str> {

// Repeat or exit
match curr {
None | Some((_, 'x') /* hex bailout */) => break,
None | Some((_, b'x') /* hex bailout */) => break,
Some(_) => curr = iter.next(),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ impl FormatNodeRule<GraphqlStringValue> for FormatGraphqlStringValue {
let min_indent = trimmed_content
.lines()
.filter(|line| !line.trim().is_empty()) // Ignore empty lines
.map(|line| line.chars().take_while(|&c| c.is_whitespace()).count())
.map(|line| line.bytes().take_while(|b| b.is_ascii_whitespace()).count())
.min()
.unwrap_or(0);

Expand Down Expand Up @@ -73,5 +73,5 @@ impl FormatNodeRule<GraphqlStringValue> for FormatGraphqlStringValue {
}

/// Returns `true` when `line` is empty or consists solely of ASCII whitespace.
///
/// The check is byte-wise, so no UTF-8 decoding is needed. Any non-ASCII
/// content, including Unicode-only whitespace such as U+00A0, makes the line
/// non-blank.
fn is_blank(line: &str) -> bool {
    line.bytes().all(|byte| byte.is_ascii_whitespace())
}
16 changes: 10 additions & 6 deletions crates/biome_grit_patterns/src/grit_built_in_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,9 @@ fn capitalize_fn<'a>(
};

let string = arg1.text(&state.files, context.language())?;
Ok(ResolvedPattern::from_string(capitalize(&string)))
Ok(ResolvedPattern::from_string(
capitalize(&string).to_string(),
))
}

fn distinct_fn<'a>(
Expand Down Expand Up @@ -379,12 +381,14 @@ fn uppercase_fn<'a>(
Ok(ResolvedPattern::from_string(string.to_uppercase()))
}

/// Returns `s` with its first character converted to uppercase.
///
/// The full Unicode uppercase mapping is applied, so non-ASCII letters are
/// capitalized too and a single character may expand to several (for example
/// `ß` becomes `SS`). When the first character already equals its uppercase
/// form (uppercase letters, digits, punctuation) or `s` is empty, `s` is
/// returned borrowed and nothing is allocated.
fn capitalize(s: &str) -> Cow<str> {
    let mut chars = s.chars();
    let Some(first_char) = chars.next() else {
        return Cow::Borrowed(s);
    };
    // Nothing to change: avoid allocating an identical copy of the input.
    if first_char.to_uppercase().eq(std::iter::once(first_char)) {
        return Cow::Borrowed(s);
    }
    // `chars` has consumed exactly the first character, so `as_str` is the
    // remainder of the input starting on a character boundary.
    let rest = chars.as_str();
    let mut capitalized = String::with_capacity(s.len());
    capitalized.extend(first_char.to_uppercase());
    capitalized.push_str(rest);
    Cow::Owned(capitalized)
}

fn resolve<'a>(target_path: Cow<'a, str>, from_file: Cow<'a, str>) -> Result<String> {
Expand Down
10 changes: 5 additions & 5 deletions crates/biome_html_formatter/src/utils/children.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use biome_rowan::{SyntaxResult, TextLen, TextRange, TextSize, TokenText};

use crate::{comments::HtmlComments, context::HtmlFormatContext, HtmlFormatter};

pub(crate) static HTML_WHITESPACE_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];
pub(crate) static HTML_WHITESPACE_CHARS: [u8; 4] = [b' ', b'\n', b'\t', b'\r'];

/// Meaningful HTML text is defined to be text that has either non-whitespace
/// characters, or does not contain a newline. Whitespace is defined as ASCII
Expand All @@ -29,11 +29,11 @@ pub(crate) static HTML_WHITESPACE_CHARS: [char; 4] = [' ', '\n', '\t', '\r'];
/// ```
pub fn is_meaningful_html_text(text: &str) -> bool {
let mut has_newline = false;
for c in text.chars() {
for byte in text.bytes() {
// If there is a non-whitespace character
if !HTML_WHITESPACE_CHARS.contains(&c) {
if !HTML_WHITESPACE_CHARS.contains(&byte) {
return true;
} else if c == '\n' {
} else if byte == b'\n' {
has_newline = true;
}
}
Expand Down Expand Up @@ -191,7 +191,7 @@ where
// A text only consisting of whitespace that also contains a new line isn't considered meaningful text.
// It can be entirely removed from the content without changing the semantics.
let newlines =
whitespace.chars().filter(|c| *c == '\n').count();
whitespace.bytes().filter(|b| *b == b'\n').count();

// Keep up to one blank line between tags.
// ```html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ impl Rule for NoMultipleSpacesInRegularExpressionLiterals {
let mut range_list = vec![];
let mut previous_is_space = false;
let mut first_consecutive_space_index = 0;
// We use `char_indices` to get the byte index of every character
for (i, ch) in trimmed_text.bytes().enumerate() {
if ch == b' ' {
if !previous_is_space {
Expand Down
Loading

0 comments on commit cc6d34f

Please sign in to comment.