From 5579f9fe74ab6955e0a9e81155019c18e2310cea Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Mon, 21 Oct 2024 12:01:57 +0200 Subject: [PATCH] feat(organize_import): utilities for ordering import sources (#4313) --- Cargo.lock | 8 +- .../src/lint/a11y/use_generic_font_names.rs | 4 +- ...no_invalid_direction_in_linear_gradient.rs | 6 +- .../lint/correctness/no_unknown_property.rs | 4 +- .../src/lint/correctness/no_unknown_unit.rs | 8 +- .../lint/nursery/no_unknown_pseudo_class.rs | 4 +- .../lint/nursery/no_unknown_pseudo_element.rs | 4 +- .../suspicious/no_duplicate_font_names.rs | 4 +- .../no_duplicate_selectors_keyframe_block.rs | 11 +- crates/biome_css_analyze/src/utils.rs | 12 +- crates/biome_css_formatter/src/lib.rs | 2 +- .../src/utils/component_value_list.rs | 4 +- .../src/utils/string_utils.rs | 2 +- crates/biome_formatter/src/token/number.rs | 2 +- .../src/assists/source/organize_imports.rs | 2 + .../assists/source/organize_imports/util.rs | 380 ++++++++++++++++++ crates/biome_js_analyze/src/lib.rs | 2 +- .../src/lint/a11y/no_redundant_alt.rs | 9 +- .../src/lint/a11y/no_svg_without_title.rs | 10 +- .../lint/a11y/use_key_with_click_events.rs | 4 +- .../src/lint/a11y/use_media_caption.rs | 4 +- .../src/lint/suspicious/use_valid_typeof.rs | 8 +- .../expressions/bigint_literal_expression.rs | 2 +- .../src/ts/types/bigint_literal_type.rs | 2 +- crates/biome_rowan/src/token_text.rs | 6 + crates/biome_string_case/src/lib.rs | 257 +++++++++++- 26 files changed, 693 insertions(+), 68 deletions(-) create mode 100644 crates/biome_js_analyze/src/assists/source/organize_imports/util.rs diff --git a/Cargo.lock b/Cargo.lock index 7898d81ad04b..9cd4378c1f27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1760,9 +1760,9 @@ checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30" [[package]] name = "either" -version = "1.8.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "elsa" @@ -2576,9 +2576,9 @@ checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] diff --git a/crates/biome_css_analyze/src/lint/a11y/use_generic_font_names.rs b/crates/biome_css_analyze/src/lint/a11y/use_generic_font_names.rs index 20e064ea2877..4ef3acdbbab8 100644 --- a/crates/biome_css_analyze/src/lint/a11y/use_generic_font_names.rs +++ b/crates/biome_css_analyze/src/lint/a11y/use_generic_font_names.rs @@ -7,7 +7,7 @@ use biome_css_syntax::{ CssGenericComponentValueList, CssGenericProperty, CssSyntaxKind, }; use biome_rowan::{AstNode, SyntaxNodeCast, TextRange}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use crate::utils::{ find_font_family, is_css_variable, is_font_family_keyword, is_system_family_name_keyword, @@ -78,7 +78,7 @@ impl Rule for UseGenericFontNames { fn run(ctx: &RuleContext) -> Option { let node = ctx.query(); let property_name = node.name().ok()?.text(); - let property_name = property_name.to_lowercase_cow(); + let property_name = property_name.to_ascii_lowercase_cow(); // Ignore `@font-face`. 
See more detail: https://drafts.csswg.org/css-fonts/#font-face-rule if is_in_font_face_at_rule(node) { diff --git a/crates/biome_css_analyze/src/lint/correctness/no_invalid_direction_in_linear_gradient.rs b/crates/biome_css_analyze/src/lint/correctness/no_invalid_direction_in_linear_gradient.rs index 72220b3a1e88..b2ba389eacf3 100644 --- a/crates/biome_css_analyze/src/lint/correctness/no_invalid_direction_in_linear_gradient.rs +++ b/crates/biome_css_analyze/src/lint/correctness/no_invalid_direction_in_linear_gradient.rs @@ -5,7 +5,7 @@ use biome_console::markup; use biome_css_syntax::{CssFunction, CssParameter}; use biome_rowan::AstNode; use biome_rowan::AstSeparatedList; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use regex::Regex; use std::sync::LazyLock; @@ -83,7 +83,7 @@ impl Rule for NoInvalidDirectionInLinearGradient { "-o-linear-gradient", "-ms-linear-gradient", ]; - if !linear_gradient_property.contains(&node_name.to_lowercase_cow().as_ref()) { + if !linear_gradient_property.contains(&node_name.to_ascii_lowercase_cow().as_ref()) { return None; } let css_parameter = node.items(); @@ -104,7 +104,7 @@ impl Rule for NoInvalidDirectionInLinearGradient { let direction_property = ["top", "left", "bottom", "right"]; if !direction_property.iter().any(|&keyword| { first_css_parameter_text - .to_lowercase_cow() + .to_ascii_lowercase_cow() .contains(keyword) }) { return None; diff --git a/crates/biome_css_analyze/src/lint/correctness/no_unknown_property.rs b/crates/biome_css_analyze/src/lint/correctness/no_unknown_property.rs index 8270d2b744e2..dcb53cc317bf 100644 --- a/crates/biome_css_analyze/src/lint/correctness/no_unknown_property.rs +++ b/crates/biome_css_analyze/src/lint/correctness/no_unknown_property.rs @@ -4,7 +4,7 @@ use biome_analyze::{ use biome_console::markup; use biome_css_syntax::CssGenericProperty; use biome_rowan::{AstNode, TextRange}; -use biome_string_case::StrOnlyExtension; +use 
biome_string_case::StrLikeExtension; use crate::utils::{is_known_properties, vendor_prefixed}; @@ -74,7 +74,7 @@ impl Rule for NoUnknownProperty { fn run(ctx: &RuleContext) -> Option { let node = ctx.query(); let property_name = node.name().ok()?.text(); - let property_name_lower = property_name.to_lowercase_cow(); + let property_name_lower = property_name.to_ascii_lowercase_cow(); if !property_name_lower.starts_with("--") // Ignore `composes` property. // See https://github.com/css-modules/css-modules/blob/master/docs/composition.md for more details. diff --git a/crates/biome_css_analyze/src/lint/correctness/no_unknown_unit.rs b/crates/biome_css_analyze/src/lint/correctness/no_unknown_unit.rs index 921bc430e6fb..6213b4fdfe4b 100644 --- a/crates/biome_css_analyze/src/lint/correctness/no_unknown_unit.rs +++ b/crates/biome_css_analyze/src/lint/correctness/no_unknown_unit.rs @@ -6,7 +6,7 @@ use biome_css_syntax::{ AnyCssDimension, CssFunction, CssGenericProperty, CssQueryFeaturePlain, CssSyntaxKind, }; use biome_rowan::{SyntaxNodeCast, TextRange}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; const RESOLUTION_MEDIA_FEATURE_NAMES: [&str; 3] = ["resolution", "min-resolution", "max-resolution"]; @@ -111,7 +111,7 @@ impl Rule for NoUnknownUnit { .value_token() .ok()?; let function_name = - function_name_token.text_trimmed().to_lowercase_cow(); + function_name_token.text_trimmed().to_ascii_lowercase_cow(); if function_name.ends_with("image-set") { allow_x = true; @@ -127,7 +127,7 @@ impl Rule for NoUnknownUnit { .value_token() .ok()?; let property_name = - property_name_token.text_trimmed().to_lowercase_cow(); + property_name_token.text_trimmed().to_ascii_lowercase_cow(); if property_name == "image-resolution" { allow_x = true; @@ -142,7 +142,7 @@ impl Rule for NoUnknownUnit { .value_token() .ok()?; let feature_name = - feature_name_token.text_trimmed().to_lowercase_cow(); + feature_name_token.text_trimmed().to_ascii_lowercase_cow(); 
if RESOLUTION_MEDIA_FEATURE_NAMES.contains(&feature_name.as_ref()) { allow_x = true; diff --git a/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_class.rs b/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_class.rs index 9131516aea85..c42262b81c15 100644 --- a/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_class.rs +++ b/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_class.rs @@ -14,7 +14,7 @@ use biome_css_syntax::{ CssPseudoClassFunctionValueList, CssPseudoClassIdentifier, CssPseudoElementSelector, }; use biome_rowan::{declare_node_union, AstNode, TextRange}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; declare_lint_rule! { /// Disallow unknown pseudo-class selectors. @@ -169,7 +169,7 @@ impl Rule for NoUnknownPseudoClass { } }; - let lower_name = name.to_lowercase_cow(); + let lower_name = name.to_ascii_lowercase_cow(); let lower_name = lower_name.as_ref(); let is_valid_class = match pseudo_type { diff --git a/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_element.rs b/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_element.rs index 7141e68d5893..169e8035ab8c 100644 --- a/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_element.rs +++ b/crates/biome_css_analyze/src/lint/nursery/no_unknown_pseudo_element.rs @@ -4,7 +4,7 @@ use biome_analyze::{ use biome_console::markup; use biome_css_syntax::{AnyCssPseudoElement, CssPseudoElementSelector}; use biome_rowan::AstNode; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use crate::utils::{is_pseudo_elements, vender_prefix}; @@ -80,7 +80,7 @@ impl Rule for NoUnknownPseudoElement { }; if !vender_prefix(pseudo_element_name.as_str()).is_empty() - || is_pseudo_elements(pseudo_element_name.to_lowercase_cow().as_ref()) + || is_pseudo_elements(pseudo_element_name.to_ascii_lowercase_cow().as_ref()) { return None; } diff --git 
a/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_font_names.rs b/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_font_names.rs index 06e72cd73450..2e7c2026960a 100644 --- a/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_font_names.rs +++ b/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_font_names.rs @@ -6,7 +6,7 @@ use biome_analyze::{ use biome_console::markup; use biome_css_syntax::{AnyCssGenericComponentValue, AnyCssValue, CssGenericProperty}; use biome_rowan::{AstNode, TextRange}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use crate::utils::{find_font_family, is_font_family_keyword}; @@ -66,7 +66,7 @@ impl Rule for NoDuplicateFontNames { fn run(ctx: &RuleContext) -> Option { let node = ctx.query(); let property_name = node.name().ok()?.text(); - let property_name = property_name.to_lowercase_cow(); + let property_name = property_name.to_ascii_lowercase_cow(); let is_font_family = property_name == "font-family"; let is_font = property_name == "font"; diff --git a/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_selectors_keyframe_block.rs b/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_selectors_keyframe_block.rs index c9eacf76cd12..cd47d645990b 100644 --- a/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_selectors_keyframe_block.rs +++ b/crates/biome_css_analyze/src/lint/suspicious/no_duplicate_selectors_keyframe_block.rs @@ -6,7 +6,7 @@ use biome_analyze::{ use biome_console::markup; use biome_css_syntax::{AnyCssKeyframesItem, AnyCssKeyframesSelector, CssKeyframesBlock}; use biome_rowan::AstNode; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; declare_lint_rule! { /// Disallow duplicate selectors within keyframe blocks. 
@@ -59,9 +59,12 @@ impl Rule for NoDuplicateSelectorsKeyframeBlock { match keyframe_item { AnyCssKeyframesItem::CssKeyframesItem(item) => { let keyframe_selector = item.selectors().into_iter().next()?.ok()?; - if !selector_list - .insert(keyframe_selector.text().to_lowercase_cow().to_string()) - { + if !selector_list.insert( + keyframe_selector + .text() + .to_ascii_lowercase_cow() + .to_string(), + ) { return Some(keyframe_selector); } } diff --git a/crates/biome_css_analyze/src/utils.rs b/crates/biome_css_analyze/src/utils.rs index 33ca5e959b08..d7c5c9475b55 100644 --- a/crates/biome_css_analyze/src/utils.rs +++ b/crates/biome_css_analyze/src/utils.rs @@ -15,7 +15,7 @@ use crate::keywords::{ }; use biome_css_syntax::{AnyCssGenericComponentValue, AnyCssValue, CssGenericComponentValueList}; use biome_rowan::{AstNode, SyntaxNodeCast}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::{StrLikeExtension, StrOnlyExtension}; pub fn is_font_family_keyword(value: &str) -> bool { BASIC_KEYWORDS.contains(&value) || FONT_FAMILY_KEYWORDS.contains(&value) @@ -39,7 +39,7 @@ pub fn is_font_shorthand_keyword(value: &str) -> bool { } pub fn is_css_variable(value: &str) -> bool { - value.to_lowercase_cow().starts_with("var(") + value.to_ascii_lowercase_cow().starts_with("var(") } /// Get the font-families within a `font` shorthand property value. @@ -47,7 +47,7 @@ pub fn find_font_family(value: CssGenericComponentValueList) -> Vec let mut font_families: Vec = Vec::new(); for v in value { let value = v.text(); - let lower_case_value = value.to_lowercase_cow(); + let lower_case_value = value.to_ascii_lowercase_cow(); // Ignore CSS variables if is_css_variable(&lower_case_value) { @@ -112,7 +112,7 @@ pub fn find_font_family(value: CssGenericComponentValueList) -> Vec /// Check if the value is a known CSS value function. 
pub fn is_function_keyword(value: &str) -> bool { FUNCTION_KEYWORDS - .binary_search(&value.to_lowercase_cow().as_ref()) + .binary_search(&value.to_ascii_lowercase_cow().as_ref()) .is_ok() } @@ -180,7 +180,7 @@ pub fn vendor_prefixed(props: &str) -> bool { /// Check if the input string is a media feature name. pub fn is_media_feature_name(prop: &str) -> bool { - let input = prop.to_lowercase_cow(); + let input = prop.to_ascii_lowercase_cow(); let count = MEDIA_FEATURE_NAMES.binary_search(&input.as_ref()); if count.is_ok() { return true; @@ -224,7 +224,7 @@ fn is_custom_element(prop: &str) -> bool { /// Check if the input string is a known type selector. pub fn is_known_type_selector(prop: &str) -> bool { - let input = prop.to_lowercase_cow(); + let input = prop.to_ascii_lowercase_cow(); HTML_TAGS.binary_search(&input.as_ref()).is_ok() || SVG_TAGS.binary_search(&prop).is_ok() || MATH_ML_TAGS.binary_search(&input.as_ref()).is_ok() diff --git a/crates/biome_css_formatter/src/lib.rs b/crates/biome_css_formatter/src/lib.rs index 47d3cd82a6c1..77b5228e9cda 100644 --- a/crates/biome_css_formatter/src/lib.rs +++ b/crates/biome_css_formatter/src/lib.rs @@ -26,7 +26,7 @@ use biome_formatter::{ }; use biome_formatter::{Formatted, Printed}; use biome_rowan::{AstNode, SyntaxNode, TextRange}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; /// Used to get an object that knows how to format this object. 
pub(crate) trait AsFormat { diff --git a/crates/biome_css_formatter/src/utils/component_value_list.rs b/crates/biome_css_formatter/src/utils/component_value_list.rs index 854a298e5d20..2250ab886f0d 100644 --- a/crates/biome_css_formatter/src/utils/component_value_list.rs +++ b/crates/biome_css_formatter/src/utils/component_value_list.rs @@ -2,7 +2,7 @@ use crate::comments::CssComments; use biome_css_syntax::{CssGenericDelimiter, CssGenericProperty, CssLanguage, CssSyntaxKind}; use biome_formatter::{write, CstFormatContext}; use biome_formatter::{FormatOptions, FormatResult}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use crate::prelude::*; use crate::CssFormatter; @@ -180,7 +180,7 @@ where .and_then(|parent| parent.name().ok()) .and_then(|name| name.as_css_identifier().map(|name| name.text())) .map_or(false, |name| { - let name = name.to_lowercase_cow(); + let name = name.to_ascii_lowercase_cow(); name.starts_with("grid-template") || name == "grid" }); diff --git a/crates/biome_css_formatter/src/utils/string_utils.rs b/crates/biome_css_formatter/src/utils/string_utils.rs index df97a8f5138f..807262f19d4c 100644 --- a/crates/biome_css_formatter/src/utils/string_utils.rs +++ b/crates/biome_css_formatter/src/utils/string_utils.rs @@ -13,7 +13,7 @@ use biome_formatter::{ Format, FormatResult, }; use biome_rowan::SyntaxToken; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use crate::{prelude::CssFormatContext, AsFormat, CssFormatter}; diff --git a/crates/biome_formatter/src/token/number.rs b/crates/biome_formatter/src/token/number.rs index d34fd5f0ca4a..f28a44e8d0a4 100644 --- a/crates/biome_formatter/src/token/number.rs +++ b/crates/biome_formatter/src/token/number.rs @@ -1,5 +1,5 @@ use biome_rowan::{Language, SyntaxToken}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use std::borrow::Cow; use std::num::NonZeroUsize; diff --git 
a/crates/biome_js_analyze/src/assists/source/organize_imports.rs b/crates/biome_js_analyze/src/assists/source/organize_imports.rs index 9f386b53bbd1..dc325f765b26 100644 --- a/crates/biome_js_analyze/src/assists/source/organize_imports.rs +++ b/crates/biome_js_analyze/src/assists/source/organize_imports.rs @@ -16,6 +16,8 @@ use biome_rowan::{ use crate::JsRuleAction; +pub mod util; + declare_source_rule! { /// Provides a whole-source code action to sort the imports in the file /// using import groups and natural ordering. diff --git a/crates/biome_js_analyze/src/assists/source/organize_imports/util.rs b/crates/biome_js_analyze/src/assists/source/organize_imports/util.rs new file mode 100644 index 000000000000..902a590cef4e --- /dev/null +++ b/crates/biome_js_analyze/src/assists/source/organize_imports/util.rs @@ -0,0 +1,380 @@ +use std::{ + cmp::Ordering, + path::{Component, Path}, +}; + +use biome_string_case::AsciiCollator; + +/// Type for comparing two import sources. +/// Import sources are first grouped and ordered by [ImportSourceKind]. +/// +/// [ImportSourceKind::Path] are ordered according to their proximity to the importing module. +/// For instance, the following order holds: `/` < `../..` < `..` < `.` +/// +/// Other kinds are ordered with a natural string order tailored for paths. +/// See [ImportSourceAsciiCollator] for more details.
+/// +/// ``` +/// use biome_js_analyze::assists::source::organize_imports::util::ImportSource; +/// +/// assert!(ImportSource::from("https://example.org") < ImportSource::from("bun:test")); +/// assert!(ImportSource::from("node:test") < ImportSource::from("@scope/package")); +/// assert!(ImportSource::from("@scope/package") < ImportSource::from("package")); +/// assert!(ImportSource::from("package") < ImportSource::from("@/alias")); +/// assert!(ImportSource::from("@/alias") < ImportSource::from("/path")); +/// assert!(ImportSource::from("../..") < ImportSource::from("..")); +/// assert!(ImportSource::from("..") < ImportSource::from(".")); +/// assert!(ImportSource::from("./path9") < ImportSource::from("./path10")); +/// assert!(ImportSource::from("./path/a") < ImportSource::from("./path-a")); +/// ``` +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct ImportSource { + kind: ImportSourceKind, + inner: T, +} +impl ImportSource { + pub fn kind(&self) -> ImportSourceKind { + self.kind + } +} +impl> From for ImportSource { + fn from(inner: T) -> Self { + Self { + kind: ImportSourceKind::from_source(inner.as_ref()), + inner, + } + } +} +impl + Eq> PartialOrd for ImportSource { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl + Eq> Ord for ImportSource { + fn cmp(&self, other: &Self) -> Ordering { + match self.kind.cmp(&other.kind()) { + Ordering::Equal => { + if self.kind == ImportSourceKind::Path { + PathComponents::from(Path::new(self.inner.as_ref())) + .cmp(PathComponents::from(Path::new(other.inner.as_ref()))) + // [PathComponents] normalizes paths. + // This leads to a partial order between edge cases such as `./..` and `..`. + // To obtain a total order, we apply a string order when the normalized paths are equal.
+ .then_with(|| { + ImportSourceAsciiCollator + .cmp_str(self.inner.as_ref(), other.inner.as_ref()) + }) + } else { + ImportSourceAsciiCollator.cmp_str(self.inner.as_ref(), other.inner.as_ref()) + } + } + result => result, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum ImportSourceKind { + Unknown, + // `https://example.org` + Url, + /// `node:test`, `npm:@scope/package` + ProtocolPackage, + /// `package` + Package, + /// Import sources that start with `@/`, `#`, `~`, or `%` + /// Node.js subpath imports and TypeScript aliases. + Alias, + /// Import sources that start with `/`, `./`, or `../` + Path, +} +impl ImportSourceKind { + pub fn from_source(import_source: &str) -> Self { + let mut iter = import_source.bytes(); + match iter.next() { + Some(b'@') => { + match iter.next() { + Some(b'/') | None => { + // TypeScript conventional path aliases + Self::Alias + } + Some(b'a'..=b'z' | b'0'..=b'9' | b'-') => Self::Package, + _ => Self::Unknown, + } + } + // Node.js subpath imports + Some(b'#' | b'~' | b'%') => Self::Alias, + Some(b'/') => Self::Path, + Some(b'.') => match iter.next() { + Some(b'.') => { + if matches!(iter.next(), None | Some(b'/')) { + Self::Path + } else { + Self::Unknown + } + } + None | Some(b'/') => Self::Path, + Some(_) => Self::Unknown, + }, + Some(b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_') => { + loop { + match iter.next() { + Some(b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.') => {} + Some(b':') => { + // Protocol + return match iter.next() { + Some(b'/') => Self::Url, + Some(b'@' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_') => { + Self::ProtocolPackage + } + _ => Self::Unknown, + }; + } + None | Some(b'/') => { + return Self::Package; + } + Some(_) => { + return Self::Unknown; + } + } + } + } + _ => Self::Unknown, + } + } +} + +/// This type is analog to [std::path::Component] with the following changes: +/// - [PathComponent::ParentDir] may represent several [Component::ParentDir] at once. 
+/// - [PathComponent::ParentDir] is ordered before [PathComponent::CurDir] +/// - Order between two [PathComponent::Normal] relies on [ImportSourceAsciiCollator] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +enum PathComponent<'a> { + /// See [Component::Prefix] + Prefix(std::path::PrefixComponent<'a>), + /// See [Component::RootDir] + RootDir, + /// See [Component::ParentDir] + ParentDir(usize), + /// See [Component::CurDir] + CurDir, + /// See [Component::Normal] + Normal(&'a std::ffi::OsStr), +} +impl<'a> PartialOrd for PathComponent<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl<'a> Ord for PathComponent<'a> { + fn cmp(&self, other: &Self) -> Ordering { + match (self, other) { + (PathComponent::RootDir, PathComponent::RootDir) + | (PathComponent::CurDir, PathComponent::CurDir) => Ordering::Equal, + (PathComponent::ParentDir(n1), PathComponent::ParentDir(n2)) => n1.cmp(n2).reverse(), + (PathComponent::Prefix(c1), PathComponent::Prefix(c2)) => c1.cmp(c2), + ( + PathComponent::Prefix(_), + PathComponent::RootDir + | PathComponent::ParentDir(_) + | PathComponent::CurDir + | PathComponent::Normal(_), + ) => Ordering::Less, + ( + PathComponent::RootDir + | PathComponent::ParentDir(_) + | PathComponent::CurDir + | PathComponent::Normal(_), + PathComponent::Prefix(_), + ) => Ordering::Greater, + ( + PathComponent::RootDir, + PathComponent::CurDir | PathComponent::ParentDir(_) | PathComponent::Normal(_), + ) => Ordering::Less, + (PathComponent::ParentDir(_), PathComponent::CurDir | PathComponent::Normal(_)) => { + Ordering::Less + } + (PathComponent::CurDir | PathComponent::Normal(_), PathComponent::ParentDir(_)) => { + Ordering::Greater + } + ( + PathComponent::CurDir | PathComponent::ParentDir(_) | PathComponent::Normal(_), + PathComponent::RootDir, + ) => Ordering::Greater, + (PathComponent::CurDir, PathComponent::Normal(_)) => Ordering::Less, + (PathComponent::Normal(_), PathComponent::CurDir) => Ordering::Greater, + 
(PathComponent::Normal(s1), PathComponent::Normal(s2)) => { + ImportSourceAsciiCollator.cmp_osstr(s1, s2) + } + } + } +} + +/// This type is analog to [std::path::Components] with the following changes: +/// - The iterator yields [PathComponent] instead of [std::path::Component]. +/// - Consecutive parent directories such as `../..` yields a single [PathComponent::ParentDir] with +/// the count of parent directories. +/// - `./..` is normalized to `..` +struct PathComponents<'a> { + inner: std::path::Components<'a>, +} +impl<'a> From<&'a Path> for PathComponents<'a> { + fn from(path: &'a Path) -> Self { + Self { + inner: path.components(), + } + } +} +impl<'a> Iterator for PathComponents<'a> { + type Item = PathComponent<'a>; + + fn next(&mut self) -> Option { + Some(match self.inner.next()? { + Component::Prefix(c) => PathComponent::Prefix(c), + Component::RootDir => PathComponent::RootDir, + Component::ParentDir => { + let count = self + .inner + .clone() + .take_while(|c| matches!(c, Component::ParentDir)) + .count(); + for _ in 1..=count { + self.inner.next(); + } + PathComponent::ParentDir(count + 1) + } + Component::CurDir => { + // Normalize `./../..` to `../..`. + // Note that [std::path::Components] already normalizes `.././..` to `../..` + let parent_dir_count = self + .inner + .clone() + .take_while(|c| matches!(c, Component::ParentDir)) + .count(); + if parent_dir_count == 0 { + PathComponent::CurDir + } else { + for _ in 1..=parent_dir_count { + self.inner.next(); + } + PathComponent::ParentDir(parent_dir_count) + } + } + Component::Normal(s) => PathComponent::Normal(s), + }) + } +} + +/// Custom collation order to get a natural order between import sources. +/// +/// Non-printable characters and alphanumeric characters have the same order as [biome_string_case::CldrAsciiCollator]. +/// Others are ordered differently, but still between non-printable characters and alphanumeric characters. 
+pub struct ImportSourceAsciiCollator; +impl ImportSourceAsciiCollator { + const COLLATION: [u8; 128] = [ + b'\0', 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x7f, b'\t', b'\n', + 0x0b, 0x0c, b'\r', b'/', b'\\', b'?', b'#', b'=', b'&', b';', b',', b'@', b':', b'.', b' ', + b'_', b'-', b'+', b'*', b'!', b'%', b'$', b'(', b')', b'[', b']', b'{', b'}', b'<', b'>', + b'|', b'^', b'~', b'\'', b'"', b'`', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', + b'9', b'A', b'a', b'B', b'b', b'C', b'c', b'D', b'd', b'E', b'e', b'F', b'f', b'G', b'g', + b'H', b'h', b'I', b'i', b'J', b'j', b'K', b'k', b'L', b'l', b'M', b'm', b'N', b'n', b'O', + b'o', b'P', b'p', b'Q', b'q', b'R', b'r', b'S', b's', b'T', b't', b'U', b'u', b'V', b'v', + b'W', b'w', b'X', b'x', b'Y', b'y', b'Z', b'z', + ]; +} +impl biome_string_case::AsciiCollator for ImportSourceAsciiCollator { + const WEIGHTS: [u8; 256] = biome_string_case::ascii_collation_weight_from(&Self::COLLATION); +} + +#[cfg(test)] +mod test { + use biome_string_case::AsciiCollator; + + use super::*; + + #[test] + fn import_source_kind() { + assert_eq!( + ImportSourceKind::from_source("/absolute/path"), + ImportSourceKind::Path + ); + } + + #[test] + fn import_source_ascii_collator() { + let sorted = &[ + "/", + "@scope/package/path", + "@scope/package@>=1.0/path", + "@scope/package@^1.0/path", + "@scope/package@~1.0/path", + "@scope/package@1.0/path", + "./", + "../", + "a.js", + "a a.js", + "a_a.js", + "a-a.js", + "a+a.js", + "a*a.js", + "https://example.org/path?prop=val", + "https://example.org/path?prop-1=val", + "https://example.org/path?prop-1=val#frag", + "https://example.org/path?prop-1=val&prop2=val", + "https://example.org/path?prop-1=val-1&prop-2=val-2", + "https://example.org/path#frag", + "https://example.org/path-a", + ]; + for items in sorted.windows(2) { + let (x, y) = (items[0], items[1]); + assert_eq!( + 
ImportSourceAsciiCollator.cmp_str(x, y), + Ordering::Less, + "'{x}' < '{y}'" + ); + } + } + + #[test] + fn test_cmp_path() { + let cmp_path = + |p1: &Path, p2: &Path| PathComponents::from(p1).cmp(PathComponents::from(p2)); + assert_eq!(cmp_path(Path::new("/"), Path::new("..")), Ordering::Less); + assert_eq!(cmp_path(Path::new(".."), Path::new(".")), Ordering::Less); + assert_eq!(cmp_path(Path::new("."), Path::new("test")), Ordering::Less); + } + + #[test] + fn test_import_source_cmp() { + let sorted = [ + ImportSource::from("https://example.org/path?prop=val"), + ImportSource::from("https://example.org/path?prop-1=val"), + ImportSource::from("bun:test"), + ImportSource::from("node:test"), + ImportSource::from("npm:@scope/package/path"), + ImportSource::from("npm:@scope/package@>=1.0/path"), + ImportSource::from("npm:@scope/package@^1.0/path"), + ImportSource::from("npm:@scope/package@~1.0/path"), + ImportSource::from("npm:@scope/package@1.0/path"), + ImportSource::from("npm:package/path"), + ImportSource::from("npm:package@>=1.0/path"), + ImportSource::from("npm:package@^1.0/path"), + ImportSource::from("npm:package@~1.0/path"), + ImportSource::from("npm:package@1.0/path"), + ImportSource::from("#internal"), + ImportSource::from("@/internal"), + ImportSource::from("%/internal"), + ImportSource::from("~/internal"), + ImportSource::from("/"), + ImportSource::from(".././.."), + ImportSource::from("../.."), + ImportSource::from(".."), + ImportSource::from("."), + ]; + for items in sorted.windows(2) { + let (x, y) = (&items[0], &items[1]); + assert!(x < y, "'{:?}' < '{:?}'", x, y); + } + } +} diff --git a/crates/biome_js_analyze/src/lib.rs b/crates/biome_js_analyze/src/lib.rs index 9c0866e4de42..ba9290bfb340 100644 --- a/crates/biome_js_analyze/src/lib.rs +++ b/crates/biome_js_analyze/src/lib.rs @@ -14,7 +14,7 @@ use biome_suppression::{parse_suppression_comment, SuppressionDiagnostic}; use std::ops::Deref; use std::sync::{Arc, LazyLock}; -mod assists; +pub mod assists; 
mod ast_utils; pub mod globals; pub mod lint; diff --git a/crates/biome_js_analyze/src/lint/a11y/no_redundant_alt.rs b/crates/biome_js_analyze/src/lint/a11y/no_redundant_alt.rs index 86209ecd8b8a..e378cd21d513 100644 --- a/crates/biome_js_analyze/src/lint/a11y/no_redundant_alt.rs +++ b/crates/biome_js_analyze/src/lint/a11y/no_redundant_alt.rs @@ -6,7 +6,7 @@ use biome_js_syntax::{ AnyJsExpression, AnyJsLiteralExpression, AnyJsTemplateElement, AnyJsxAttributeValue, }; use biome_rowan::AstNode; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; declare_lint_rule! { /// Enforce `img` alt prop does not contain the word "image", "picture", or "photo". @@ -143,7 +143,8 @@ impl Rule for NoRedundantAlt { const REDUNDANT_WORDS: [&str; 3] = ["image", "photo", "picture"]; fn is_redundant_alt(alt: &str) -> bool { - REDUNDANT_WORDS - .into_iter() - .any(|word| alt.split_whitespace().any(|x| x.to_lowercase_cow() == word)) + REDUNDANT_WORDS.into_iter().any(|word| { + alt.split_whitespace() + .any(|x| x.to_ascii_lowercase_cow() == word) + }) } diff --git a/crates/biome_js_analyze/src/lint/a11y/no_svg_without_title.rs b/crates/biome_js_analyze/src/lint/a11y/no_svg_without_title.rs index 2608f97d49e3..55d4486b55a7 100644 --- a/crates/biome_js_analyze/src/lint/a11y/no_svg_without_title.rs +++ b/crates/biome_js_analyze/src/lint/a11y/no_svg_without_title.rs @@ -1,10 +1,8 @@ -use std::borrow::Cow; - use biome_analyze::{context::RuleContext, declare_lint_rule, Ast, Rule, RuleDiagnostic}; use biome_console::markup; use biome_js_syntax::{jsx_ext::AnyJsxElement, JsxAttribute, JsxChildList, JsxElement}; use biome_rowan::{AstNode, AstNodeList}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; declare_lint_rule! { /// Enforces the usage of the `title` element for the `svg` element. 
@@ -152,8 +150,8 @@ impl Rule for NoSvgWithoutTitle { return Some(()); }; - match role_attribute_text.to_lowercase_cow() { - Cow::Borrowed("img") => { + match role_attribute_text.to_ascii_lowercase_cow().as_ref() { + "img" => { let [aria_label, aria_labelledby] = node .attributes() .find_by_names(["aria-label", "aria-labelledby"]); @@ -166,7 +164,7 @@ impl Rule for NoSvgWithoutTitle { Some(()) } // if role attribute is empty, the svg element should have title element - Cow::Borrowed("") => Some(()), + "" => Some(()), _ => None, } } diff --git a/crates/biome_js_analyze/src/lint/a11y/use_key_with_click_events.rs b/crates/biome_js_analyze/src/lint/a11y/use_key_with_click_events.rs index 9de4c03e05e1..53da1f1412a2 100644 --- a/crates/biome_js_analyze/src/lint/a11y/use_key_with_click_events.rs +++ b/crates/biome_js_analyze/src/lint/a11y/use_key_with_click_events.rs @@ -6,7 +6,7 @@ use biome_analyze::{ use biome_console::markup; use biome_js_syntax::{jsx_ext::AnyJsxElement, AnyJsxAttribute, AnyJsxElementName}; use biome_rowan::AstNode; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; declare_lint_rule! { /// Enforce onClick is accompanied by at least one of the following: `onKeyUp`, `onKeyDown`, `onKeyPress`. 
@@ -78,7 +78,7 @@ impl Rule for UseKeyWithClickEvents { match element.name() { Ok(AnyJsxElementName::JsxName(name)) => { let name_token = name.value_token().ok()?; - let element_name = name_token.text_trimmed().to_lowercase_cow(); + let element_name = name_token.text_trimmed().to_ascii_lowercase_cow(); // Don't handle interactive roles // TODO Support aria roles https://github.com/rome/tools/issues/3640 diff --git a/crates/biome_js_analyze/src/lint/a11y/use_media_caption.rs b/crates/biome_js_analyze/src/lint/a11y/use_media_caption.rs index 0981e535d0ff..f70c39444cbd 100644 --- a/crates/biome_js_analyze/src/lint/a11y/use_media_caption.rs +++ b/crates/biome_js_analyze/src/lint/a11y/use_media_caption.rs @@ -4,7 +4,7 @@ use biome_console::markup; use biome_js_syntax::jsx_ext::AnyJsxElement; use biome_js_syntax::{AnyJsxChild, JsxElement, TextRange}; use biome_rowan::AstNode; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; declare_lint_rule! { /// Enforces that `audio` and `video` elements must have a `track` for captions. @@ -87,7 +87,7 @@ impl Rule for UseMediaCaption { .as_jsx_string()? .inner_string_text() .ok()? 
- .to_lowercase_cow() + .to_ascii_lowercase_cow() == "captions"; Some(has_track && has_valid_kind) diff --git a/crates/biome_js_analyze/src/lint/suspicious/use_valid_typeof.rs b/crates/biome_js_analyze/src/lint/suspicious/use_valid_typeof.rs index a75dbadf88b3..9aca9087cf65 100644 --- a/crates/biome_js_analyze/src/lint/suspicious/use_valid_typeof.rs +++ b/crates/biome_js_analyze/src/lint/suspicious/use_valid_typeof.rs @@ -9,7 +9,7 @@ use biome_js_syntax::{ JsBinaryOperator, JsUnaryOperator, TextRange, }; use biome_rowan::{AstNode, BatchMutationExt}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use crate::JsRuleAction; @@ -129,14 +129,14 @@ impl Rule for UseValidTypeof { .text_trimmed() .trim_start_matches(['"', '\'']) .trim_end_matches(['"', '\'']) - .to_lowercase_cow(); + .to_ascii_lowercase_cow(); if JsTypeName::from_str(&literal).is_some() { return None; } // Try to fix the casing of the literal eg. "String" -> "string" - let suggestion = literal.to_lowercase_cow(); + let suggestion = literal.to_ascii_lowercase_cow(); return Some(( TypeofError::InvalidLiteral(range, literal.to_string()), JsTypeName::from_str(&suggestion).map(|type_name| (lit.clone(), type_name)), @@ -180,7 +180,7 @@ impl Rule for UseValidTypeof { let suggestion = ident.name().ok().and_then(|name| { let value = name.value_token().ok()?; - let to_lower = value.text_trimmed().to_lowercase_cow(); + let to_lower = value.text_trimmed().to_ascii_lowercase_cow(); let as_type = JsTypeName::from_str(&to_lower)?; Some((id.clone(), as_type)) diff --git a/crates/biome_js_formatter/src/js/expressions/bigint_literal_expression.rs b/crates/biome_js_formatter/src/js/expressions/bigint_literal_expression.rs index 72060b4e5303..e1cdc00cca63 100644 --- a/crates/biome_js_formatter/src/js/expressions/bigint_literal_expression.rs +++ b/crates/biome_js_formatter/src/js/expressions/bigint_literal_expression.rs @@ -4,7 +4,7 @@ use biome_formatter::write; use 
biome_js_syntax::parentheses::NeedsParentheses; use biome_js_syntax::JsBigintLiteralExpression; use biome_js_syntax::JsBigintLiteralExpressionFields; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; use std::borrow::Cow; #[derive(Debug, Clone, Default)] diff --git a/crates/biome_js_formatter/src/ts/types/bigint_literal_type.rs b/crates/biome_js_formatter/src/ts/types/bigint_literal_type.rs index 8d5021144efc..911fb332ea4b 100644 --- a/crates/biome_js_formatter/src/ts/types/bigint_literal_type.rs +++ b/crates/biome_js_formatter/src/ts/types/bigint_literal_type.rs @@ -4,7 +4,7 @@ use crate::prelude::*; use biome_formatter::write; use biome_js_syntax::{TsBigintLiteralType, TsBigintLiteralTypeFields}; -use biome_string_case::StrOnlyExtension; +use biome_string_case::StrLikeExtension; #[derive(Debug, Clone, Default)] pub struct FormatTsBigintLiteralType; diff --git a/crates/biome_rowan/src/token_text.rs b/crates/biome_rowan/src/token_text.rs index c58755a6ad92..9f044e709878 100644 --- a/crates/biome_rowan/src/token_text.rs +++ b/crates/biome_rowan/src/token_text.rs @@ -94,6 +94,12 @@ impl PartialEq for &'_ str { } } +impl AsRef for TokenText { + fn as_ref(&self) -> &str { + self.text() + } +} + impl Borrow for TokenText { fn borrow(&self) -> &str { self.text() diff --git a/crates/biome_string_case/src/lib.rs b/crates/biome_string_case/src/lib.rs index adcad87c8654..8b802cfad32b 100644 --- a/crates/biome_string_case/src/lib.rs +++ b/crates/biome_string_case/src/lib.rs @@ -1,10 +1,13 @@ //! Identify string case and convert to various string cases. -use std::borrow::Cow; +use std::{borrow::Cow, cmp::Ordering, ffi::OsStr}; -/// Represents the [Case] of a string. +// Include the file generated by `../build.rs` +//include!(concat!(env!("OUT_DIR"), "/ascii_collation.rs")); + +/// Represents the case of a string. /// -/// Note that some cases are superset of others. +/// Note that some cases are supersets of others. 
/// For example, a name in [Case::Lower] is also in [Case::Camel], [Case::Kebab] , and [Case::Snake]. /// Thus [Case::Camel], [Case::Kebab], and [Case::Snake] are superset of [Case::Lower]. /// `Case::Unknown` is a superset of all [Case]. @@ -399,7 +402,7 @@ impl Iterator for CasesIterator { (0, Some(6)) } } -impl std::iter::FusedIterator for CasesIterator {} +impl core::iter::FusedIterator for CasesIterator {} const LEADING_BIT_INDEX_TO_CASE: [Case; 11] = [ Case::Number, @@ -415,25 +418,211 @@ const LEADING_BIT_INDEX_TO_CASE: [Case; 11] = [ Case::Unknown, ]; +/// A collator defines an order between a set of characters. +/// +/// This order may differ from their binary order. +/// This is often used to provide a more natural order for humans than the binary order represents. +pub trait Collator { + type Char: PartialEq; + + /// Returns the weight of the character `c`. + /// + /// A character with a smaller weight than another one is placed before it in the collation order. + fn weight(&self, c: &Self::Char) -> impl Ord; + + /// Returns the ASCII digit if `c` is a numeric character. + /// + /// This allows the collator to compare numbers in a human way (e.g. `9` < `10`). + fn as_ascii_digit(&self, c: &Self::Char) -> Option; + + /// Returns an [Ordering] between `iter1` and `iter2`. + fn cmp( + &self, + iter1: impl IntoIterator, + iter2: impl IntoIterator, + ) -> Ordering { + let mut iter1 = iter1.into_iter(); + let mut iter2 = iter2.into_iter(); + loop { + match (iter1.next(), iter2.next()) { + (Some(c1), Some(c2)) if c1 == c2 => {} + (Some(mut c1), Some(mut c2)) => { + if let (Some(n1), Some(n2)) = + (self.as_ascii_digit(&c1), self.as_ascii_digit(&c2)) + { + // Compare numbers + // We don't skip leading zeroes.
+ let mut number_ordering = n1.cmp(&n2); + loop { + match (iter1.next(), iter2.next()) { + (None, None) => { + return number_ordering; + } + (None, Some(_)) => { + return Ordering::Less; + } + (Some(_), None) => { + return Ordering::Greater; + } + (Some(next1), Some(next2)) => { + c1 = next1; + c2 = next2; + } + } + match (self.as_ascii_digit(&c1), self.as_ascii_digit(&c2)) { + (Some(n1), Some(n2)) => { + number_ordering = number_ordering.then(n1.cmp(&n2)); + } + (Some(_), None) => { + return Ordering::Greater; + } + (None, Some(_)) => { + return Ordering::Less; + } + (None, None) => match number_ordering { + Ordering::Equal => { + break; + } + ordering => { + return ordering; + } + }, + } + } + } + match self.weight(&c1).cmp(&self.weight(&c2)) { + Ordering::Equal => {} + ordering => { + return ordering; + } + } + } + (None, Some(_)) => { + return Ordering::Less; + } + (None, None) => { + return Ordering::Equal; + } + (Some(_), None) => { + return Ordering::Greater; + } + } + } + } +} + +/// An Ascii collator defines an order between ASCII characters. +/// +/// The order is extended to any byte value. +/// This order may differ from their binary order. +/// This is often used to provide a more natural order for humans than the binary order represents. +pub trait AsciiCollator { + /// Weight of a given byte. + /// The order between two bytes is defined by the order between their weights. + /// Usually a byte that is not a valid ASCII character is mapped to itself. + /// You may use [ascii_collation_weight_from] to create the weight table from an ASCII collation table. + const WEIGHTS: [u8; 256]; + + /// Compare `s1` and `s2` using [self] as collator. + fn cmp_str(&self, s1: &str, s2: &str) -> Ordering + where + Self: Collator, + { + self.cmp(s1.bytes(), s2.bytes()) + } + + /// Compare `s1` and `s2` using [self] as collator.
+ fn cmp_osstr(&self, s1: &OsStr, s2: &OsStr) -> Ordering + where + Self: Collator, + { + self.cmp_bytes(s1.as_encoded_bytes(), s2.as_encoded_bytes()) + } + + /// Compare `s1` and `s2` using [self] as collator. + fn cmp_bytes(&self, s1: &[u8], s2: &[u8]) -> Ordering + where + Self: Collator, + { + self.cmp(s1.iter().copied(), s2.iter().copied()) + } +} +impl Collator for C { + type Char = u8; + + fn weight(&self, c: &Self::Char) -> impl Ord { + // SAFETY: safe indexing because [Self::WEIGHTS] has exactly 256 (`u8::MAX + 1`) items, so any `u8` index is in bounds. + unsafe { *Self::WEIGHTS.get_unchecked(*c as usize) } + } + + fn as_ascii_digit(&self, c: &Self::Char) -> Option { + c.is_ascii_digit().then_some(*c) + } +} + +/// Unicode collation for ASCII extracted from the CLDR (Common Locale Data Repository) root table. +pub struct CldrAsciiCollator; +impl CldrAsciiCollator { + /// Unicode collation for ASCII extracted from the CLDR root table: + /// . + /// See also . + const COLLATION: [u8; 128] = [ + b'\0', 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x7f, b'\t', b'\n', + 0x0b, 0x0c, b'\r', b' ', b'_', b'-', b',', b';', b':', b'!', b'?', b'.', b'\'', b'"', b'(', + b')', b'[', b']', b'{', b'}', b'@', b'*', b'/', b'\\', b'&', b'#', b'%', b'`', b'^', b'+', + b'<', b'=', b'>', b'|', b'~', b'$', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', + b'9', b'A', b'a', b'B', b'b', b'C', b'c', b'D', b'd', b'E', b'e', b'F', b'f', b'G', b'g', + b'H', b'h', b'I', b'i', b'J', b'j', b'K', b'k', b'L', b'l', b'M', b'm', b'N', b'n', b'O', + b'o', b'P', b'p', b'Q', b'q', b'R', b'r', b'S', b's', b'T', b't', b'U', b'u', b'V', b'v', + b'W', b'w', b'X', b'x', b'Y', b'y', b'Z', b'z', + ]; +} +impl AsciiCollator for CldrAsciiCollator { + const WEIGHTS: [u8; 256] = ascii_collation_weight_from(&Self::COLLATION); +} + +/// Generate the collation weight table from an ASCII collation table.
+/// The last 128 bytes are mapped to themselves. +pub const fn ascii_collation_weight_from(collation_table: &[u8; 128]) -> [u8; 256] { + let mut result = [0u8; 256]; + let mut i = 0; + while i < collation_table.len() { + debug_assert!( + result[collation_table[i] as usize] == 0, + "A character appears twice in the collation table." + ); + result[collation_table[i] as usize] = i as u8; + i += 1; + } + while i < result.len() { + result[i] = i as u8; + i += 1; + } + result +} + pub trait StrLikeExtension: ToOwned { /// Returns the same value as String::to_lowercase. The only difference /// is that this functions returns ```Cow``` and does not allocate /// if the string is already in lowercase. - fn to_ascii_lowercase_cow(&self) -> std::borrow::Cow; + fn to_ascii_lowercase_cow(&self) -> Cow; + + /// Compare two strings using a natural ASCII order. + /// + /// Uppercase letters come first (e.g. `A` < `a` < `B` < `b`) + /// and numbers are compared in a human way (e.g. `9` < `10`). + fn ascii_nat_cmp(&self, other: &Self) -> Ordering; } pub trait StrOnlyExtension: ToOwned { /// Returns the same value as String::to_lowercase. The only difference /// is that this functions returns ```Cow``` and does not allocate /// if the string is already in lowercase. - fn to_lowercase_cow(&self) -> std::borrow::Cow; - /// Returns the same value as String::to_lowercase. The only difference - /// is that this functions returns ```Cow``` and does not allocate - /// if the string is already in lowercase.
- fn to_ascii_lowercase_cow(&self) -> std::borrow::Cow; + fn to_lowercase_cow(&self) -> Cow; } -impl StrOnlyExtension for str { +impl StrLikeExtension for str { fn to_ascii_lowercase_cow(&self) -> Cow { let has_ascii_uppercase = self.bytes().any(|b| b.is_ascii_uppercase()); if has_ascii_uppercase { @@ -444,6 +633,12 @@ impl StrOnlyExtension for str { } } + fn ascii_nat_cmp(&self, other: &Self) -> Ordering { + self.as_bytes().ascii_nat_cmp(other.as_bytes()) + } +} + +impl StrOnlyExtension for str { fn to_lowercase_cow(&self) -> Cow { let has_uppercase = self.chars().any(char::is_uppercase); if has_uppercase { @@ -468,6 +663,11 @@ impl StrLikeExtension for std::ffi::OsStr { Cow::Borrowed(self) } } + + fn ascii_nat_cmp(&self, other: &Self) -> Ordering { + self.as_encoded_bytes() + .ascii_nat_cmp(other.as_encoded_bytes()) + } } impl StrLikeExtension for [u8] { @@ -479,6 +679,10 @@ impl StrLikeExtension for [u8] { Cow::Borrowed(self) } } + + fn ascii_nat_cmp(&self, other: &Self) -> Ordering { + CldrAsciiCollator.cmp(self.iter().copied(), other.iter().copied()) + } } // TODO. Once trait-alias are stabilized it would be enough to `use` this trait instead of individual ones. 
@@ -488,6 +692,7 @@ impl StrExtension for T {} #[cfg(test)] mod tests { + use core::cmp::Ordering; use std::ffi::OsStr; use super::*; @@ -921,4 +1126,34 @@ mod tests { assert!(matches!("tešt".to_lowercase_cow(), Cow::Borrowed(_))); } + + #[test] + fn collation_weight_unique() { + for weight in 0..=255 { + assert!(CldrAsciiCollator::WEIGHTS.contains(&weight)); + } + } + + #[test] + fn ascii_nat_ord() { + assert_eq!("".ascii_nat_cmp(""), Ordering::Equal); + assert_eq!("a".ascii_nat_cmp(""), Ordering::Greater); + assert_eq!("".ascii_nat_cmp("a"), Ordering::Less); + + assert_eq!("ab".ascii_nat_cmp("ab"), Ordering::Equal); + assert_eq!("abc".ascii_nat_cmp("ab"), Ordering::Greater); + assert_eq!("ab".ascii_nat_cmp("abc"), Ordering::Less); + + assert_eq!("A".ascii_nat_cmp("a"), Ordering::Less); + assert_eq!("a".ascii_nat_cmp("B"), Ordering::Less); + + assert_eq!("9".ascii_nat_cmp("10"), Ordering::Less); + assert_eq!("10".ascii_nat_cmp("10"), Ordering::Equal); + assert_eq!("10".ascii_nat_cmp("9"), Ordering::Greater); + assert_eq!("09".ascii_nat_cmp("10"), Ordering::Less); + assert_eq!("a00".ascii_nat_cmp("a01"), Ordering::Less); + assert_eq!("a00b".ascii_nat_cmp("a01b"), Ordering::Less); + + assert_eq!("a10".ascii_nat_cmp("a009"), Ordering::Less); + } }