From 0dc228c09b22fd57804f8bc426edb3402b43a759 Mon Sep 17 00:00:00 2001 From: jedel1043 Date: Mon, 6 Jun 2022 18:10:02 -0500 Subject: [PATCH 01/20] Implement new `Intl` API --- Cargo.lock | 283 +++++-- boa_engine/Cargo.toml | 22 +- boa_engine/src/builtins/intl/locale/mod.rs | 20 + boa_engine/src/builtins/intl/locale/tests.rs | 191 +++++ boa_engine/src/builtins/intl/locale/utils.rs | 361 +++++++++ boa_engine/src/builtins/intl/mod.rs | 778 +------------------ boa_engine/src/builtins/intl/options.rs | 202 +++++ boa_engine/src/builtins/intl/tests.rs | 602 ++------------ boa_engine/src/context/icu.rs | 124 +-- boa_engine/src/context/mod.rs | 52 +- boa_engine/src/string/mod.rs | 10 + 11 files changed, 1183 insertions(+), 1462 deletions(-) create mode 100644 boa_engine/src/builtins/intl/locale/mod.rs create mode 100644 boa_engine/src/builtins/intl/locale/tests.rs create mode 100644 boa_engine/src/builtins/intl/locale/utils.rs create mode 100644 boa_engine/src/builtins/intl/options.rs diff --git a/Cargo.lock b/Cargo.lock index a7c9fd82276..93e331cacee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -134,11 +134,14 @@ dependencies = [ "dyn-clone", "fast-float", "float-cmp", + "icu_calendar", + "icu_collator", "icu_datetime", - "icu_locale_canonicalizer", "icu_locid", + "icu_locid_transform", "icu_plurals", "icu_provider", + "icu_provider_adapters", "icu_testdata", "indexmap", "jemallocator", @@ -703,9 +706,9 @@ dependencies = [ [[package]] name = "fixed_decimal" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa523feb405bd9fd25daafb500a48112156a8c15860d18451190e8701500e863" +checksum = "ceab5dbd7a3a08385160503f6b6636420dbd72d29babdf8c894d4534304191c3" dependencies = [ "displaydoc", "smallvec", @@ -818,117 +821,209 @@ dependencies = [ [[package]] name = "icu_calendar" -version = "0.6.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c0d5e4d859bd991761ead59f6c8745d3cf3443c1c6c9bf6c5131b3ac8cf3c09" +checksum = "81b686a9fbeca17bfa11b5b9864d840d8f2dc5abd80bef562486a6005f62c248" dependencies = [ "displaydoc", "icu_locid", "icu_provider", "serde", "tinystr", - "zerovec 0.7.0", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_collator" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9ca263b27af1ab7192837ebe4e5389d389d0c249885f65c9b330decfc3e4abb" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid", + "icu_normalizer", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_collections" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5be938a104d76f3bb9be012b6cd1451f7ed61c1eb9605c80f0f59c23f204dd" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", ] [[package]] name = "icu_datetime" -version = "0.6.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683f1ef5f2d28919f374030942274e0576e4f09c2d2735092899eb8dc1842c9d" +checksum = "f9109c2277e98df5ebea10fa63cdd1aafbf78d726ecddbd205b8cff212345b98" dependencies = [ "displaydoc", "either", + "fixed_decimal", "icu_calendar", + "icu_decimal", "icu_locid", "icu_plurals", "icu_provider", + "icu_timezone", "litemap", "serde", "smallvec", "tinystr", "writeable", - "zerovec 0.7.0", + "zerovec", ] [[package]] -name = "icu_locale_canonicalizer" -version = "0.6.0" +name = "icu_decimal" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "973942d4e8c01fac1839feb537f4933514236585f418125963ff78d4004eebfd" +checksum = "d36aab31559864247be8b5c11a0a00a649c15922bfe158442b9083e78d8c1fc3" dependencies = [ + "displaydoc", + "fixed_decimal", "icu_locid", "icu_provider", - "litemap", "serde", - "tinystr", - "zerovec 0.7.0", + "writeable", +] + +[[package]] +name = "icu_list" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c40218275f081c4493f190357c5395647b06734c2dc3dcb41cc099a0f60168b1" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider", + "regex-automata", + "writeable", + "zerovec", ] [[package]] name = "icu_locid" -version = "0.6.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a47bdfb63c6b49f5c43fb7ba358edcd1422fdf2e8df6fe26ece0df4925333cd" +checksum = "34b3de5d99a0e275fe6193b9586dbf37364daebc0d39c89b5cf8376a53b789e8" dependencies = [ "displaydoc", "litemap", "serde", "tinystr", "writeable", - "zerovec 0.7.0", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "934712cef692e652dbc9a02024c2fc7d82ac7c6406d84f482c8b6f52cc897273" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider", + "serde", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f13202f9022ca7cf1b08631bc026526a1b232ceb70695ed5a7ffbb90a90c67e" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerofrom", + "zerovec", ] [[package]] name = "icu_plurals" -version = "0.6.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a725b9ebe4910eb1bcc65a3a4c3262c6cc2ebd327a63df9f6d18ee53acee5f" +checksum = "e81bb05ec62d5103926407da9a366c2af53e3d3f3bd0b94cd7074510a74d7444" dependencies = [ "displaydoc", "fixed_decimal", "icu_locid", "icu_provider", "serde", - "zerovec 0.7.0", + "zerovec", +] + +[[package]] +name = "icu_properties" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b46dce0df6daeda3aec068f6f31eeaa6402525a505df9b816d8fd5bd9c876448" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_provider", + "zerovec", ] [[package]] name = "icu_provider" -version = "0.6.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fbd7ffd479fdbbc366334a82821dc50d9f80b758389393374e9b36ff159f1a" +checksum = "2f911086e3c521a8a824d4f8bfd87769645ced2f07ff913b521c0d793be07100" dependencies = [ "displaydoc", "icu_locid", "icu_provider_macros", - "litemap", - "postcard", "serde", + "stable_deref_trait", "writeable", "yoke", "zerofrom", - "zerovec 0.7.0", + "zerovec", ] [[package]] -name = "icu_provider_blob" -version = "0.6.0" +name = "icu_provider_adapters" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "474b884a565f7ec52a26754a8b57646c128195e7af629caa52317ef6674e3e0d" +checksum = "980c71d8a91b246ebbb97847178a4b816eea39d1d550c70ee566384555bb6545" dependencies = [ + "icu_locid", "icu_provider", - "postcard", "serde", - "writeable", + "tinystr", "yoke", - "zerovec 0.7.0", + "zerovec", ] [[package]] name = "icu_provider_macros" -version = "0.6.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e3ab0b492dcc1416f2f16142596467382ed70bdbfaee51c83e086b1ce0e75c" +checksum = "38cf6f5b65cf81f0b4298da647101acbfe6ae0e25263f92bd7a22597e9d6d606" dependencies = [ "proc-macro2", "quote", @@ -937,12 +1032,42 @@ dependencies = [ [[package]] name = "icu_testdata" -version = "0.6.0" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e03d02c963ad1f0680749016afad29f87933ca82957c536bfa975880b2efdae" +dependencies = [ + "icu_calendar", + "icu_collator", + "icu_collections", + "icu_datetime", + "icu_decimal", + "icu_list", + "icu_locid", + "icu_locid_transform", + "icu_normalizer", + "icu_plurals", + "icu_properties", + "icu_provider", + "icu_provider_adapters", + "icu_timezone", + "litemap", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_timezone" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5580eeaa6ea70b94f286120ffcfb70f75ac8d759d95ccf6223a3c479ff99285" +checksum = "081c60a92f80496fe2aa49a54ab86394ec2f91993cff985f8451e4912a347ea0" dependencies = [ + "displaydoc", + "icu_calendar", + "icu_locid", "icu_provider", - "icu_provider_blob", + "serde", + "tinystr", + "zerovec", ] [[package]] @@ -1078,12 +1203,11 @@ checksum = "8f9f08d8963a6c613f4b1a78f4f4a4dbfadf8e6545b2d72861731e4858b8b47f" [[package]] name = "litemap" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78d268a51abaaee3b8686e56396eb725b0da510bddd266a52e784aa1029dae73" +checksum = "f34a3f4798fac63fb48cf277eefa38f94d3443baff555bb98e4f56bc9092368e" dependencies = [ "serde", - "yoke", ] [[package]] @@ -1357,22 +1481,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "postcard" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25c0b0ae06fcffe600ad392aabfa535696c8973f2253d9ac83171924c58a858" -dependencies = [ - "postcard-cobs", - "serde", -] - -[[package]] -name = "postcard-cobs" -version = "0.1.5-pre" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c68cb38ed13fd7bc9dd5db8f165b7c8d9c1a315104083a2b10f11354c2af97f" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1514,6 +1622,15 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" +dependencies = [ + "memchr", +] + [[package]] name = "regex-syntax" version = "0.6.28" @@ -1833,14 +1950,13 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dfb77d2490072fb5616d67686f55481b3d97701e383e208a7225843eba1aae6" +checksum = "f8aeafdfd935e4a7fe16a91ab711fa52d54df84f9c8f7ca5837a9d1d902ef4c2" dependencies = [ "displaydoc", "serde", - "zerovec 0.7.0", - "zerovec 0.8.1", + "zerovec", ] [[package]] @@ -1964,6 +2080,18 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1e5fa573d8ac5f1a856f8d7be41d390ee973daf97c806b2c1a465e4e1406e68" +[[package]] +name = "utf16_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52df8b7fb78e7910d776fccf2e42ceaf3604d55e8e7eb2dbd183cb1441d8a692" + +[[package]] +name = "utf8_iter" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33" + [[package]] name = "utf8parse" version = "0.2.0" @@ -2157,17 +2285,23 @@ version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + [[package]] name = "writeable" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8ab608ef0f68f7b5e1f17a38342cbc2725bf212f6ba9f103b0e05f675c41d83" +checksum = "f8e6ab4f5da1b24daf2c590cfac801bacb27b15b4f050e84eb60149ea726f06b" [[package]] name = "yoke" -version = "0.5.0" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a829d54286b35cf07cbf9d8de817387ba4de20286e59214e67eaad5124b620a1" +checksum = "1fe1d55ca72c32d573bfbd5cb2f0ca65a497854c44762957a6d3da96041a5184" dependencies = [ "serde", "stable_deref_trait", @@ -2177,9 +2311,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "768f11e61cfb57f5de25941c877571dde114de48cac204594cc72beb71073d9f" +checksum = "58c2c5bb7c929b85c1b9ec69091b0d835f0878b4fd9eb67973b25936e06c4374" dependencies = [ "proc-macro2", "quote", @@ -2210,9 +2344,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.7.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c1b475ff48237bf7281cfa1721a52f0ad7f95ede1a46385e555870a354afc45" +checksum = "b9d919a74c17749ccb17beaf6405562e413cd94e98ba52ca1e64bbe7eefbd8b8" dependencies = [ "serde", "yoke", @@ -2220,20 +2354,11 @@ dependencies = [ "zerovec-derive", ] -[[package]] -name = "zerovec" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3412f49402c32fffcc98fa861dc496eaa777442c5a5fc1e8d33d0fbb53cb0d2" -dependencies = [ - "zerofrom", -] - [[package]] name = "zerovec-derive" -version = "0.6.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd14fd397ea1b593c9c97a35d4da3dfb3a0ac7a1cad0e7f9e1b4bc313d1787e9" +checksum = "490e5f878c2856225e884c35927e7ea6db3c24cdb7229b72542c7526ad7ed49e" dependencies = [ "proc-macro2", "quote", diff --git a/boa_engine/Cargo.toml b/boa_engine/Cargo.toml index 8537d6c7f67..cdc7d2182fc 100644 --- a/boa_engine/Cargo.toml +++ b/boa_engine/Cargo.toml @@ -15,12 +15,15 @@ rust-version.workspace = true profiler = ["boa_profiler/profiler"] deser = ["boa_interner/serde", "boa_ast/serde"] intl = [ - "dep:icu_locale_canonicalizer", + "dep:icu_locid_transform", "dep:icu_locid", "dep:icu_datetime", "dep:icu_plurals", "dep:icu_provider", + "dep:icu_provider_adapters", "dep:icu_testdata", + "dep:icu_calendar", + "dep:icu_collator", "dep:sys-locale" ] @@ -59,12 +62,17 @@ tap = "1.0.1" sptr = "0.3.2" static_assertions = "1.1.0" thiserror = "1.0.38" -icu_locale_canonicalizer = { version = "0.6.0", features = ["serde"], optional = true } -icu_locid = { version = "0.6.0", features = ["serde"], optional = true } -icu_datetime = { version = "0.6.0", features = ["serde"], optional = true } -icu_plurals = { version = "0.6.0", features = ["serde"], optional = true } -icu_provider = { version = "0.6.0", optional = true } -icu_testdata = { version = "0.6.0", optional = true } + +# intl deps +icu_locid_transform = { version = "1.0.0", features = ["serde"], optional = true } +icu_locid = { version = "1.0.0", features = ["serde"], optional = true } +icu_datetime = { version = "1.0.0", features = ["serde", "experimental"], optional = true } +icu_calendar = { version = "1.0.0", optional = true } +icu_collator = { version = "1.0.0", optional = true } +icu_plurals = { version = "1.0.0", features = ["serde"], optional = true } +icu_provider = { version = "1.0.1", optional = true } +icu_provider_adapters = { version = "1.0.0", features = ["serde"], optional = true } +icu_testdata = { version = "1.0.0", optional = true } sys-locale = { version = "0.2.3", optional = true } [dev-dependencies] diff --git a/boa_engine/src/builtins/intl/locale/mod.rs b/boa_engine/src/builtins/intl/locale/mod.rs new file mode 100644 index 00000000000..2f96fc18792 --- /dev/null +++ b/boa_engine/src/builtins/intl/locale/mod.rs @@ -0,0 +1,20 @@ +#[cfg(test)] +mod tests; + +mod utils; + +use icu_collator::CaseFirst; +use icu_datetime::options::preferences::HourCycle; +use icu_locid::Locale; +pub(super) use utils::*; + +use crate::JsString; + +pub(crate) struct JsLocale { + locale: Locale, + calendar: JsString, + hour_cycle: HourCycle, + case_first: CaseFirst, + numeric: bool, + numbering: JsString, +} diff --git a/boa_engine/src/builtins/intl/locale/tests.rs b/boa_engine/src/builtins/intl/locale/tests.rs new file mode 100644 index 00000000000..1ef11b96d90 --- /dev/null +++ b/boa_engine/src/builtins/intl/locale/tests.rs @@ -0,0 +1,191 @@ +// use icu_datetime::{pattern::CoarseHourCycle, provider::calendar::TimeLengthsV1Marker, options::preferences::HourCycle}; +// use icu_locid::{ +// extensions::{ +// unicode::{Key, Value}, +// Extensions, +// }, +// extensions_unicode_key as key, extensions_unicode_value as value, langid, locale, LanguageIdentifier, Locale, +// }; +// use icu_provider::{hello_world::HelloWorldV1Marker, DataProvider, DataRequest, AsDowncastingAnyProvider, DataLocale}; +// use serde::{de::value::StrDeserializer, Deserialize, Serialize}; + +// use crate::{ +// builtins::intl::{ +// locale::{best_available_locale, best_fit_matcher, default_locale, lookup_matcher}, +// ExtensionKey, Service, +// }, +// context::icu::{BoaProvider, Icu}, +// }; + +// #[test] +// fn best_avail_loc() { +// let provider = icu_testdata::any(); +// let provider = provider.as_downcasting(); + +// assert_eq!( +// best_available_locale::(langid!("en"), &provider), +// Some(langid!("en")) +// ); + +// assert_eq!( +// best_available_locale::(langid!("es-ES"), &provider), +// Some(langid!("es")) +// ); + +// assert_eq!( +// best_available_locale::(langid!("kr"), &provider), +// None +// ); +// } + +// #[test] +// fn lookup_match() { +// let icu = Icu::new(BoaProvider::Any(Box::new(icu_testdata::any()))).unwrap(); + +// // requested: [] + +// let res = lookup_matcher::(&[], &icu); +// assert_eq!(res, default_locale(icu.locale_canonicalizer())); +// assert!(res.extensions.is_empty()); + +// // requested: [fr-FR-u-hc-h12] +// let req: Locale = "fr-FR-u-hc-h12".parse().unwrap(); + +// let res = lookup_matcher::(&[req.clone()], &icu); +// assert_eq!(res.id, langid!("fr")); +// assert_eq!(res.extensions, req.extensions); + +// // requested: [kr-KR-u-hc-h12, gr-GR-u-hc-h24-x-4a, es-ES-valencia-u-ca-gregory, uz-Cyrl] +// let kr: Locale = "kr-KR-u-hc-h12".parse().unwrap(); +// let gr: Locale = "gr-GR-u-hc-h24-x-4a".parse().unwrap(); +// let en: Locale = "es-ES-valencia-u-ca-gregory".parse().unwrap(); +// let uz = locale!("uz-Cyrl"); +// let req = vec![kr, gr, en.clone(), uz]; + +// let res = best_fit_matcher::(&req, &icu); +// assert_eq!(res.id, langid!("es")); +// assert_eq!(res.extensions, en.extensions); +// } + +// // #[test] +// // fn locale_resolution() { +// // let mut context = Context::default(); + +// // // test lookup +// // let available_locales = Vec::::new(); +// // let requested_locales = Vec::::new(); +// // let relevant_extension_keys = Vec::::new(); +// // let locale_data = FxHashMap::default(); +// // let options = DateTimeFormatRecord { +// // locale_matcher: JsString::new("lookup"), +// // properties: FxHashMap::default(), +// // }; + +// // let locale_record = resolve_locale( +// // &available_locales, +// // &requested_locales, +// // &options, +// // &relevant_extension_keys, +// // &locale_data, +// // &mut context, +// // ); +// // assert_eq!( +// // locale_record.locale, +// // default_locale(context.icu().locale_canonicalizer()) +// // .to_string() +// // .as_str() +// // ); +// // assert_eq!( +// // locale_record.data_locale, +// // default_locale(context.icu().locale_canonicalizer()) +// // .to_string() +// // .as_str() +// // ); +// // assert!(locale_record.properties.is_empty()); + +// // // test best fit +// // let available_locales = Vec::::new(); +// // let requested_locales = Vec::::new(); +// // let relevant_extension_keys = Vec::::new(); +// // let locale_data = FxHashMap::default(); +// // let options = DateTimeFormatRecord { +// // locale_matcher: JsString::new("best-fit"), +// // properties: FxHashMap::default(), +// // }; + +// // let locale_record = resolve_locale( +// // &available_locales, +// // &requested_locales, +// // &options, +// // &relevant_extension_keys, +// // &locale_data, +// // &mut context, +// // ); +// // assert_eq!( +// // locale_record.locale, +// // default_locale(context.icu().locale_canonicalizer()) +// // .to_string() +// // .as_str() +// // ); +// // assert_eq!( +// // locale_record.data_locale, +// // default_locale(context.icu().locale_canonicalizer()) +// // .to_string() +// // .as_str() +// // ); +// // assert!(locale_record.properties.is_empty()); + +// // // available: [es-ES], requested: [es-ES] +// // let available_locales = vec![JsString::new("es-ES")]; +// // let requested_locales = vec![JsString::new("es-ES")]; +// // let relevant_extension_keys = Vec::::new(); +// // let locale_data = FxHashMap::default(); +// // let options = DateTimeFormatRecord { +// // locale_matcher: JsString::new("lookup"), +// // properties: FxHashMap::default(), +// // }; + +// // let locale_record = resolve_locale( +// // &available_locales, +// // &requested_locales, +// // &options, +// // &relevant_extension_keys, +// // &locale_data, +// // &mut context, +// // ); +// // assert_eq!(locale_record.locale, "es-ES"); +// // assert_eq!(locale_record.data_locale, "es-ES"); +// // assert!(locale_record.properties.is_empty()); + +// // // available: [zh-CN], requested: [] +// // let available_locales = vec![JsString::new("zh-CN")]; +// // let requested_locales = Vec::::new(); +// // let relevant_extension_keys = Vec::::new(); +// // let locale_data = FxHashMap::default(); +// // let options = DateTimeFormatRecord { +// // locale_matcher: JsString::new("lookup"), +// // properties: FxHashMap::default(), +// // }; + +// // let locale_record = resolve_locale( +// // &available_locales, +// // &requested_locales, +// // &options, +// // &relevant_extension_keys, +// // &locale_data, +// // &mut context, +// // ); +// // assert_eq!( +// // locale_record.locale, +// // default_locale(context.icu().locale_canonicalizer()) +// // .to_string() +// // .as_str() +// // ); +// // assert_eq!( +// // locale_record.data_locale, +// // default_locale(context.icu().locale_canonicalizer()) +// // .to_string() +// // .as_str() +// // ); +// // assert!(locale_record.properties.is_empty()); +// // } diff --git a/boa_engine/src/builtins/intl/locale/utils.rs b/boa_engine/src/builtins/intl/locale/utils.rs new file mode 100644 index 00000000000..03ce5a1d3ad --- /dev/null +++ b/boa_engine/src/builtins/intl/locale/utils.rs @@ -0,0 +1,361 @@ +use crate::{ + builtins::{ + intl::{ + options::{IntlOptions, LocaleMatcher}, + Service, + }, + Array, JsArgs, + }, + context::icu::Icu, + Context, JsNativeError, JsResult, JsValue, +}; + +use icu_locid::{subtags::Variants, LanguageIdentifier, Locale}; +use icu_locid_transform::LocaleCanonicalizer; +use icu_provider::{DataProvider, DataRequest, DataRequestMetadata, KeyedDataMarker}; +use indexmap::IndexSet; + +use tap::TapOptional; + +/// Abstract operation `CanonicalizeLocaleList ( locales )` +/// +/// Converts an array of [`JsValue`]s containing structurally valid +/// [Unicode BCP 47 locale identifiers][bcp-47] into their [canonical form][canon]. +/// +/// For efficiency, this returns [`Locale`]s instead of [`String`]s, since +/// `Locale` allows us to modify individual parts of the locale without scanning +/// the whole string again. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-canonicalizelocalelist +/// [bcp-47]: https://unicode.org/reports/tr35/#Unicode_locale_identifier +/// [canon]: https://unicode.org/reports/tr35/#LocaleId_Canonicalization +pub(in crate::builtins::intl) fn canonicalize_locale_list( + args: &[JsValue], + context: &mut Context, +) -> JsResult> { + // 1. If locales is undefined, then + let locales = args.get_or_undefined(0); + if locales.is_undefined() { + // a. Return a new empty List. + return Ok(IndexSet::default()); + } + + // 2. Let seen be a new empty List. + let mut seen = IndexSet::new(); + + // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, then + // TODO: check if Type(locales) is object and handle the internal slots + let o = if locales.is_string() { + // a. Let O be CreateArrayFromList(« locales »). + Array::create_array_from_list([locales.clone()], context) + } else { + // 4. Else, + // a. Let O be ? ToObject(locales). + locales.to_object(context)? + }; + + // 5. Let len be ? ToLength(? Get(O, "length")). + let len = o.length_of_array_like(context)?; + + // 6 Let k be 0. + // 7. Repeat, while k < len, + for k in 0..len { + // a. Let Pk be ToString(k). + // b. Let kPresent be ? HasProperty(O, Pk). + let k_present = o.has_property(k, context)?; + // c. If kPresent is true, then + if k_present { + // i. Let kValue be ? Get(O, Pk). + let k_value = o.get(k, context)?; + // ii. If Type(kValue) is not String or Object, throw a TypeError exception. + if !(k_value.is_object() || k_value.is_string()) { + return Err(JsNativeError::typ() + .with_message("locale should be a String or Object") + .into()); + } + // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then + // TODO: handle checks for InitializedLocale internal slot (there should be an if statement here) + // 1. Let tag be kValue.[[Locale]]. + // iv. Else, + // 1. Let tag be ? ToString(kValue). + // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + let mut tag = k_value + .to_string(context)? + .to_std_string_escaped() + .parse() + .map_err(|_| { + JsNativeError::range() + .with_message("locale is not a structurally valid language tag") + })?; + + // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). + context.icu().locale_canonicalizer().canonicalize(&mut tag); + + // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. + seen.insert(tag); + } + // d. Increase k by 1. + } + + // 8. Return seen. + Ok(seen) +} + +/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )` +/// +/// Compares a BCP 47 language priority list `requestedLocales` against the locales +/// in `availableLocales` and determines the best available language to meet the request. +/// `availableLocales`, `requestedLocales`, and `relevantExtensionKeys` must be provided as +/// `List` values, options and `localeData` as Records. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale +#[allow(dead_code)] +pub(in crate::builtins::intl) fn resolve_locale( + requested_locales: &[Locale], + options: &mut IntlOptions, + icu: &Icu

, +) -> Locale +where + S: Service

, + P: DataProvider, +{ + // 1. Let matcher be options.[[localeMatcher]]. + // 2. If matcher is "lookup", then + // a. Let r be ! LookupMatcher(availableLocales, requestedLocales). + // 3. Else, + // a. Let r be ! BestFitMatcher(availableLocales, requestedLocales). + // 4. Let foundLocale be r.[[locale]]. + let mut found_locale = if options.matcher == LocaleMatcher::Lookup { + lookup_matcher::(requested_locales, icu) + } else { + best_fit_matcher::(requested_locales, icu) + }; + + // From here, the spec differs significantly from the implementation, + // since ICU4X allows us to skip some steps and modularize the + // extension resolution algorithm. However, the original spec is left here + // for completion purposes. + + // 5. Let result be a new Record. + // 6. Set result.[[dataLocale]] to foundLocale. + // 7. If r has an [[extension]] field, then + // a. Let components be ! UnicodeExtensionComponents(r.[[extension]]). + // b. Let keywords be components.[[Keywords]]. + // 9. For each element key of relevantExtensionKeys, do + // a. Let foundLocaleData be localeData.[[]]. + // b. Assert: Type(foundLocaleData) is Record. + // c. Let keyLocaleData be foundLocaleData.[[]]. + // d. Assert: Type(keyLocaleData) is List. + // e. Let value be keyLocaleData[0]. + // f. Assert: Type(value) is either String or Null. + // g. Let supportedExtensionAddition be "". + // h. If r has an [[extension]] field, then + // i. If keywords contains an element whose [[Key]] is the same as key, then + // 1. Let entry be the element of keywords whose [[Key]] is the same as key. + // 2. Let requestedValue be entry.[[Value]]. + // 3. If requestedValue is not the empty String, then + // a. If keyLocaleData contains requestedValue, then + // i. Let value be requestedValue. + // ii. Let supportedExtensionAddition be the string-concatenation of "-", key, "-", and value. + // 4. Else if keyLocaleData contains "true", then + // a. Let value be "true". + // b. Let supportedExtensionAddition be the string-concatenation of "-" and key. + // i. If options has a field [[]], then + // i. Let optionsValue be options.[[]]. + // ii. Assert: Type(optionsValue) is either String, Undefined, or Null. + // iii. If Type(optionsValue) is String, then + // 1. Let optionsValue be the string optionsValue after performing the algorithm steps to transform + // Unicode extension values to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 + // Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions. + // 2. Let optionsValue be the string optionsValue after performing the algorithm steps to replace + // Unicode extension values with their canonical form per Unicode Technical Standard #35 LDML § 3.2.1 + // Canonical Unicode Locale Identifiers, treating key as ukey and optionsValue as uvalue productions. + // 3. If optionsValue is the empty String, then + // a. Let optionsValue be "true". + // iv. If keyLocaleData contains optionsValue, then + // 1. If SameValue(optionsValue, value) is false, then + // a. Let value be optionsValue. + // b. Let supportedExtensionAddition be "". + // j. Set result.[[]] to value. + // k. Append supportedExtensionAddition to supportedExtension. + // 10. If the number of elements in supportedExtension is greater than 2, then + // a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension). + // 11. Set result.[[locale]] to foundLocale. + + // 12. Return result. + S::resolve( + &mut found_locale, + &mut options.service_options, + icu.provider(), + ); + found_locale +} + +/// Abstract operation `DefaultLocale ( )` +/// +/// Returns a String value representing the structurally valid and canonicalized +/// Unicode BCP 47 locale identifier for the host environment's current locale. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-defaultlocale +fn default_locale(canonicalizer: &LocaleCanonicalizer) -> Locale { + sys_locale::get_locale() + .and_then(|loc| loc.parse::().ok()) + .tap_some_mut(|loc| { + canonicalizer.canonicalize(loc); + }) + .unwrap_or_default() +} + +/// Abstract operation `BestAvailableLocale ( availableLocales, locale )` +/// +/// Compares the provided argument `locale`, which must be a String value with a +/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against +/// the locales in `availableLocales` and returns either the longest non-empty prefix +/// of `locale` that is an element of `availableLocales`, or undefined if there is no +/// such element. +/// +/// We only work with language identifiers, which have the same semantics +/// but are a bit easier to manipulate. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale +fn best_available_locale( + candidate: LanguageIdentifier, + provider: &(impl DataProvider + ?Sized), +) -> Option { + // 1. Let candidate be locale. + let mut candidate = candidate.into(); + // 2. Repeat + loop { + // a. If availableLocales contains an element equal to candidate, return candidate. + // ICU4X requires doing data requests in order to check if a locale + // is part of the set of supported locales. + let response = DataProvider::::load( + provider, + DataRequest { + locale: &candidate, + metadata: DataRequestMetadata::default(), + }, + ); + + if let Ok(req) = response { + let metadata = req.metadata; + + // `metadata.locale` returns None when the provider doesn't have a + // fallback mechanism, but supports the required locale. + // However, if the provider has a fallback mechanism, this will return + // `Some(locale)`, where the locale is the used locale after applying + // the fallback algorithm, even if the used locale is exactly the same + // as the required locale. + if metadata.locale.is_none() || metadata.locale.as_ref() == Some(&candidate) { + return Some(candidate.get_langid()); + } + } + + // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined. + // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2. + // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive. + // + // Since the definition of `LanguageIdentifier` allows us to manipulate it + // without using strings, we can replace these steps by a simpler + // algorithm. + + if candidate.has_variants() { + let mut variants = candidate + .clear_variants() + .iter() + .copied() + .collect::>(); + variants.pop(); + candidate.set_variants(Variants::from_vec_unchecked(variants)); + } else if candidate.region().is_some() { + candidate.set_region(None); + } else if candidate.script().is_some() { + candidate.set_script(None); + } else { + return None; + } + } +} + +/// Abstract operation `LookupMatcher ( availableLocales, requestedLocales )` +/// +/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`, +/// against the locales in `availableLocales` and determines the best available language to +/// meet the request. +/// +/// # Note +/// +/// This differs a bit from the spec, since we don't have an `[[AvailableLocales]]` +/// list to compare with. However, we can do data requests to a [`DataProvider`] +/// in order to see if a certain [`Locale`] is supported. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher +fn lookup_matcher( + requested_locales: &[Locale], + icu: &Icu>, +) -> Locale { + // 1. Let result be a new Record. + // 2. For each element locale of requestedLocales, do + for locale in requested_locales { + // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale + // extension sequences removed. + let mut locale = locale.clone(); + let id = std::mem::take(&mut locale.id); + + // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale). + let available_locale = best_available_locale::(id, icu.provider()); + + // c. If availableLocale is not undefined, then + if let Some(available_locale) = available_locale { + // i. Set result.[[locale]] to availableLocale. + // Assignment deferred. See return statement below. + // ii. If locale and noExtensionsLocale are not the same String value, then + // 1. Let extension be the String value consisting of the substring of the Unicode + // locale extension sequence within locale. + // 2. Set result.[[extension]] to extension. + locale.id = available_locale; + + // iii. Return result. + return locale; + } + } + + // 3. Let defLocale be ! DefaultLocale(). + // 4. Set result.[[locale]] to defLocale. + // 5. Return result. + default_locale(icu.locale_canonicalizer()) +} + +/// Abstract operation `BestFitMatcher ( availableLocales, requestedLocales )` +/// +/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`, +/// against the locales in `availableLocales` and determines the best available language to +/// meet the request. The algorithm is implementation dependent, but should produce results +/// that a typical user of the requested locales would perceive as at least as good as those +/// produced by the `LookupMatcher` abstract operation. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher +fn best_fit_matcher( + requested_locales: &[Locale], + icu: &Icu>, +) -> Locale { + lookup_matcher::(requested_locales, icu) +} diff --git a/boa_engine/src/builtins/intl/mod.rs b/boa_engine/src/builtins/intl/mod.rs index 609914426b3..a978c50090d 100644 --- a/boa_engine/src/builtins/intl/mod.rs +++ b/boa_engine/src/builtins/intl/mod.rs @@ -9,24 +9,24 @@ use crate::{ builtins::intl::date_time_format::DateTimeFormat, - builtins::{Array, BuiltIn, JsArgs}, - error::JsNativeError, - object::{JsObject, ObjectInitializer}, + builtins::{Array, BuiltIn}, + object::ObjectInitializer, property::Attribute, symbol::WellKnownSymbols, Context, JsResult, JsValue, }; -pub mod date_time_format; #[cfg(test)] mod tests; +pub(crate) mod date_time_format; +mod locale; +mod options; + use boa_profiler::Profiler; -use icu_locale_canonicalizer::LocaleCanonicalizer; -use icu_locid::{locale, Locale}; -use indexmap::IndexSet; -use rustc_hash::FxHashMap; -use tap::{Conv, Pipe, TapOptional}; +use icu_locid::Locale; +use icu_provider::KeyedDataMarker; +use tap::{Conv, Pipe}; /// JavaScript `Intl` object. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -75,7 +75,7 @@ impl Intl { context: &mut Context, ) -> JsResult { // 1. Let ll be ? CanonicalizeLocaleList(locales). - let ll = canonicalize_locale_list(args, context)?; + let ll = locale::canonicalize_locale_list(args, context)?; // 2. Return CreateArrayFromList(ll). Ok(JsValue::Object(Array::create_array_from_list( @@ -85,758 +85,8 @@ impl Intl { } } -/// `MatcherRecord` type aggregates unicode `locale` string and unicode locale `extension`. -/// -/// This is a return value for `lookup_matcher` and `best_fit_matcher` subroutines. -#[derive(Debug)] -struct MatcherRecord { - locale: String, - extension: String, -} - -/// Abstract operation `DefaultLocale ( )` -/// -/// Returns a String value representing the structurally valid and canonicalized -/// Unicode BCP 47 locale identifier for the host environment's current locale. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-defaultlocale -fn default_locale(canonicalizer: &LocaleCanonicalizer) -> Locale { - #[allow(clippy::string_lit_as_bytes)] - sys_locale::get_locale() - .and_then(|loc| loc.parse::().ok()) - .tap_some_mut(|loc| canonicalize_unicode_locale_id(loc, canonicalizer)) - .unwrap_or(locale!("en-US")) -} - -/// Abstract operation `BestAvailableLocale ( availableLocales, locale )` -/// -/// Compares the provided argument `locale`, which must be a String value with a -/// structurally valid and canonicalized Unicode BCP 47 locale identifier, against -/// the locales in `availableLocales` and returns either the longest non-empty prefix -/// of `locale` that is an element of `availableLocales`, or undefined if there is no -/// such element. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-bestavailablelocale -fn best_available_locale<'a>(available_locales: &'_ [&'_ str], locale: &'a str) -> Option<&'a str> { - // 1. Let candidate be locale. - let mut candidate = locale; - // 2. Repeat - loop { - // a. If availableLocales contains an element equal to candidate, return candidate. - if available_locales.contains(&candidate) { - return Some(candidate); - } - - // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate. If that character does not occur, return undefined. - let pos = candidate.rfind('-'); - match pos { - Some(ind) => { - // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2. - let tmp_candidate = candidate[..ind].to_string(); - let prev_dash = tmp_candidate.rfind('-').unwrap_or(ind); - let trim_ind = if ind >= 2 && prev_dash == ind - 2 { - ind - 2 - } else { - ind - }; - // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive. - candidate = &candidate[..trim_ind]; - } - None => return None, - } - } -} - -/// Abstract operation `LookupMatcher ( availableLocales, requestedLocales )` -/// -/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`, -/// against the locales in `availableLocales` and determines the best available language to -/// meet the request. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-lookupmatcher -fn lookup_matcher( - available_locales: &[&str], - requested_locales: &[&str], - canonicalizer: &LocaleCanonicalizer, -) -> MatcherRecord { - // 1. Let result be a new Record. - // 2. For each element locale of requestedLocales, do - for locale_str in requested_locales { - // a. Let noExtensionsLocale be the String value that is locale with any Unicode locale - // extension sequences removed. - let locale: Locale = locale_str.parse().expect("Locale parsing failed"); - let no_extensions_locale = locale.id.to_string(); - - // b. Let availableLocale be ! BestAvailableLocale(availableLocales, noExtensionsLocale). - let available_locale = best_available_locale(available_locales, &no_extensions_locale); - - // c. If availableLocale is not undefined, then - if let Some(available_locale) = available_locale { - // i. Set result.[[locale]] to availableLocale. - // Assignment deferred. See return statement below. - // ii. If locale and noExtensionsLocale are not the same String value, then - let maybe_ext = if locale_str.eq(&no_extensions_locale) { - String::new() - } else { - // 1. Let extension be the String value consisting of the substring of the Unicode - // locale extension sequence within locale. - // 2. Set result.[[extension]] to extension. - locale.extensions.to_string() - }; - - // iii. Return result. - return MatcherRecord { - locale: available_locale.into(), - extension: maybe_ext, - }; - } - } - - // 3. Let defLocale be ! DefaultLocale(). - // 4. Set result.[[locale]] to defLocale. - // 5. Return result. - MatcherRecord { - locale: default_locale(canonicalizer).to_string(), - extension: String::new(), - } -} - -/// Abstract operation `BestFitMatcher ( availableLocales, requestedLocales )` -/// -/// Compares `requestedLocales`, which must be a `List` as returned by `CanonicalizeLocaleList`, -/// against the locales in `availableLocales` and determines the best available language to -/// meet the request. The algorithm is implementation dependent, but should produce results -/// that a typical user of the requested locales would perceive as at least as good as those -/// produced by the `LookupMatcher` abstract operation. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-bestfitmatcher -fn best_fit_matcher( - available_locales: &[&str], - requested_locales: &[&str], - canonicalizer: &LocaleCanonicalizer, -) -> MatcherRecord { - lookup_matcher(available_locales, requested_locales, canonicalizer) -} - -/// `Keyword` structure is a pair of keyword key and keyword value. -#[derive(Debug)] -struct Keyword { - key: String, - value: String, -} - -/// `UniExtRecord` structure represents unicode extension records. -/// -/// It contains the list of unicode `extension` attributes and the list of `keywords`. -/// -/// For example: -/// -/// - `-u-nu-thai` has no attributes and the list of keywords contains `(nu:thai)` pair. -#[allow(dead_code)] -#[derive(Debug)] -struct UniExtRecord { - attributes: Vec, // never read at this point - keywords: Vec, -} - -/// Abstract operation `UnicodeExtensionComponents ( extension )` -/// -/// Returns the attributes and keywords from `extension`, which must be a String -/// value whose contents are a `Unicode locale extension` sequence. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-unicode-extension-components -fn unicode_extension_components(extension: &str) -> UniExtRecord { - // 1. Let attributes be a new empty List. - let mut attributes: Vec = Vec::new(); - - // 2. Let keywords be a new empty List. - let mut keywords: Vec = Vec::new(); - - // 3. Let keyword be undefined. - let mut keyword: Option = None; - - // 4. Let size be the length of extension. - let size = extension.len(); - - // 5. Let k be 3. - let mut k = 3; - - // 6. Repeat, while k < size, - while k < size { - // a. Let e be ! StringIndexOf(extension, "-", k). - let e = extension[k..].find('-'); - - // b. If e = -1, let len be size - k; else let len be e - k. - let len = e.unwrap_or(size - k); - - // c. Let subtag be the String value equal to the substring of extension consisting of the - // code units at indices k (inclusive) through k + len (exclusive). - let subtag = &extension[k..k + len]; - - // d. If keyword is undefined and len ≠ 2, then - if keyword.is_none() && len != 2 { - // i. If subtag is not an element of attributes, then - if !attributes.iter().any(|s| s == subtag) { - // 1. Append subtag to attributes. - attributes.push(subtag.to_string()); - } - // e. Else if len = 2, then - } else if len == 2 { - // i. If keyword is not undefined and keywords does not contain an element - // whose [[Key]] is the same as keyword.[[Key]], then - // 1. Append keyword to keywords. - if let Some(keyword_val) = keyword { - let has_key = keywords.iter().any(|elem| elem.key == keyword_val.key); - if !has_key { - keywords.push(keyword_val); - } - }; - - // ii. Set keyword to the Record { [[Key]]: subtag, [[Value]]: "" }. - keyword = Some(Keyword { - key: subtag.into(), - value: String::new(), - }); - // f. Else, - } else { - // i. If keyword.[[Value]] is the empty String, then - // 1. Set keyword.[[Value]] to subtag. - // ii. Else, - // 1. Set keyword.[[Value]] to the string-concatenation of keyword.[[Value]], "-", and subtag. - if let Some(keyword_val) = keyword { - let new_keyword_val = if keyword_val.value.is_empty() { - subtag.into() - } else { - format!("{}-{subtag}", keyword_val.value) - }; - - keyword = Some(Keyword { - key: keyword_val.key, - value: new_keyword_val, - }); - }; - } - - // g. Let k be k + len + 1. - k = k + len + 1; - } - - // 7. If keyword is not undefined and keywords does not contain an element whose [[Key]] is - // the same as keyword.[[Key]], then - // a. Append keyword to keywords. - if let Some(keyword_val) = keyword { - let has_key = keywords.iter().any(|elem| elem.key == keyword_val.key); - if !has_key { - keywords.push(keyword_val); - } - }; - - // 8. Return the Record { [[Attributes]]: attributes, [[Keywords]]: keywords }. - UniExtRecord { - attributes, - keywords, - } -} - -/// Abstract operation `InsertUnicodeExtensionAndCanonicalize ( locale, extension )` -/// -/// Inserts `extension`, which must be a Unicode locale extension sequence, into -/// `locale`, which must be a String value with a structurally valid and canonicalized -/// Unicode BCP 47 locale identifier. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-insert-unicode-extension-and-canonicalize -fn insert_unicode_extension_and_canonicalize( - locale: &str, - extension: &str, - canonicalizer: &LocaleCanonicalizer, -) -> String { - // TODO 1. Assert: locale does not contain a substring that is a Unicode locale extension sequence. - // TODO 2. Assert: extension is a Unicode locale extension sequence. - // TODO 3. Assert: tag matches the unicode_locale_id production. - // 4. Let privateIndex be ! StringIndexOf(locale, "-x-", 0). - let private_index = locale.find("-x-"); - let new_locale = match private_index { - // 5. If privateIndex = -1, then - None => { - // a. Let locale be the string-concatenation of locale and extension. - locale.to_owned() + extension - } - // 6. Else, - Some(idx) => { - // a. Let preExtension be the substring of locale from position 0, inclusive, - // to position privateIndex, exclusive. - let pre_extension = &locale[0..idx]; - - // b. Let postExtension be the substring of locale from position privateIndex to - // the end of the string. - let post_extension = &locale[idx..]; - - // c. Let locale be the string-concatenation of preExtension, extension, - // and postExtension. - pre_extension.to_owned() + extension + post_extension - } - }; - - // 7. Assert: ! IsStructurallyValidLanguageTag(locale) is true. - let mut new_locale = new_locale - .parse() - .expect("Assert: ! IsStructurallyValidLanguageTag(locale) is true."); - - // 8. Return ! CanonicalizeUnicodeLocaleId(locale). - canonicalize_unicode_locale_id(&mut new_locale, canonicalizer); - new_locale.to_string() -} - -/// Abstract operation `CanonicalizeLocaleList ( locales )` -/// -/// Converts an array of [`JsValue`]s containing structurally valid -/// [Unicode BCP 47 locale identifiers][bcp-47] into their [canonical form][canon]. -/// -/// For efficiency, this returns a [`Vec`] of [`Locale`]s instead of a [`Vec`] of -/// [`String`]s, since [`Locale`] allows us to modify individual parts of the locale -/// without scanning the whole string again. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-canonicalizelocalelist -/// [bcp-47]: https://unicode.org/reports/tr35/#Unicode_locale_identifier -/// [canon]: https://unicode.org/reports/tr35/#LocaleId_Canonicalization -fn canonicalize_locale_list(args: &[JsValue], context: &mut Context) -> JsResult> { - // 1. If locales is undefined, then - let locales = args.get_or_undefined(0); - if locales.is_undefined() { - // a. Return a new empty List. - return Ok(Vec::new()); - } - - // 2. Let seen be a new empty List. - let mut seen = IndexSet::new(); - - // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot, then - // TODO: check if Type(locales) is object and handle the internal slots - let o = if locales.is_string() { - // a. Let O be CreateArrayFromList(« locales »). - Array::create_array_from_list([locales.clone()], context) - } else { - // 4. Else, - // a. Let O be ? ToObject(locales). - locales.to_object(context)? - }; - - // 5. Let len be ? ToLength(? Get(O, "length")). - let len = o.length_of_array_like(context)?; - - // 6 Let k be 0. - // 7. Repeat, while k < len, - for k in 0..len { - // a. Let Pk be ToString(k). - // b. Let kPresent be ? HasProperty(O, Pk). - let k_present = o.has_property(k, context)?; - // c. If kPresent is true, then - if k_present { - // i. Let kValue be ? Get(O, Pk). - let k_value = o.get(k, context)?; - // ii. If Type(kValue) is not String or Object, throw a TypeError exception. - if !(k_value.is_object() || k_value.is_string()) { - return Err(JsNativeError::typ() - .with_message("locale should be a String or Object") - .into()); - } - // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then - // TODO: handle checks for InitializedLocale internal slot (there should be an if statement here) - // 1. Let tag be kValue.[[Locale]]. - // iv. Else, - // 1. Let tag be ? ToString(kValue). - // v. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. - let mut tag = k_value - .to_string(context)? - .to_std_string() - .ok() - .and_then(|tag| tag.parse().ok()) - .ok_or_else(|| { - JsNativeError::range() - .with_message("locale is not a structurally valid language tag") - })?; - - // vi. Let canonicalizedTag be CanonicalizeUnicodeLocaleId(tag). - canonicalize_unicode_locale_id(&mut tag, context.icu().locale_canonicalizer()); - seen.insert(tag); - // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. - } - // d. Increase k by 1. - } - - // 8. Return seen. - Ok(seen.into_iter().collect()) -} - -/// `LocaleDataRecord` is the type of `locale_data` argument in `resolve_locale` subroutine. -/// -/// It is an alias for a map where key is a string and value is another map. -/// -/// Value of that inner map is a vector of strings representing locale parameters. -type LocaleDataRecord = FxHashMap>>; - -/// `DateTimeFormatRecord` type aggregates `locale_matcher` selector and `properties` map. -/// -/// It is used as a type of `options` parameter in `resolve_locale` subroutine. -#[derive(Debug)] -struct DateTimeFormatRecord { - pub(crate) locale_matcher: String, - pub(crate) properties: FxHashMap, -} - -/// `ResolveLocaleRecord` type consists of unicode `locale` string, `data_locale` string and `properties` map. -/// -/// This is a return value for `resolve_locale` subroutine. -#[derive(Debug)] -struct ResolveLocaleRecord { - pub(crate) locale: String, - pub(crate) properties: FxHashMap, - pub(crate) data_locale: String, -} - -/// Abstract operation `ResolveLocale ( availableLocales, requestedLocales, options, relevantExtensionKeys, localeData )` -/// -/// Compares a BCP 47 language priority list `requestedLocales` against the locales -/// in `availableLocales` and determines the best available language to meet the request. -/// `availableLocales`, `requestedLocales`, and `relevantExtensionKeys` must be provided as -/// `List` values, options and `localeData` as Records. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-resolvelocale -#[allow(dead_code)] -fn resolve_locale( - available_locales: &[&str], - requested_locales: &[&str], - options: &DateTimeFormatRecord, - relevant_extension_keys: &[&str], - locale_data: &LocaleDataRecord, - context: &mut Context, -) -> ResolveLocaleRecord { - // 1. Let matcher be options.[[localeMatcher]]. - let matcher = &options.locale_matcher; - // 2. If matcher is "lookup", then - // a. Let r be ! LookupMatcher(availableLocales, requestedLocales). - // 3. Else, - // a. Let r be ! BestFitMatcher(availableLocales, requestedLocales). - let r = if matcher == "lookup" { - lookup_matcher( - available_locales, - requested_locales, - context.icu().locale_canonicalizer(), - ) - } else { - best_fit_matcher( - available_locales, - requested_locales, - context.icu().locale_canonicalizer(), - ) - }; - - // 4. Let foundLocale be r.[[locale]]. - let mut found_locale = r.locale; - - // 5. Let result be a new Record. - let mut result = ResolveLocaleRecord { - locale: String::new(), - properties: FxHashMap::default(), - data_locale: String::new(), - }; - - // 6. Set result.[[dataLocale]] to foundLocale. - result.data_locale = found_locale.clone(); - - // 7. If r has an [[extension]] field, then - let keywords = if r.extension.is_empty() { - Vec::::new() - } else { - // a. Let components be ! UnicodeExtensionComponents(r.[[extension]]). - let components = unicode_extension_components(&r.extension); - // b. Let keywords be components.[[Keywords]]. - components.keywords - }; - - // 8. Let supportedExtension be "-u". - let mut supported_extension = String::from("-u"); - - // 9. For each element key of relevantExtensionKeys, do - for &key in relevant_extension_keys { - // a. Let foundLocaleData be localeData.[[]]. - // TODO b. Assert: Type(foundLocaleData) is Record. - let found_locale_data = locale_data - .get(&found_locale) - .map_or_else(FxHashMap::default, Clone::clone); - - // c. Let keyLocaleData be foundLocaleData.[[]]. - // TODO d. Assert: Type(keyLocaleData) is List. - let key_locale_data = found_locale_data - .get(key) - .map_or_else(Vec::new, Clone::clone); - - // e. Let value be keyLocaleData[0]. - // TODO f. Assert: Type(value) is either String or Null. - let mut value = key_locale_data - .get(0) - .map_or_else(JsValue::null, |first_elt| first_elt.clone().into()); - - // g. Let supportedExtensionAddition be "". - let mut supported_extension_addition = String::new(); - - // h. If r has an [[extension]] field, then - if !r.extension.is_empty() { - // i. If keywords contains an element whose [[Key]] is the same as key, then - // 1. Let entry be the element of keywords whose [[Key]] is the same as key. - let maybe_entry = keywords.iter().find(|elem| key.eq(&elem.key)); - if let Some(entry) = maybe_entry { - // 2. Let requestedValue be entry.[[Value]]. - let requested_value = &entry.value; - - // 3. If requestedValue is not the empty String, then - if !requested_value.is_empty() { - // a. If keyLocaleData contains requestedValue, then - if key_locale_data.iter().any(|s| s == requested_value) { - // i. Let value be requestedValue. - value = requested_value.clone().into(); - // ii. Let supportedExtensionAddition be the string-concatenation - // of "-", key, "-", and value. - supported_extension_addition = format!("-{key}-{requested_value}"); - } - // 4. Else if keyLocaleData contains "true", then - } else if key_locale_data.iter().any(|s| s == "true") { - // a. Let value be "true". - value = "true".into(); - // b. Let supportedExtensionAddition be the string-concatenation of "-" and key. - supported_extension_addition = format!("-{key}"); - } - } - } - - // i. If options has a field [[]], then - if options.properties.contains_key(key) { - // i. Let optionsValue be options.[[]]. - // TODO ii. Assert: Type(optionsValue) is either String, Undefined, or Null. - let mut options_value = options - .properties - .get(key) - .unwrap_or(&JsValue::undefined()) - .clone(); - - // iii. If Type(optionsValue) is String, then - if options_value.is_string() { - // TODO 1. Let optionsValue be the string optionsValue after performing the - // algorithm steps to transform Unicode extension values to canonical syntax - // per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale - // Identifiers, treating key as ukey and optionsValue as uvalue productions. - - // TODO 2. Let optionsValue be the string optionsValue after performing the - // algorithm steps to replace Unicode extension values with their canonical - // form per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode - // Locale Identifiers, treating key as ukey and optionsValue as uvalue - // productions. - - // 3. If optionsValue is the empty String, then - if let Some(options_val_str) = options_value.as_string() { - if options_val_str.is_empty() { - // a. Let optionsValue be "true". - options_value = "true".into(); - } - } - } - - // iv. If keyLocaleData contains optionsValue, then - let options_val_str = options_value - .to_string(context) - .unwrap_or_else(|_| "".into()) - .to_std_string_escaped(); - if key_locale_data.iter().any(|s| s == &options_val_str) { - // 1. If SameValue(optionsValue, value) is false, then - if !options_value.eq(&value) { - // a. Let value be optionsValue. - value = options_value; - - // b. Let supportedExtensionAddition be "". - supported_extension_addition = String::new(); - } - } - } - - // j. Set result.[[]] to value. - result.properties.insert(key.to_string(), value); - - // k. Append supportedExtensionAddition to supportedExtension. - supported_extension.push_str(&supported_extension_addition); - } - - // 10. If the number of elements in supportedExtension is greater than 2, then - if supported_extension.len() > 2 { - // a. Let foundLocale be InsertUnicodeExtensionAndCanonicalize(foundLocale, supportedExtension). - found_locale = insert_unicode_extension_and_canonicalize( - &found_locale, - &supported_extension, - context.icu().locale_canonicalizer(), - ); - } - - // 11. Set result.[[locale]] to foundLocale. - result.locale = found_locale; - - // 12. Return result. - result -} - -#[allow(unused)] -pub(crate) enum GetOptionType { - String, - Boolean, -} - -/// Abstract operation `GetOption ( options, property, type, values, fallback )` -/// -/// Extracts the value of the property named `property` from the provided `options` object, -/// converts it to the required `type`, checks whether it is one of a `List` of allowed -/// `values`, and fills in a `fallback` value if necessary. If `values` is -/// undefined, there is no fixed set of values and any is permitted. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-getoption -#[allow(unused)] -pub(crate) fn get_option( - options: &JsObject, - property: &str, - r#type: &GetOptionType, - values: &[&str], - fallback: &JsValue, - context: &mut Context, -) -> JsResult { - // 1. Assert: Type(options) is Object. - // 2. Let value be ? Get(options, property). - let mut value = options.get(property, context)?; - - // 3. If value is undefined, return fallback. - if value.is_undefined() { - return Ok(fallback.clone()); - } - - // 4. Assert: type is "boolean" or "string". - // 5. If type is "boolean", then - // a. Set value to ! ToBoolean(value). - // 6. If type is "string", then - // a. Set value to ? ToString(value). - // 7. If values is not undefined and values does not contain an element equal to value, - // throw a RangeError exception. - value = match r#type { - GetOptionType::Boolean => JsValue::Boolean(value.to_boolean()), - GetOptionType::String => { - let string_value = value.to_string(context)?.to_std_string_escaped(); - if !values.is_empty() && !values.contains(&string_value.as_str()) { - return Err(JsNativeError::range() - .with_message("GetOption: values array does not contain value") - .into()); - } - JsValue::String(string_value.into()) - } - }; - - // 8. Return value. - Ok(value) -} - -/// Abstract operation `GetNumberOption ( options, property, minimum, maximum, fallback )` -/// -/// Extracts the value of the property named `property` from the provided `options` -/// object, converts it to a `Number value`, checks whether it is in the allowed range, -/// and fills in a `fallback` value if necessary. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-getnumberoption -#[allow(unused)] -pub(crate) fn get_number_option( - options: &JsObject, - property: &str, - minimum: f64, - maximum: f64, - fallback: Option, - context: &mut Context, -) -> JsResult> { - // 1. Assert: Type(options) is Object. - // 2. Let value be ? Get(options, property). - let value = options.get(property, context)?; - - // 3. Return ? DefaultNumberOption(value, minimum, maximum, fallback). - default_number_option(&value, minimum, maximum, fallback, context) -} - -/// Abstract operation `DefaultNumberOption ( value, minimum, maximum, fallback )` -/// -/// Converts `value` to a `Number value`, checks whether it is in the allowed range, -/// and fills in a `fallback` value if necessary. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-defaultnumberoption -#[allow(unused)] -pub(crate) fn default_number_option( - value: &JsValue, - minimum: f64, - maximum: f64, - fallback: Option, - context: &mut Context, -) -> JsResult> { - // 1. If value is undefined, return fallback. - if value.is_undefined() { - return Ok(fallback); - } - - // 2. Set value to ? ToNumber(value). - let value = value.to_number(context)?; - - // 3. If value is NaN or less than minimum or greater than maximum, throw a RangeError exception. - if value.is_nan() || value < minimum || value > maximum { - return Err(JsNativeError::range() - .with_message("DefaultNumberOption: value is out of range.") - .into()); - } - - // 4. Return floor(value). - Ok(Some(value.floor())) -} - -/// Abstract operation `CanonicalizeUnicodeLocaleId ( locale )`. -/// -/// This function differs slightly from the specification by modifying in-place -/// the provided [`Locale`] instead of creating a new canonicalized copy. -/// -/// More information: -/// - [ECMAScript reference][spec] -/// -/// [spec]: https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid -fn canonicalize_unicode_locale_id(locale: &mut Locale, canonicalizer: &LocaleCanonicalizer) { - canonicalizer.canonicalize(locale); +trait Service

{ + type LangMarker: KeyedDataMarker; + type Options; + fn resolve(locale: &mut Locale, options: &mut Self::Options, provider: &P); } diff --git a/boa_engine/src/builtins/intl/options.rs b/boa_engine/src/builtins/intl/options.rs new file mode 100644 index 00000000000..8e0533fd80b --- /dev/null +++ b/boa_engine/src/builtins/intl/options.rs @@ -0,0 +1,202 @@ +use std::{fmt::Display, str::FromStr}; + +use crate::{object::JsObject, Context, JsNativeError, JsResult, JsValue}; + +/// `IntlOptions` aggregates the `locale_matcher` selector and any other object +/// property needed for `Intl` object constructors. +/// +/// It is used as the type of the `options` parameter in the operation `resolve_locale`. +#[derive(Debug)] +pub(super) struct IntlOptions { + pub(super) matcher: LocaleMatcher, + pub(super) service_options: O, +} + +/// A type used as an option parameter inside the `Intl` [spec]. +/// +/// [spec]: https://tc39.es/ecma402 +pub(super) trait OptionType: Sized { + /// Parses a [`JsValue`] into an instance of `Self`. + /// + /// Roughly equivalent to the algorithm steps of [9.12.13.3-7][spec], but allows for parsing + /// steps instead of returning a pure string, number or boolean. + /// + /// [spec]: https://tc39.es/ecma402/#sec-getoption + fn from_value(value: JsValue, context: &mut Context) -> JsResult; +} + +trait OptionTypeParsable: FromStr {} + +impl OptionType for T +where + T::Err: Display, +{ + fn from_value(value: JsValue, context: &mut Context) -> JsResult { + value + .to_string(context)? + .to_std_string_escaped() + .parse::() + .map_err(|err| JsNativeError::range().with_message(err.to_string()).into()) + } +} + +/// The default value passed to the [`get_option`] function. +#[derive(Debug, Copy, Clone)] +pub(super) enum GetOptionDefault { + /// Throw an error if the value is `undefined`. + Required, + /// Return `None` if the value is `undefined`. + None, + /// Return T if the value is `undefined`. + Some(T), +} + +/// Abstract operation `GetOption ( options, property, type, values, fallback )` +/// +/// Extracts the value of the property named `property` from the provided `options` object, +/// converts it to the required `type`, checks whether it is one of a `List` of allowed +/// `values`, and fills in a `fallback` value if necessary. If `values` is +/// undefined, there is no fixed set of values and any is permitted. +/// +/// This is a safer alternative to `GetOption`, which tries to parse from the +/// provided property a valid variant of the provided type `T`. It doesn't accept +/// a `type` parameter since the type can specify in its implementation of [`TryFrom`] whether +/// it wants to parse from a [`str`] or convert directly from a boolean or number. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-getoption +pub(super) fn get_option( + options: &JsObject, + property: &str, + default: GetOptionDefault, + context: &mut Context, +) -> JsResult> { + // 1. Let value be ? Get(options, property). + let value = options.get(property, context)?; + + // 2. If value is undefined, then + if value.is_undefined() { + return match default { + // a. If default is required, throw a RangeError exception. + GetOptionDefault::Required => Err(JsNativeError::range() + .with_message("GetOption: option value cannot be undefined") + .into()), + // b. Return default. + GetOptionDefault::None => Ok(None), + GetOptionDefault::Some(val) => Ok(Some(val)), + }; + } + + // The steps 3 to 7 must be made for each `OptionType`. + T::from_value(value, context).map(Some) +} + +impl OptionType for bool { + fn from_value(value: JsValue, _: &mut Context) -> JsResult { + // 5. If type is "boolean", then + // a. Set value to ! ToBoolean(value). + Ok(value.to_boolean()) + } +} + +impl OptionType for String { + fn from_value(value: JsValue, context: &mut Context) -> JsResult { + // 6. If type is "string", then + // a. Set value to ? ToString(value). + value.to_string(context).map(|s| s.to_std_string_escaped()) + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum LocaleMatcher { + Lookup, + BestFit, +} + +#[derive(Debug)] +pub(super) struct ParseLocaleMatcherError; + +impl Display for ParseLocaleMatcherError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + "provided string was not `lookup` or `best fit`".fmt(f) + } +} + +impl FromStr for LocaleMatcher { + type Err = ParseLocaleMatcherError; + + fn from_str(s: &str) -> Result { + match s { + "lookup" => Ok(Self::Lookup), + "best fit" => Ok(Self::BestFit), + _ => Err(ParseLocaleMatcherError), + } + } +} + +impl OptionTypeParsable for LocaleMatcher {} + +/// Abstract operation `GetNumberOption ( options, property, minimum, maximum, fallback )` +/// +/// Extracts the value of the property named `property` from the provided `options` +/// object, converts it to a `Number value`, checks whether it is in the allowed range, +/// and fills in a `fallback` value if necessary. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-getnumberoption +#[allow(unused)] +pub(super) fn get_number_option( + options: &JsObject, + property: &str, + minimum: f64, + maximum: f64, + fallback: Option, + context: &mut Context, +) -> JsResult> { + // 1. Assert: Type(options) is Object. + // 2. Let value be ? Get(options, property). + let value = options.get(property, context)?; + + // 3. Return ? DefaultNumberOption(value, minimum, maximum, fallback). + default_number_option(&value, minimum, maximum, fallback, context) +} + +/// Abstract operation `DefaultNumberOption ( value, minimum, maximum, fallback )` +/// +/// Converts `value` to a `Number value`, checks whether it is in the allowed range, +/// and fills in a `fallback` value if necessary. +/// +/// More information: +/// - [ECMAScript reference][spec] +/// +/// [spec]: https://tc39.es/ecma402/#sec-defaultnumberoption +#[allow(unused)] +pub(super) fn default_number_option( + value: &JsValue, + minimum: f64, + maximum: f64, + fallback: Option, + context: &mut Context, +) -> JsResult> { + // 1. If value is undefined, return fallback. + if value.is_undefined() { + return Ok(fallback); + } + + // 2. Set value to ? ToNumber(value). + let value = value.to_number(context)?; + + // 3. If value is NaN or less than minimum or greater than maximum, throw a RangeError exception. + if value.is_nan() || value < minimum || value > maximum { + return Err(JsNativeError::range() + .with_message("DefaultNumberOption: value is out of range.") + .into()); + } + + // 4. Return floor(value). + Ok(Some(value.floor())) +} diff --git a/boa_engine/src/builtins/intl/tests.rs b/boa_engine/src/builtins/intl/tests.rs index d0954e351c1..365aa69b995 100644 --- a/boa_engine/src/builtins/intl/tests.rs +++ b/boa_engine/src/builtins/intl/tests.rs @@ -1,547 +1,67 @@ -use crate::{ - builtins::intl::date_time_format::{to_date_time_options, DateTimeReqs}, - builtins::intl::{ - best_available_locale, best_fit_matcher, default_locale, default_number_option, - get_number_option, get_option, insert_unicode_extension_and_canonicalize, lookup_matcher, - resolve_locale, unicode_extension_components, DateTimeFormatRecord, GetOptionType, - }, - object::JsObject, - Context, JsValue, +use icu_datetime::{ + options::preferences::HourCycle, pattern::CoarseHourCycle, + provider::calendar::TimeLengthsV1Marker, }; +use icu_locid::{ + extensions::unicode::Value, extensions_unicode_key as key, extensions_unicode_value as value, + Locale, +}; +use icu_plurals::provider::CardinalV1Marker; +use icu_provider::{DataLocale, DataProvider, DataRequest, DataRequestMetadata}; -use icu_locale_canonicalizer::LocaleCanonicalizer; -use rustc_hash::FxHashMap; - -#[test] -fn best_avail_loc() { - let no_extensions_locale = "en-US"; - let available_locales = Vec::new(); - assert_eq!( - best_available_locale(&available_locales, no_extensions_locale), - None - ); - - let no_extensions_locale = "de-DE"; - let available_locales = vec![no_extensions_locale]; - assert_eq!( - best_available_locale(&available_locales, no_extensions_locale), - Some(no_extensions_locale) - ); - - let locale_part = "fr"; - let no_extensions_locale = locale_part.to_string() + "-CA"; - let available_locales = vec![locale_part]; - assert_eq!( - best_available_locale(&available_locales, &no_extensions_locale), - Some(locale_part) - ); - - let ja_kana_t = "ja-Kana-JP-t"; - let ja_kana = "ja-Kana-JP"; - let no_extensions_locale = "ja-Kana-JP-t-it-latn-it"; - let available_locales = vec![ja_kana_t, ja_kana]; - assert_eq!( - best_available_locale(&available_locales, no_extensions_locale), - Some(ja_kana) - ); -} - -#[test] -fn lookup_match() { - let provider = icu_testdata::get_provider(); - let canonicalizer = - LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer"); - // available: [], requested: [] - let available_locales = Vec::new(); - let requested_locales = Vec::new(); - - let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); - assert_eq!( - matcher.locale, - default_locale(&canonicalizer).to_string().as_str() - ); - assert_eq!(matcher.extension, ""); - - // available: [de-DE], requested: [] - let available_locales = vec!["de-DE"]; - let requested_locales = Vec::new(); - - let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); - assert_eq!( - matcher.locale, - default_locale(&canonicalizer).to_string().as_str() - ); - assert_eq!(matcher.extension, ""); - - // available: [fr-FR], requested: [fr-FR-u-hc-h12] - let available_locales = vec!["fr-FR"]; - let requested_locales = vec!["fr-FR-u-hc-h12"]; - - let matcher = lookup_matcher(&available_locales, &requested_locales, &canonicalizer); - assert_eq!(matcher.locale, "fr-FR"); - assert_eq!(matcher.extension, "u-hc-h12"); - - // available: [es-ES], requested: [es-ES] - let available_locales = vec!["es-ES"]; - let requested_locales = vec!["es-ES"]; - - let matcher = best_fit_matcher(&available_locales, &requested_locales, &canonicalizer); - assert_eq!(matcher.locale, "es-ES"); - assert_eq!(matcher.extension, ""); -} - -#[test] -fn insert_unicode_ext() { - let provider = icu_testdata::get_provider(); - let canonicalizer = - LocaleCanonicalizer::new(&provider).expect("Could not create canonicalizer"); - let locale = "hu-HU"; - let ext = ""; - assert_eq!( - insert_unicode_extension_and_canonicalize(locale, ext, &canonicalizer), - locale - ); - - let locale = "hu-HU"; - let ext = "-u-hc-h12"; - assert_eq!( - insert_unicode_extension_and_canonicalize(locale, ext, &canonicalizer), - "hu-HU-u-hc-h12" - ); - - let locale = "hu-HU-x-PRIVATE"; - let ext = "-u-hc-h12"; - assert_eq!( - insert_unicode_extension_and_canonicalize(locale, ext, &canonicalizer), - "hu-HU-u-hc-h12-x-private" - ); -} - -#[test] -fn uni_ext_comp() { - let ext = "-u-ca-japanese-hc-h12"; - let components = unicode_extension_components(ext); - assert!(components.attributes.is_empty()); - assert_eq!(components.keywords.len(), 2); - assert_eq!(components.keywords[0].key, "ca"); - assert_eq!(components.keywords[0].value, "japanese"); - assert_eq!(components.keywords[1].key, "hc"); - assert_eq!(components.keywords[1].value, "h12"); - - let ext = "-u-alias-co-phonebk-ka-shifted"; - let components = unicode_extension_components(ext); - assert_eq!(components.attributes, vec![String::from("alias")]); - assert_eq!(components.keywords.len(), 2); - assert_eq!(components.keywords[0].key, "co"); - assert_eq!(components.keywords[0].value, "phonebk"); - assert_eq!(components.keywords[1].key, "ka"); - assert_eq!(components.keywords[1].value, "shifted"); - - let ext = "-u-ca-buddhist-kk-nu-thai"; - let components = unicode_extension_components(ext); - assert!(components.attributes.is_empty()); - assert_eq!(components.keywords.len(), 3); - assert_eq!(components.keywords[0].key, "ca"); - assert_eq!(components.keywords[0].value, "buddhist"); - assert_eq!(components.keywords[1].key, "kk"); - assert_eq!(components.keywords[1].value, ""); - assert_eq!(components.keywords[2].key, "nu"); - assert_eq!(components.keywords[2].value, "thai"); - - let ext = "-u-ca-islamic-civil"; - let components = unicode_extension_components(ext); - assert!(components.attributes.is_empty()); - assert_eq!(components.keywords.len(), 1); - assert_eq!(components.keywords[0].key, "ca"); - assert_eq!(components.keywords[0].value, "islamic-civil"); -} - -#[test] -fn locale_resolution() { - let mut context = Context::default(); - - // test lookup - let available_locales = Vec::new(); - let requested_locales = Vec::new(); - let relevant_extension_keys = Vec::new(); - let locale_data = FxHashMap::default(); - let options = DateTimeFormatRecord { - locale_matcher: "lookup".into(), - properties: FxHashMap::default(), - }; - - let locale_record = resolve_locale( - &available_locales, - &requested_locales, - &options, - &relevant_extension_keys, - &locale_data, - &mut context, - ); - assert_eq!( - locale_record.locale, - default_locale(context.icu().locale_canonicalizer()) - .to_string() - .as_str() - ); - assert_eq!( - locale_record.data_locale, - default_locale(context.icu().locale_canonicalizer()) - .to_string() - .as_str() - ); - assert!(locale_record.properties.is_empty()); - - // test best fit - let available_locales = Vec::new(); - let requested_locales = Vec::new(); - let relevant_extension_keys = Vec::new(); - let locale_data = FxHashMap::default(); - let options = DateTimeFormatRecord { - locale_matcher: "best-fit".into(), - properties: FxHashMap::default(), - }; - - let locale_record = resolve_locale( - &available_locales, - &requested_locales, - &options, - &relevant_extension_keys, - &locale_data, - &mut context, - ); - assert_eq!( - locale_record.locale, - default_locale(context.icu().locale_canonicalizer()) - .to_string() - .as_str() - ); - assert_eq!( - locale_record.data_locale, - default_locale(context.icu().locale_canonicalizer()) - .to_string() - .as_str() - ); - assert!(locale_record.properties.is_empty()); - - // available: [es-ES], requested: [es-ES] - let available_locales = vec!["es-ES"]; - let requested_locales = vec!["es-ES"]; - let relevant_extension_keys = Vec::new(); - let locale_data = FxHashMap::default(); - let options = DateTimeFormatRecord { - locale_matcher: "lookup".into(), - properties: FxHashMap::default(), - }; - - let locale_record = resolve_locale( - &available_locales, - &requested_locales, - &options, - &relevant_extension_keys, - &locale_data, - &mut context, - ); - assert_eq!(locale_record.locale, "es-ES"); - assert_eq!(locale_record.data_locale, "es-ES"); - assert!(locale_record.properties.is_empty()); - - // available: [zh-CN], requested: [] - let available_locales = vec!["zh-CN"]; - let requested_locales = Vec::new(); - let relevant_extension_keys = Vec::new(); - let locale_data = FxHashMap::default(); - let options = DateTimeFormatRecord { - locale_matcher: "lookup".into(), - properties: FxHashMap::default(), - }; - - let locale_record = resolve_locale( - &available_locales, - &requested_locales, - &options, - &relevant_extension_keys, - &locale_data, - &mut context, - ); - assert_eq!( - locale_record.locale, - default_locale(context.icu().locale_canonicalizer()) - .to_string() - .as_str() - ); - assert_eq!( - locale_record.data_locale, - default_locale(context.icu().locale_canonicalizer()) - .to_string() - .as_str() - ); - assert!(locale_record.properties.is_empty()); -} - -#[test] -fn get_opt() { - let mut context = Context::default(); - - let values = Vec::new(); - let fallback = JsValue::String("fallback".into()); - let options_obj = JsObject::empty(); - let option_type = GetOptionType::String; - let get_option_result = get_option( - &options_obj, - "", - &option_type, - &values, - &fallback, - &mut context, - ) - .expect("GetOption should not fail on fallback test"); - assert_eq!(get_option_result, fallback); - - let values = Vec::new(); - let fallback = JsValue::String("fallback".into()); - let options_obj = JsObject::empty(); - let locale_value = JsValue::String("en-US".into()); - options_obj - .set("Locale", locale_value.clone(), true, &mut context) - .expect("Setting a property should not fail"); - let option_type = GetOptionType::String; - let get_option_result = get_option( - &options_obj, - "Locale", - &option_type, - &values, - &fallback, - &mut context, - ) - .expect("GetOption should not fail on string test"); - assert_eq!(get_option_result, locale_value); - - let fallback = JsValue::String("fallback".into()); - let options_obj = JsObject::empty(); - let locale_string = "en-US"; - let locale_value = JsValue::String(locale_string.into()); - let values = vec![locale_string]; - options_obj - .set("Locale", locale_value.clone(), true, &mut context) - .expect("Setting a property should not fail"); - let option_type = GetOptionType::String; - let get_option_result = get_option( - &options_obj, - "Locale", - &option_type, - &values, - &fallback, - &mut context, - ) - .expect("GetOption should not fail on values test"); - assert_eq!(get_option_result, locale_value); - - let fallback = JsValue::new(false); - let options_obj = JsObject::empty(); - let boolean_value = JsValue::new(true); - let values = Vec::new(); - options_obj - .set("boolean_val", boolean_value.clone(), true, &mut context) - .expect("Setting a property should not fail"); - let option_type = GetOptionType::Boolean; - let get_option_result = get_option( - &options_obj, - "boolean_val", - &option_type, - &values, - &fallback, - &mut context, - ) - .expect("GetOption should not fail on boolean test"); - assert_eq!(get_option_result, boolean_value); - - let fallback = JsValue::String("fallback".into()); - let options_obj = JsObject::empty(); - let locale_value = JsValue::String("en-US".into()); - let other_locale_str = "de-DE"; - let values = vec![other_locale_str]; - options_obj - .set("Locale", locale_value, true, &mut context) - .expect("Setting a property should not fail"); - let option_type = GetOptionType::String; - let get_option_result = get_option( - &options_obj, - "Locale", - &option_type, - &values, - &fallback, - &mut context, - ); - assert!(get_option_result.is_err()); - - let value = JsValue::undefined(); - let minimum = 1.0; - let maximum = 10.0; - let fallback_val = 5.0; - let fallback = Some(fallback_val); - let get_option_result = - default_number_option(&value, minimum, maximum, fallback, &mut context).unwrap(); - assert_eq!(get_option_result, fallback); - - let value = JsValue::nan(); - let minimum = 1.0; - let maximum = 10.0; - let fallback = Some(5.0); - let get_option_result = default_number_option(&value, minimum, maximum, fallback, &mut context); - assert!(get_option_result.is_err()); - - let value = JsValue::new(0); - let minimum = 1.0; - let maximum = 10.0; - let fallback = Some(5.0); - let get_option_result = default_number_option(&value, minimum, maximum, fallback, &mut context); - assert!(get_option_result.is_err()); - - let value = JsValue::new(11); - let minimum = 1.0; - let maximum = 10.0; - let fallback = Some(5.0); - let get_option_result = default_number_option(&value, minimum, maximum, fallback, &mut context); - assert!(get_option_result.is_err()); - - let value_f64 = 7.0; - let value = JsValue::new(value_f64); - let minimum = 1.0; - let maximum = 10.0; - let fallback = Some(5.0); - let get_option_result = - default_number_option(&value, minimum, maximum, fallback, &mut context).unwrap(); - assert_eq!(get_option_result, Some(value_f64)); - - let options = JsObject::empty(); - let property = "fractionalSecondDigits"; - let minimum = 1.0; - let maximum = 10.0; - let fallback_val = 5.0; - let fallback = Some(fallback_val); - let get_option_result = - get_number_option(&options, property, minimum, maximum, fallback, &mut context).unwrap(); - assert_eq!(get_option_result, fallback); +use super::Service; - let options = JsObject::empty(); - let value_f64 = 8.0; - let value = JsValue::new(value_f64); - let property = "fractionalSecondDigits"; - options - .set(property, value, true, &mut context) - .expect("Setting a property should not fail"); - let minimum = 1.0; - let maximum = 10.0; - let fallback = Some(5.0); - let get_option_result = - get_number_option(&options, property, minimum, maximum, fallback, &mut context).unwrap(); - assert_eq!(get_option_result, Some(value_f64)); +struct TestOptions { + hc: Option, } -#[test] -fn to_date_time_opts() { - let mut context = Context::default(); - - let options_obj = JsObject::empty(); - options_obj - .set("timeStyle", JsObject::empty(), true, &mut context) - .expect("Setting a property should not fail"); - let date_time_opts = to_date_time_options( - &JsValue::new(options_obj), - &DateTimeReqs::Date, - &DateTimeReqs::Date, - &mut context, - ); - assert!(date_time_opts.is_err()); - - let options_obj = JsObject::empty(); - options_obj - .set("dateStyle", JsObject::empty(), true, &mut context) - .expect("Setting a property should not fail"); - let date_time_opts = to_date_time_options( - &JsValue::new(options_obj), - &DateTimeReqs::Time, - &DateTimeReqs::Time, - &mut context, - ); - assert!(date_time_opts.is_err()); - - let date_time_opts = to_date_time_options( - &JsValue::undefined(), - &DateTimeReqs::Date, - &DateTimeReqs::Date, - &mut context, - ) - .expect("toDateTimeOptions should not fail in date test"); - - let numeric_jsstring = JsValue::String("numeric".into()); - assert_eq!( - date_time_opts.get("year", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("month", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("day", &mut context).unwrap(), - numeric_jsstring - ); - - let date_time_opts = to_date_time_options( - &JsValue::undefined(), - &DateTimeReqs::Time, - &DateTimeReqs::Time, - &mut context, - ) - .expect("toDateTimeOptions should not fail in time test"); - - let numeric_jsstring = JsValue::String("numeric".into()); - assert_eq!( - date_time_opts.get("hour", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("minute", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("second", &mut context).unwrap(), - numeric_jsstring - ); - - let date_time_opts = to_date_time_options( - &JsValue::undefined(), - &DateTimeReqs::AnyAll, - &DateTimeReqs::AnyAll, - &mut context, - ) - .expect("toDateTimeOptions should not fail when testing required = 'any'"); - - let numeric_jsstring = JsValue::String("numeric".into()); - assert_eq!( - date_time_opts.get("year", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("month", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("day", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("hour", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("minute", &mut context).unwrap(), - numeric_jsstring - ); - assert_eq!( - date_time_opts.get("second", &mut context).unwrap(), - numeric_jsstring - ); +struct TestService; + +impl

Service

for TestService +where + P: DataProvider, +{ + type LangMarker = CardinalV1Marker; + + type Options = TestOptions; + + fn resolve(locale: &mut Locale, options: &mut Self::Options, provider: &P) { + let loc_hc = locale + .extensions + .unicode + .keywords + .get(&key!("hc")) + .and_then(Value::as_single_subtag) + .and_then(|s| match &**s { + "h11" => Some(HourCycle::H11), + "h12" => Some(HourCycle::H12), + "h23" => Some(HourCycle::H23), + "h24" => Some(HourCycle::H24), + _ => None, + }); + let hc = options.hc.or(loc_hc).unwrap_or_else(|| { + let req = DataRequest { + locale: &DataLocale::from(&*locale), + metadata: DataRequestMetadata::default(), + }; + let preferred = DataProvider::::load(provider, req) + .unwrap() + .take_payload() + .unwrap() + .get() + .preferred_hour_cycle; + match preferred { + CoarseHourCycle::H11H12 => HourCycle::H11, + CoarseHourCycle::H23H24 => HourCycle::H23, + } + }); + let hc_value = match hc { + HourCycle::H11 => value!("h11"), + HourCycle::H12 => value!("h12"), + HourCycle::H23 => value!("h23"), + HourCycle::H24 => value!("h24"), + }; + locale.extensions.unicode.keywords.set(key!("hc"), hc_value); + options.hc = Some(hc); + } } diff --git a/boa_engine/src/context/icu.rs b/boa_engine/src/context/icu.rs index b89f5649907..5a9299a56eb 100644 --- a/boa_engine/src/context/icu.rs +++ b/boa_engine/src/context/icu.rs @@ -1,79 +1,99 @@ -use icu_datetime::provider::{ - calendar::{DatePatternsV1Marker, DateSkeletonPatternsV1Marker, DateSymbolsV1Marker}, - week_data::WeekDataV1Marker, -}; -use icu_locale_canonicalizer::{ - provider::{AliasesV1Marker, LikelySubtagsV1Marker}, - LocaleCanonicalizer, +use std::fmt::Debug; + +use icu_locid_transform::{LocaleCanonicalizer, LocaleTransformError}; +use icu_provider::{ + yoke::{trait_hack::YokeTraitHack, Yokeable}, + zerofrom::ZeroFrom, + AnyProvider, AsDeserializingBufferProvider, AsDowncastingAnyProvider, BufferProvider, + DataError, DataProvider, DataRequest, DataResponse, KeyedDataMarker, MaybeSendSync, }; -use icu_plurals::provider::OrdinalV1Marker; -use icu_provider::prelude::*; +use serde::Deserialize; -/// Trait encompassing all the required implementations that define -/// a valid icu data provider. -pub trait BoaProvider: - ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider -{ +pub enum BoaProvider { + Buffer(Box), + Any(Box), +} + +impl Debug for BoaProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Buffer(_) => f.debug_tuple("Buffer").field(&"_").finish(), + Self::Any(_) => f.debug_tuple("Any").field(&"_").finish(), + } + } } -impl BoaProvider for T where - T: ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ResourceProvider - + ?Sized +impl DataProvider for BoaProvider +where + M: KeyedDataMarker + 'static, + for<'de> YokeTraitHack<>::Output>: Deserialize<'de>, + for<'a> YokeTraitHack<>::Output>: Clone, + M::Yokeable: ZeroFrom<'static, M::Yokeable>, + M::Yokeable: MaybeSendSync, { + fn load(&self, req: DataRequest<'_>) -> Result, DataError> { + match self { + BoaProvider::Buffer(provider) => provider.as_deserializing().load(req), + BoaProvider::Any(provider) => provider.as_downcasting().load(req), + } + } +} + +impl BoaProvider { + /// Creates a new [`LocaleCanonicalizer`] from the provided [`DataProvider`]. + pub(crate) fn try_new_locale_canonicalizer( + &self, + ) -> Result { + match self { + BoaProvider::Buffer(buffer) => { + LocaleCanonicalizer::try_new_with_buffer_provider(buffer) + } + BoaProvider::Any(any) => LocaleCanonicalizer::try_new_with_any_provider(any), + } + } } -/// Collection of tools initialized from a [`BoaProvider`] that are used +/// Collection of tools initialized from a [`DataProvider`] that are used /// for the functionality of `Intl`. -#[allow(unused)] -pub(crate) struct Icu { - provider: Box, +pub(crate) struct Icu

{ + provider: P, locale_canonicalizer: LocaleCanonicalizer, } -impl std::fmt::Debug for Icu { +impl Debug for Icu

{ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - #[derive(Debug)] - struct Canonicalizer; f.debug_struct("Icu") - .field("locale_canonicalizer", &Canonicalizer) + .field("provider", &self.provider) + .field("locale_canonicalizer", &"LocaleCanonicalizer") .finish() } } -impl Icu { - /// Create a new [`Icu`] from a valid [`BoaProvider`] +impl

Icu

{ + /// Gets the [`LocaleCanonicalizer`] tool. + pub(crate) fn locale_canonicalizer(&self) -> &LocaleCanonicalizer { + &self.locale_canonicalizer + } + + /// Gets the inner icu data provider + #[allow(unused)] + pub(crate) fn provider(&self) -> &P { + &self.provider + } +} + +impl Icu { + /// Creates a new [`Icu`] from a valid [`BoaProvider`] /// /// # Errors /// /// This method will return an error if any of the tools /// required cannot be constructed. - pub(crate) fn new(provider: Box) -> Result { + pub(crate) fn new(provider: BoaProvider) -> Result { + let locale_canonicalizer = provider.try_new_locale_canonicalizer()?; Ok(Self { - locale_canonicalizer: LocaleCanonicalizer::new(&*provider)?, provider, + locale_canonicalizer, }) } - - /// Get the [`LocaleCanonicalizer`] tool. - pub(crate) const fn locale_canonicalizer(&self) -> &LocaleCanonicalizer { - &self.locale_canonicalizer - } - - /// Get the inner icu data provider - #[allow(unused)] - pub(crate) fn provider(&self) -> &dyn BoaProvider { - self.provider.as_ref() - } } diff --git a/boa_engine/src/context/mod.rs b/boa_engine/src/context/mod.rs index ca21953f1ae..fc4b386142f 100644 --- a/boa_engine/src/context/mod.rs +++ b/boa_engine/src/context/mod.rs @@ -3,11 +3,10 @@ pub mod intrinsics; #[cfg(feature = "intl")] -mod icu; - -use std::collections::VecDeque; +pub(crate) mod icu; use intrinsics::{IntrinsicObjects, Intrinsics}; +use std::collections::VecDeque; #[cfg(feature = "console")] use crate::builtins::console::Console; @@ -30,13 +29,6 @@ use boa_interner::{Interner, Sym}; use boa_parser::{Error as ParseError, Parser}; use boa_profiler::Profiler; -#[cfg(feature = "intl")] -use icu_provider::DataError; - -#[doc(inline)] -#[cfg(all(feature = "intl", doc))] -pub use icu::BoaProvider; - /// ECMAScript context. It is the primary way to interact with the runtime. /// /// `Context`s constructed in a thread share the same runtime, therefore it @@ -78,7 +70,6 @@ pub use icu::BoaProvider; /// /// assert_eq!(value.as_number(), Some(12.0)) /// ``` -#[derive(Debug)] pub struct Context { /// realm holds both the global object and the environment pub(crate) realm: Realm, @@ -95,7 +86,7 @@ pub struct Context { /// ICU related utilities #[cfg(feature = "intl")] - icu: icu::Icu, + icu: icu::Icu, /// Number of instructions remaining before a forced exit #[cfg(feature = "fuzz")] @@ -108,6 +99,26 @@ pub struct Context { pub(crate) kept_alive: Vec, } +impl std::fmt::Debug for Context { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut debug = f.debug_struct("Context"); + + debug + .field("realm", &self.realm) + .field("interner", &self.interner); + + #[cfg(feature = "console")] + debug.field("console", &self.console); + + debug + .field("intrinsics", &self.intrinsics) + .field("vm", &self.vm) + .field("promise_job_queue", &self.promise_job_queue) + .field("icu", &self.icu) + .finish() + } +} + impl Default for Context { fn default() -> Self { ContextBuilder::default().build() @@ -552,7 +563,7 @@ impl Context { #[cfg(feature = "intl")] /// Get the ICU related utilities - pub(crate) const fn icu(&self) -> &icu::Icu { + pub(crate) fn icu(&self) -> &icu::Icu { &self.icu } @@ -591,11 +602,11 @@ impl Context { feature = "intl", doc = "The required data in a valid provider is specified in [`BoaProvider`]" )] -#[derive(Debug, Default)] +#[derive(Default, Debug)] pub struct ContextBuilder { interner: Option, #[cfg(feature = "intl")] - icu: Option, + icu: Option>, #[cfg(feature = "fuzz")] instructions_remaining: usize, } @@ -615,8 +626,11 @@ impl ContextBuilder { /// Provides an icu data provider to the [`Context`]. /// /// This function is only available if the `intl` feature is enabled. - #[cfg(any(feature = "intl", docs))] - pub fn icu_provider(mut self, provider: Box) -> Result { + #[cfg(feature = "intl")] + pub fn icu_provider( + mut self, + provider: icu::BoaProvider, + ) -> Result { self.icu = Some(icu::Icu::new(provider)?); Ok(self) } @@ -658,8 +672,8 @@ impl ContextBuilder { #[cfg(feature = "intl")] icu: self.icu.unwrap_or_else(|| { // TODO: Replace with a more fitting default - icu::Icu::new(Box::new(icu_testdata::get_provider())) - .expect("Failed to initialize default icu data.") + let provider = icu::BoaProvider::Any(Box::new(icu_testdata::any())); + icu::Icu::new(provider).expect("Failed to initialize default icu data.") }), #[cfg(feature = "fuzz")] instructions_remaining: self.instructions_remaining, diff --git a/boa_engine/src/string/mod.rs b/boa_engine/src/string/mod.rs index e8b9cdfd72a..72bdede031d 100644 --- a/boa_engine/src/string/mod.rs +++ b/boa_engine/src/string/mod.rs @@ -31,10 +31,12 @@ use std::{ alloc::{alloc, dealloc, Layout}, borrow::Borrow, cell::Cell, + convert::Infallible, hash::{Hash, Hasher}, ops::{Deref, Index}, ptr::{self, NonNull}, slice::SliceIndex, + str::FromStr, }; use self::common::{COMMON_STRINGS, COMMON_STRINGS_CACHE, MAX_COMMON_STRING_LENGTH}; @@ -882,6 +884,14 @@ impl PartialOrd for JsString { } } +impl FromStr for JsString { + type Err = Infallible; + + fn from_str(s: &str) -> Result { + Ok(JsString::from(s)) + } +} + /// Utility trait that adds trimming functionality to every `UTF-16` string. pub(crate) trait Utf16Trim { /// Trims both leading and trailing space from `self`. From b9ca228e43834b8fc051b13c3ebdd0095d40b6b6 Mon Sep 17 00:00:00 2001 From: jedel1043 Date: Wed, 7 Dec 2022 22:35:49 -0600 Subject: [PATCH 02/20] Implement `Intl.Locale` --- .../src/builtins/intl/date_time_format.rs | 17 + boa_engine/src/builtins/intl/locale/mod.rs | 716 +++++++++++++++++- boa_engine/src/builtins/intl/locale/tests.rs | 135 ++-- boa_engine/src/builtins/intl/locale/utils.rs | 96 ++- boa_engine/src/builtins/intl/mod.rs | 5 +- boa_engine/src/builtins/intl/options.rs | 159 ++-- boa_engine/src/builtins/intl/tests.rs | 67 -- boa_engine/src/context/icu.rs | 27 +- boa_engine/src/context/intrinsics.rs | 15 + boa_engine/src/context/mod.rs | 2 +- boa_engine/src/object/mod.rs | 34 +- test_ignore.toml | 1 - 12 files changed, 1054 insertions(+), 220 deletions(-) delete mode 100644 boa_engine/src/builtins/intl/tests.rs diff --git a/boa_engine/src/builtins/intl/date_time_format.rs b/boa_engine/src/builtins/intl/date_time_format.rs index 103220794f1..45628510e25 100644 --- a/boa_engine/src/builtins/intl/date_time_format.rs +++ b/boa_engine/src/builtins/intl/date_time_format.rs @@ -20,6 +20,23 @@ use crate::{ use boa_gc::{Finalize, Trace}; use boa_profiler::Profiler; +use icu_datetime::options::preferences::HourCycle; + +use super::options::OptionType; + +impl OptionType for HourCycle { + fn from_value(value: JsValue, context: &mut Context) -> JsResult { + match value.to_string(context)?.to_std_string_escaped().as_str() { + "h11" => Ok(HourCycle::H11), + "h12" => Ok(HourCycle::H12), + "h23" => Ok(HourCycle::H23), + "h24" => Ok(HourCycle::H24), + _ => Err(JsNativeError::range() + .with_message("provided string was not `h11`, `h12`, `h23` or `h24`") + .into()), + } + } +} /// JavaScript `Intl.DateTimeFormat` object. #[derive(Debug, Clone, Trace, Finalize)] diff --git a/boa_engine/src/builtins/intl/locale/mod.rs b/boa_engine/src/builtins/intl/locale/mod.rs index 2f96fc18792..5fe12d57f22 100644 --- a/boa_engine/src/builtins/intl/locale/mod.rs +++ b/boa_engine/src/builtins/intl/locale/mod.rs @@ -1,20 +1,712 @@ +#![allow(clippy::string_lit_as_bytes)] + +use boa_profiler::Profiler; +use icu_collator::CaseFirst; +use icu_datetime::options::preferences::HourCycle; +use icu_locid::{ + extensions::unicode::Value, + extensions_unicode_key as key, extensions_unicode_value as value, + subtags::{Language, Region, Script}, +}; +use tap::{Conv, Pipe}; + #[cfg(test)] mod tests; mod utils; - -use icu_collator::CaseFirst; -use icu_datetime::options::preferences::HourCycle; -use icu_locid::Locale; pub(super) use utils::*; -use crate::JsString; +use crate::{ + builtins::{BuiltIn, JsArgs}, + context::intrinsics::StandardConstructors, + js_string, + object::{ + internal_methods::get_prototype_from_constructor, ConstructorBuilder, FunctionBuilder, + JsObject, ObjectData, + }, + property::Attribute, + symbol::WellKnownSymbols, + Context, JsNativeError, JsResult, JsString, JsValue, +}; + +use super::options::{coerce_options_to_object, get_option, GetOptionDefault}; + +#[derive(Debug, Clone)] +pub(crate) struct Locale; + +impl BuiltIn for Locale { + const NAME: &'static str = "Locale"; + + fn init(context: &mut Context) -> Option { + let _timer = Profiler::global().start_event(Self::NAME, "init"); + + let base_name = FunctionBuilder::native(context, Self::base_name) + .name("get baseName") + .constructor(false) + .build(); + + let calendar = FunctionBuilder::native(context, Self::calendar) + .name("get calendar") + .constructor(false) + .build(); + + let case_first = FunctionBuilder::native(context, Self::case_first) + .name("get caseFirst") + .constructor(false) + .build(); + + let collation = FunctionBuilder::native(context, Self::collation) + .name("get collation") + .constructor(false) + .build(); + + let hour_cycle = FunctionBuilder::native(context, Self::hour_cycle) + .name("get hourCycle") + .constructor(false) + .build(); + + let numeric = FunctionBuilder::native(context, Self::numeric) + .name("get numeric") + .constructor(false) + .build(); + + let numbering_system = FunctionBuilder::native(context, Self::numbering_system) + .name("get numberingSystem") + .constructor(false) + .build(); + + let language = FunctionBuilder::native(context, Self::language) + .name("get language") + .constructor(false) + .build(); + + let script = FunctionBuilder::native(context, Self::script) + .name("get script") + .constructor(false) + .build(); + + let region = FunctionBuilder::native(context, Self::region) + .name("get region") + .constructor(false) + .build(); + + ConstructorBuilder::with_standard_constructor( + context, + Self::constructor, + context.intrinsics().constructors().locale().clone(), + ) + .name(Self::NAME) + .length(Self::LENGTH) + .property( + WellKnownSymbols::to_string_tag(), + "Intl.Locale", + Attribute::CONFIGURABLE, + ) + .method(Self::maximize, "maximize", 0) + .method(Self::minimize, "minimize", 0) + .method(Self::to_string, "toString", 0) + .accessor("baseName", Some(base_name), None, Attribute::CONFIGURABLE) + .accessor("calendar", Some(calendar), None, Attribute::CONFIGURABLE) + .accessor("caseFirst", Some(case_first), None, Attribute::CONFIGURABLE) + .accessor("collation", Some(collation), None, Attribute::CONFIGURABLE) + .accessor("hourCycle", Some(hour_cycle), None, Attribute::CONFIGURABLE) + .accessor("numeric", Some(numeric), None, Attribute::CONFIGURABLE) + .accessor( + "numberingSystem", + Some(numbering_system), + None, + Attribute::CONFIGURABLE, + ) + .accessor("language", Some(language), None, Attribute::CONFIGURABLE) + .accessor("script", Some(script), None, Attribute::CONFIGURABLE) + .accessor("region", Some(region), None, Attribute::CONFIGURABLE) + .build() + .conv::() + .pipe(Some) + } +} + +impl Locale { + pub(crate) const LENGTH: usize = 1; + + /// Constructor [`Intl.Locale ( tag [ , options ] )`][spec] + /// + /// Constructor for `Locale` objects. + /// + /// More information: + /// - [MDN documentation][mdn] + /// + /// [spec]: https://tc39.es/ecma402/#sec-Intl.Locale + /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Locale + pub(crate) fn constructor( + new_target: &JsValue, + args: &[JsValue], + context: &mut Context, + ) -> JsResult { + // 1. If NewTarget is undefined, throw a TypeError exception. + if new_target.is_undefined() { + return Err(JsNativeError::typ() + .with_message("cannot call `Intl.Locale` constructor without `new`") + .into()); + } + + let tag = args.get_or_undefined(0); + let options = args.get_or_undefined(1); + + // 2. Let relevantExtensionKeys be %Locale%.[[RelevantExtensionKeys]]. + // 3. Let internalSlotsList be « [[InitializedLocale]], [[Locale]], [[Calendar]], [[Collation]], [[HourCycle]], [[NumberingSystem]] ». + // 4. If relevantExtensionKeys contains "kf", then + // a. Append [[CaseFirst]] as the last element of internalSlotsList. + // 5. If relevantExtensionKeys contains "kn", then + // a. Append [[Numeric]] as the last element of internalSlotsList. + + // 7. If Type(tag) is not String or Object, throw a TypeError exception. + if !(tag.is_object() || tag.is_string()) { + return Err(JsNativeError::typ() + .with_message("Intl.Locale: `tag` should be a String or Object") + .into()); + } + + // 8. If Type(tag) is Object and tag has an [[InitializedLocale]] internal slot, then + + let mut tag = if let Some(tag) = tag + .as_object() + .and_then(|obj| obj.borrow().as_locale().cloned()) + { + // a. Let tag be tag.[[Locale]]. + tag + } + // 9. Else, + else { + // a. Let tag be ? ToString(tag). + tag.to_string(context)? + .to_std_string_escaped() + .parse() + .map_err(|_| { + JsNativeError::range() + .with_message("Intl.Locale: `tag` is not a structurally valid language tag") + })? + }; + + // 10. Set options to ? CoerceOptionsToObject(options). + let options = &coerce_options_to_object(options, context)?; + + // 11. Set tag to ? ApplyOptionsToTag(tag, options). + + // Abstract operation [`ApplyOptionsToTag ( tag, options )`][https://tc39.es/ecma402/#sec-apply-options-to-tag] + { + // 1. Assert: Type(tag) is String. + // 2. Assert: Type(options) is Object. + // 3. If ! IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + // 4. Let language be ? GetOption(options, "language", string, empty, undefined). + // 5. If language is not undefined, then + let language = + get_option::(options, "language", GetOptionDefault::None, context)? + // a. If language does not match the unicode_language_subtag production, throw a RangeError exception. + .map(|s| s.to_std_string_escaped().parse::()) + .transpose() + .map_err(|e| JsNativeError::range().with_message(e.to_string()))?; + + // 6. Let script be ? GetOption(options, "script", string, empty, undefined). + // 7. If script is not undefined, then + let script = + get_option::(options, "script", GetOptionDefault::None, context)? + .map(|s| s.to_std_string_escaped().parse::