From 6c7019cfe91c2e6de504a43594a416ea6e8d16e4 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 15 Aug 2023 16:27:13 -0700 Subject: [PATCH] Use correct BCP47 tags for islamic calendars (#3871) * Move to the correct BCP47 tags for islamic * Fix skeleta data loading * fix * clippy * fmt --- components/calendar/src/any_calendar.rs | 102 +++++++++--------- components/datetime/src/calendar.rs | 42 +++++++- components/datetime/src/provider/date_time.rs | 9 ++ .../tests/fixtures/tests/components.json | 12 +-- 4 files changed, 104 insertions(+), 61 deletions(-) diff --git a/components/calendar/src/any_calendar.rs b/components/calendar/src/any_calendar.rs index acb73584031..1ade14e2070 100644 --- a/components/calendar/src/any_calendar.rs +++ b/components/calendar/src/any_calendar.rs @@ -904,9 +904,9 @@ impl AnyCalendarKind { b"gregory" => AnyCalendarKind::Gregorian, b"hebrew" => AnyCalendarKind::Hebrew, b"indian" => AnyCalendarKind::Indian, - b"islamic_civil" => AnyCalendarKind::IslamicCivil, - b"islamic_tbla" => AnyCalendarKind::IslamicTabular, - b"islamic_umalqura" => AnyCalendarKind::IslamicUmmAlQura, + b"islamic-civil" | b"islamicc" => AnyCalendarKind::IslamicCivil, + b"islamic-tbla" => AnyCalendarKind::IslamicTabular, + b"islamic-umalqura" => AnyCalendarKind::IslamicUmmAlQura, b"islamic" => AnyCalendarKind::IslamicObservational, b"iso" => AnyCalendarKind::Iso, b"japanese" => AnyCalendarKind::Japanese, @@ -925,47 +925,46 @@ impl AnyCalendarKind { /// Returns None if the calendar is unknown. If you prefer an error, use /// [`CalendarError::unknown_any_calendar_kind`]. pub fn get_for_bcp47_value(x: &Value) -> Option { - Some(if *x == value!("buddhist") { - AnyCalendarKind::Buddhist - } else if *x == value!("chinese") { - AnyCalendarKind::Chinese - } else if *x == value!("coptic") { - AnyCalendarKind::Coptic - } else if *x == value!("dangi") { - AnyCalendarKind::Dangi - } else if *x == value!("ethioaa") { - AnyCalendarKind::EthiopianAmeteAlem - } else if *x == value!("ethiopic") { - AnyCalendarKind::Ethiopian - } else if *x == value!("gregory") { - AnyCalendarKind::Gregorian - } else if *x == value!("hebrew") { - AnyCalendarKind::Hebrew - } else if *x == value!("indian") { - AnyCalendarKind::Indian - } else if *x == value!("islamic") { - AnyCalendarKind::IslamicObservational - } else if *x == value!("islamicc") { - AnyCalendarKind::IslamicCivil - } else if *x == value!("iso") { - AnyCalendarKind::Iso - } else if *x == value!("japanese") { - AnyCalendarKind::Japanese - } else if *x == value!("japanext") { - AnyCalendarKind::JapaneseExtended - } else if *x == value!("persian") { - AnyCalendarKind::Persian - } else if *x == value!("roc") { - AnyCalendarKind::Roc - } else if *x == value!("tbla") { - AnyCalendarKind::IslamicTabular - } else if *x == value!("umalqura") { - AnyCalendarKind::IslamicUmmAlQura - } else { - // Log a warning when a calendar value is passed in but doesn't match any calendars - DataError::custom("bcp47_value did not match any calendars").with_display_context(x); - return None; - }) + let slice = x.as_tinystr_slice(); + + if slice.len() <= 2 { + if let Some(first) = slice.get(0) { + if let Some(second) = slice.get(1) { + if first == "islamic" { + match second.as_str() { + "civil" => return Some(AnyCalendarKind::IslamicCivil), + "tbla" => return Some(AnyCalendarKind::IslamicTabular), + "umalqura" => return Some(AnyCalendarKind::IslamicUmmAlQura), + _ => (), + } + } + } else { + match first.as_str() { + "buddhist" => return Some(AnyCalendarKind::Buddhist), + "chinese" => return Some(AnyCalendarKind::Chinese), + "coptic" => return Some(AnyCalendarKind::Coptic), + "dangi" => return Some(AnyCalendarKind::Dangi), + "ethioaa" => return Some(AnyCalendarKind::EthiopianAmeteAlem), + "ethiopic" => return Some(AnyCalendarKind::Ethiopian), + "gregory" => return Some(AnyCalendarKind::Gregorian), + "hebrew" => return Some(AnyCalendarKind::Hebrew), + "indian" => return Some(AnyCalendarKind::Indian), + "islamic" => return Some(AnyCalendarKind::IslamicObservational), + "islamicc" => return Some(AnyCalendarKind::IslamicCivil), + "iso" => return Some(AnyCalendarKind::Iso), + "japanese" => return Some(AnyCalendarKind::Japanese), + "japanext" => return Some(AnyCalendarKind::JapaneseExtended), + "persian" => return Some(AnyCalendarKind::Persian), + "roc" => return Some(AnyCalendarKind::Roc), + _ => (), + } + } + } + } + + // Log a warning when a calendar value is passed in but doesn't match any calendars + DataError::custom("bcp47_value did not match any calendars").with_display_context(x); + None } /// Convert to a BCP-47 string @@ -980,10 +979,10 @@ impl AnyCalendarKind { AnyCalendarKind::Gregorian => "gregory", AnyCalendarKind::Hebrew => "hebrew", AnyCalendarKind::Indian => "indian", - AnyCalendarKind::IslamicCivil => "islamicc", + AnyCalendarKind::IslamicCivil => "islamic-civil", AnyCalendarKind::IslamicObservational => "islamic", - AnyCalendarKind::IslamicTabular => "tbla", - AnyCalendarKind::IslamicUmmAlQura => "umalqura", + AnyCalendarKind::IslamicTabular => "islamic-tbla", + AnyCalendarKind::IslamicUmmAlQura => "islamic-umalqura", AnyCalendarKind::Iso => "iso", AnyCalendarKind::Japanese => "japanese", AnyCalendarKind::JapaneseExtended => "japanext", @@ -993,6 +992,7 @@ impl AnyCalendarKind { } /// Convert to a BCP-47 `Value` + #[allow(clippy::unwrap_used)] // these are known-good BCP47 unicode extension values pub fn as_bcp47_value(self) -> Value { match self { AnyCalendarKind::Buddhist => value!("buddhist"), @@ -1004,10 +1004,12 @@ impl AnyCalendarKind { AnyCalendarKind::Gregorian => value!("gregory"), AnyCalendarKind::Hebrew => value!("hebrew"), AnyCalendarKind::Indian => value!("indian"), - AnyCalendarKind::IslamicCivil => value!("islamicc"), + AnyCalendarKind::IslamicCivil => Value::try_from_bytes(b"islamic-civil").unwrap(), AnyCalendarKind::IslamicObservational => value!("islamic"), - AnyCalendarKind::IslamicTabular => value!("tbla"), - AnyCalendarKind::IslamicUmmAlQura => value!("umalqura"), + AnyCalendarKind::IslamicTabular => Value::try_from_bytes(b"islamic-tbla").unwrap(), + AnyCalendarKind::IslamicUmmAlQura => { + Value::try_from_bytes(b"islamic-umalqura").unwrap() + } AnyCalendarKind::Iso => value!("iso"), AnyCalendarKind::Japanese => value!("japanese"), AnyCalendarKind::JapaneseExtended => value!("japanext"), diff --git a/components/datetime/src/calendar.rs b/components/datetime/src/calendar.rs index be8014b96bd..1b31f5e4d69 100644 --- a/components/datetime/src/calendar.rs +++ b/components/datetime/src/calendar.rs @@ -14,15 +14,17 @@ use icu_calendar::{ }; use icu_locid::extensions::unicode::{value, Value}; use icu_provider::prelude::*; - +use tinystr::{tinystr, TinyAsciiStr}; /// A calendar that can be found in CLDR /// /// New implementors of this trait will likely also wish to modify `get_era_code_map()` /// in the CLDR transformer to support any new era maps. pub trait CldrCalendar { - /// The Unicode BCP 47 identifier for the calendar + /// The Unicode BCP 47 identifier for the calendar's skeleton /// If multiple BCP 47 identifiers work, this should be /// the default one when no others are provided + /// + /// If `is_identifier_allowed_for_calendar()` is set, this only is used for loading skeletons data const DEFAULT_BCP_47_IDENTIFIER: Value; /// The data marker for loading symbols for this calendar. @@ -39,6 +41,18 @@ pub trait CldrCalendar { } } +/// Check if the provided value is of the form `islamic-{subcal}` +fn is_islamic_subcal(value: &Value, subcal: TinyAsciiStr<8>) -> bool { + let slice = value.as_tinystr_slice(); + if slice.len() > 2 { + return false; + } + if let (Some(first), Some(second)) = (slice.get(0), slice.get(1)) { + return *first == tinystr!(8, "islamic") && *second == subcal; + } + + false +} impl CldrCalendar for Buddhist { const DEFAULT_BCP_47_IDENTIFIER: Value = value!("buddhist"); type DateSymbolsV1Marker = BuddhistDateSymbolsV1Marker; @@ -91,9 +105,15 @@ impl CldrCalendar for Indian { } impl CldrCalendar for IslamicCivil { - const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamicc"); + // this value is not actually a valid identifier for this calendar, + // however since we are overriding is_identifier_allowed_for_calendar we are using + // this solely for its effects on skeleton data loading + const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamic"); type DateSymbolsV1Marker = IslamicDateSymbolsV1Marker; type DateLengthsV1Marker = IslamicDateLengthsV1Marker; + fn is_identifier_allowed_for_calendar(value: &Value) -> bool { + *value == value!("islamicc") || is_islamic_subcal(value, tinystr!(8, "civil")) + } } impl CldrCalendar for IslamicObservational { @@ -103,15 +123,27 @@ impl CldrCalendar for IslamicObservational { } impl CldrCalendar for IslamicTabular { - const DEFAULT_BCP_47_IDENTIFIER: Value = value!("tbla"); + // this value is not actually a valid identifier for this calendar, + // however since we are overriding is_identifier_allowed_for_calendar we are using + // this solely for its effects on skeleton data loading + const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamic"); type DateSymbolsV1Marker = IslamicDateSymbolsV1Marker; type DateLengthsV1Marker = IslamicDateLengthsV1Marker; + fn is_identifier_allowed_for_calendar(value: &Value) -> bool { + is_islamic_subcal(value, tinystr!(8, "tbla")) + } } impl CldrCalendar for IslamicUmmAlQura { - const DEFAULT_BCP_47_IDENTIFIER: Value = value!("umalqura"); + // this value is not actually a valid identifier for this calendar, + // however since we are overriding is_identifier_allowed_for_calendar we are using + // this solely for its effects on skeleton data loading + const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamic"); type DateSymbolsV1Marker = IslamicDateSymbolsV1Marker; type DateLengthsV1Marker = IslamicDateLengthsV1Marker; + fn is_identifier_allowed_for_calendar(value: &Value) -> bool { + is_islamic_subcal(value, tinystr!(8, "umalqura")) + } } impl CldrCalendar for Japanese { diff --git a/components/datetime/src/provider/date_time.rs b/components/datetime/src/provider/date_time.rs index f05e521c150..e50826b5f60 100644 --- a/components/datetime/src/provider/date_time.rs +++ b/components/datetime/src/provider/date_time.rs @@ -280,15 +280,23 @@ where #[cfg(feature = "experimental")] fn skeleton_data_payload(&self) -> Result> { use icu_locid::extensions::unicode::{key, value}; + use tinystr::tinystr; let mut locale = self.locale.clone(); #[allow(clippy::expect_used)] // experimental let cal_val = self.cal_val.expect("should be present for components bag"); // Skeleton data for ethioaa is stored under ethiopic if cal_val == &value!("ethioaa") { locale.set_unicode_ext(key!("ca"), value!("ethiopic")); + } else if cal_val == &value!("islamic") + || cal_val == &value!("islamicc") + || cal_val.as_tinystr_slice().get(0) == Some(&tinystr!(8, "islamic")) + { + // All islamic calendars store skeleton data under islamic, not their individual extension keys + locale.set_unicode_ext(key!("ca"), value!("islamic")); } else { locale.set_unicode_ext(key!("ca"), cal_val.clone()); }; + let data = self .data_provider .load(DataRequest { @@ -296,6 +304,7 @@ where metadata: Default::default(), })? .take_payload()?; + Ok(data) } } diff --git a/components/datetime/tests/fixtures/tests/components.json b/components/datetime/tests/fixtures/tests/components.json index b5cb3a3acea..91479e35030 100644 --- a/components/datetime/tests/fixtures/tests/components.json +++ b/components/datetime/tests/fixtures/tests/components.json @@ -31,12 +31,12 @@ "en-u-ca-indian": "Tuesday, Magha 01, 1941 Saka, 08:25:07", "en-u-ca-islamic": "Tuesday, Jumada I 25, 1441 AH, 08:25:07", "fr-u-ca-islamic": "mardi 25 joumada al oula 1441 ère de l’Hégire 08:25:07", - "en-u-ca-islamicc": "Tuesday, Jumada I 25, 1441 AH, 08:25:07", - "fr-u-ca-islamicc": "mardi 25 joumada al oula 1441 ère de l’Hégire 08:25:07", - "en-u-ca-umalqura": "Tuesday, Jumada I 26, 1441 AH, 08:25:07", - "fr-u-ca-umalqura": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07", - "en-u-ca-tbla": "Tuesday, Jumada I 26, 1441 AH, 08:25:07", - "fr-u-ca-tbla": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07", + "en-u-ca-islamic-civil": "Tuesday, Jumada I 25, 1441 AH, 08:25:07", + "fr-u-ca-islamic-civil": "mardi 25 joumada al oula 1441 ère de l’Hégire 08:25:07", + "en-u-ca-islamic-umalqura": "Tuesday, Jumada I 26, 1441 AH, 08:25:07", + "fr-u-ca-islamic-umalqura": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07", + "en-u-ca-islamic-tbla": "Tuesday, Jumada I 26, 1441 AH, 08:25:07", + "fr-u-ca-islamic-tbla": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07", "en-u-ca-persian": "Tuesday, Bahman 01, 1398 AP, 08:25:07", "fr-u-ca-persian": "mardi 01 bahman 1398 Anno Persico 08:25:07", "en-u-ca-hebrew": "Tuesday, 24 Tevet 5780 AM, 08:25:07",