Skip to content

Commit

Permalink
Use correct BCP47 tags for islamic calendars (#3871)
Browse files Browse the repository at this point in the history
* Move to the correct BCP47 tags for islamic

* Fix skeleta data loading

* fix

* clippy

* fmt
  • Loading branch information
Manishearth authored Aug 15, 2023
1 parent 9fbcc11 commit 6c7019c
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 61 deletions.
102 changes: 52 additions & 50 deletions components/calendar/src/any_calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -904,9 +904,9 @@ impl AnyCalendarKind {
b"gregory" => AnyCalendarKind::Gregorian,
b"hebrew" => AnyCalendarKind::Hebrew,
b"indian" => AnyCalendarKind::Indian,
b"islamic_civil" => AnyCalendarKind::IslamicCivil,
b"islamic_tbla" => AnyCalendarKind::IslamicTabular,
b"islamic_umalqura" => AnyCalendarKind::IslamicUmmAlQura,
b"islamic-civil" | b"islamicc" => AnyCalendarKind::IslamicCivil,
b"islamic-tbla" => AnyCalendarKind::IslamicTabular,
b"islamic-umalqura" => AnyCalendarKind::IslamicUmmAlQura,
b"islamic" => AnyCalendarKind::IslamicObservational,
b"iso" => AnyCalendarKind::Iso,
b"japanese" => AnyCalendarKind::Japanese,
Expand All @@ -925,47 +925,46 @@ impl AnyCalendarKind {
/// Returns None if the calendar is unknown. If you prefer an error, use
/// [`CalendarError::unknown_any_calendar_kind`].
pub fn get_for_bcp47_value(x: &Value) -> Option<Self> {
Some(if *x == value!("buddhist") {
AnyCalendarKind::Buddhist
} else if *x == value!("chinese") {
AnyCalendarKind::Chinese
} else if *x == value!("coptic") {
AnyCalendarKind::Coptic
} else if *x == value!("dangi") {
AnyCalendarKind::Dangi
} else if *x == value!("ethioaa") {
AnyCalendarKind::EthiopianAmeteAlem
} else if *x == value!("ethiopic") {
AnyCalendarKind::Ethiopian
} else if *x == value!("gregory") {
AnyCalendarKind::Gregorian
} else if *x == value!("hebrew") {
AnyCalendarKind::Hebrew
} else if *x == value!("indian") {
AnyCalendarKind::Indian
} else if *x == value!("islamic") {
AnyCalendarKind::IslamicObservational
} else if *x == value!("islamicc") {
AnyCalendarKind::IslamicCivil
} else if *x == value!("iso") {
AnyCalendarKind::Iso
} else if *x == value!("japanese") {
AnyCalendarKind::Japanese
} else if *x == value!("japanext") {
AnyCalendarKind::JapaneseExtended
} else if *x == value!("persian") {
AnyCalendarKind::Persian
} else if *x == value!("roc") {
AnyCalendarKind::Roc
} else if *x == value!("tbla") {
AnyCalendarKind::IslamicTabular
} else if *x == value!("umalqura") {
AnyCalendarKind::IslamicUmmAlQura
} else {
// Log a warning when a calendar value is passed in but doesn't match any calendars
DataError::custom("bcp47_value did not match any calendars").with_display_context(x);
return None;
})
let slice = x.as_tinystr_slice();

if slice.len() <= 2 {
if let Some(first) = slice.get(0) {
if let Some(second) = slice.get(1) {
if first == "islamic" {
match second.as_str() {
"civil" => return Some(AnyCalendarKind::IslamicCivil),
"tbla" => return Some(AnyCalendarKind::IslamicTabular),
"umalqura" => return Some(AnyCalendarKind::IslamicUmmAlQura),
_ => (),
}
}
} else {
match first.as_str() {
"buddhist" => return Some(AnyCalendarKind::Buddhist),
"chinese" => return Some(AnyCalendarKind::Chinese),
"coptic" => return Some(AnyCalendarKind::Coptic),
"dangi" => return Some(AnyCalendarKind::Dangi),
"ethioaa" => return Some(AnyCalendarKind::EthiopianAmeteAlem),
"ethiopic" => return Some(AnyCalendarKind::Ethiopian),
"gregory" => return Some(AnyCalendarKind::Gregorian),
"hebrew" => return Some(AnyCalendarKind::Hebrew),
"indian" => return Some(AnyCalendarKind::Indian),
"islamic" => return Some(AnyCalendarKind::IslamicObservational),
"islamicc" => return Some(AnyCalendarKind::IslamicCivil),
"iso" => return Some(AnyCalendarKind::Iso),
"japanese" => return Some(AnyCalendarKind::Japanese),
"japanext" => return Some(AnyCalendarKind::JapaneseExtended),
"persian" => return Some(AnyCalendarKind::Persian),
"roc" => return Some(AnyCalendarKind::Roc),
_ => (),
}
}
}
}

// Log a warning when a calendar value is passed in but doesn't match any calendars
DataError::custom("bcp47_value did not match any calendars").with_display_context(x);
None
}

/// Convert to a BCP-47 string
Expand All @@ -980,10 +979,10 @@ impl AnyCalendarKind {
AnyCalendarKind::Gregorian => "gregory",
AnyCalendarKind::Hebrew => "hebrew",
AnyCalendarKind::Indian => "indian",
AnyCalendarKind::IslamicCivil => "islamicc",
AnyCalendarKind::IslamicCivil => "islamic-civil",
AnyCalendarKind::IslamicObservational => "islamic",
AnyCalendarKind::IslamicTabular => "tbla",
AnyCalendarKind::IslamicUmmAlQura => "umalqura",
AnyCalendarKind::IslamicTabular => "islamic-tbla",
AnyCalendarKind::IslamicUmmAlQura => "islamic-umalqura",
AnyCalendarKind::Iso => "iso",
AnyCalendarKind::Japanese => "japanese",
AnyCalendarKind::JapaneseExtended => "japanext",
Expand All @@ -993,6 +992,7 @@ impl AnyCalendarKind {
}

/// Convert to a BCP-47 `Value`
#[allow(clippy::unwrap_used)] // these are known-good BCP47 unicode extension values
pub fn as_bcp47_value(self) -> Value {
match self {
AnyCalendarKind::Buddhist => value!("buddhist"),
Expand All @@ -1004,10 +1004,12 @@ impl AnyCalendarKind {
AnyCalendarKind::Gregorian => value!("gregory"),
AnyCalendarKind::Hebrew => value!("hebrew"),
AnyCalendarKind::Indian => value!("indian"),
AnyCalendarKind::IslamicCivil => value!("islamicc"),
AnyCalendarKind::IslamicCivil => Value::try_from_bytes(b"islamic-civil").unwrap(),
AnyCalendarKind::IslamicObservational => value!("islamic"),
AnyCalendarKind::IslamicTabular => value!("tbla"),
AnyCalendarKind::IslamicUmmAlQura => value!("umalqura"),
AnyCalendarKind::IslamicTabular => Value::try_from_bytes(b"islamic-tbla").unwrap(),
AnyCalendarKind::IslamicUmmAlQura => {
Value::try_from_bytes(b"islamic-umalqura").unwrap()
}
AnyCalendarKind::Iso => value!("iso"),
AnyCalendarKind::Japanese => value!("japanese"),
AnyCalendarKind::JapaneseExtended => value!("japanext"),
Expand Down
42 changes: 37 additions & 5 deletions components/datetime/src/calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@ use icu_calendar::{
};
use icu_locid::extensions::unicode::{value, Value};
use icu_provider::prelude::*;

use tinystr::{tinystr, TinyAsciiStr};
/// A calendar that can be found in CLDR
///
/// New implementors of this trait will likely also wish to modify `get_era_code_map()`
/// in the CLDR transformer to support any new era maps.
pub trait CldrCalendar {
/// The Unicode BCP 47 identifier for the calendar
/// The Unicode BCP 47 identifier for the calendar's skeleton
/// If multiple BCP 47 identifiers work, this should be
/// the default one when no others are provided
///
/// If `is_identifier_allowed_for_calendar()` is overridden, this is only used for loading skeleton data
const DEFAULT_BCP_47_IDENTIFIER: Value;

/// The data marker for loading symbols for this calendar.
Expand All @@ -39,6 +41,18 @@ pub trait CldrCalendar {
}
}

/// Returns `true` only when `value` consists of exactly two subtags:
/// `islamic` followed by `subcal` (i.e. it has the form `islamic-{subcal}`).
fn is_islamic_subcal(value: &Value, subcal: TinyAsciiStr<8>) -> bool {
    match value.as_tinystr_slice() {
        // Exactly two subtags: compare both against the expected pair.
        [prefix, variant] => *prefix == tinystr!(8, "islamic") && *variant == subcal,
        // Zero, one, or more than two subtags never match.
        _ => false,
    }
}
impl CldrCalendar for Buddhist {
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("buddhist");
type DateSymbolsV1Marker = BuddhistDateSymbolsV1Marker;
Expand Down Expand Up @@ -91,9 +105,15 @@ impl CldrCalendar for Indian {
}

impl CldrCalendar for IslamicCivil {
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamicc");
// this value is not actually a valid identifier for this calendar,
// however since we are overriding is_identifier_allowed_for_calendar we are using
// this solely for its effects on skeleton data loading
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamic");
type DateSymbolsV1Marker = IslamicDateSymbolsV1Marker;
type DateLengthsV1Marker = IslamicDateLengthsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
*value == value!("islamicc") || is_islamic_subcal(value, tinystr!(8, "civil"))
}
}

impl CldrCalendar for IslamicObservational {
Expand All @@ -103,15 +123,27 @@ impl CldrCalendar for IslamicObservational {
}

impl CldrCalendar for IslamicTabular {
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("tbla");
// this value is not actually a valid identifier for this calendar,
// however since we are overriding is_identifier_allowed_for_calendar we are using
// this solely for its effects on skeleton data loading
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamic");
type DateSymbolsV1Marker = IslamicDateSymbolsV1Marker;
type DateLengthsV1Marker = IslamicDateLengthsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
is_islamic_subcal(value, tinystr!(8, "tbla"))
}
}

impl CldrCalendar for IslamicUmmAlQura {
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("umalqura");
// this value is not actually a valid identifier for this calendar,
// however since we are overriding is_identifier_allowed_for_calendar we are using
// this solely for its effects on skeleton data loading
const DEFAULT_BCP_47_IDENTIFIER: Value = value!("islamic");
type DateSymbolsV1Marker = IslamicDateSymbolsV1Marker;
type DateLengthsV1Marker = IslamicDateLengthsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
is_islamic_subcal(value, tinystr!(8, "umalqura"))
}
}

impl CldrCalendar for Japanese {
Expand Down
9 changes: 9 additions & 0 deletions components/datetime/src/provider/date_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,22 +280,31 @@ where
/// Loads the date skeleton patterns for this provider's locale.
///
/// The `ca` extension of the request locale is set to the calendar under which skeleton
/// data is stored: `ethioaa` is redirected to `ethiopic`, every islamic variant is
/// redirected to `islamic`, and any other calendar value is used unchanged.
///
/// # Panics
///
/// Panics if `self.cal_val` is `None`.
#[cfg(feature = "experimental")]
fn skeleton_data_payload(&self) -> Result<DataPayload<DateSkeletonPatternsV1Marker>> {
    use icu_locid::extensions::unicode::{key, value};
    use tinystr::tinystr;

    #[allow(clippy::expect_used)] // experimental
    let cal_val = self.cal_val.expect("should be present for components bag");

    // Matches `islamic`, `islamicc`, and any value whose first subtag is `islamic`.
    let is_islamic = cal_val == &value!("islamic")
        || cal_val == &value!("islamicc")
        || cal_val.as_tinystr_slice().first() == Some(&tinystr!(8, "islamic"));

    let skeleton_cal = if cal_val == &value!("ethioaa") {
        // Skeleton data for ethioaa is stored under ethiopic
        value!("ethiopic")
    } else if is_islamic {
        // All islamic calendars store skeleton data under islamic, not their individual extension keys
        value!("islamic")
    } else {
        cal_val.clone()
    };

    let mut locale = self.locale.clone();
    locale.set_unicode_ext(key!("ca"), skeleton_cal);

    let request = DataRequest {
        locale: &locale,
        metadata: Default::default(),
    };
    let payload = self.data_provider.load(request)?.take_payload()?;

    Ok(payload)
}
}
Expand Down
12 changes: 6 additions & 6 deletions components/datetime/tests/fixtures/tests/components.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@
"en-u-ca-indian": "Tuesday, Magha 01, 1941 Saka, 08:25:07",
"en-u-ca-islamic": "Tuesday, Jumada I 25, 1441 AH, 08:25:07",
"fr-u-ca-islamic": "mardi 25 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-islamicc": "Tuesday, Jumada I 25, 1441 AH, 08:25:07",
"fr-u-ca-islamicc": "mardi 25 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-umalqura": "Tuesday, Jumada I 26, 1441 AH, 08:25:07",
"fr-u-ca-umalqura": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-tbla": "Tuesday, Jumada I 26, 1441 AH, 08:25:07",
"fr-u-ca-tbla": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-islamic-civil": "Tuesday, Jumada I 25, 1441 AH, 08:25:07",
"fr-u-ca-islamic-civil": "mardi 25 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-islamic-umalqura": "Tuesday, Jumada I 26, 1441 AH, 08:25:07",
"fr-u-ca-islamic-umalqura": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-islamic-tbla": "Tuesday, Jumada I 26, 1441 AH, 08:25:07",
"fr-u-ca-islamic-tbla": "mardi 26 joumada al oula 1441 ère de l’Hégire 08:25:07",
"en-u-ca-persian": "Tuesday, Bahman 01, 1398 AP, 08:25:07",
"fr-u-ca-persian": "mardi 01 bahman 1398 Anno Persico 08:25:07",
"en-u-ca-hebrew": "Tuesday, 24 Tevet 5780 AM, 08:25:07",
Expand Down

0 comments on commit 6c7019c

Please sign in to comment.