Skip to content

Commit

Permalink
Deduplicate tz locations against root (#5759)
Browse files Browse the repository at this point in the history
A lot of location names are the same in many languages. 

For exemplar cities we have root data; for territories, however, we don't.
For territories, I'm resolving the `und` name by using a Latin-script
endonym (i.e. for region `XY`, resolve `und-Latn-XY`), and if that
fails, falling back to English (not perfect, but it's better than the raw
region code for both display purposes and deduplication).
  • Loading branch information
robertbastian authored Nov 3, 2024
1 parent ed143c9 commit 983ce33
Show file tree
Hide file tree
Showing 18 changed files with 681 additions and 3,019 deletions.
12 changes: 12 additions & 0 deletions components/datetime/src/format/neo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ impl From<RawDateTimeNames<DateMarker>> for RawDateTimeNames<DateTimeMarker> {
weekday_names: other.weekday_names,
dayperiod_names: DateTimeNamesData2::none(),
zone_essentials: (),
locations_root: (),
locations: (),
mz_generic_long: (),
mz_generic_short: (),
Expand All @@ -517,6 +518,7 @@ impl From<RawDateTimeNames<TimeMarker>> for RawDateTimeNames<DateTimeMarker> {
weekday_names: DateTimeNamesData2::none(),
dayperiod_names: other.dayperiod_names,
zone_essentials: (),
locations_root: (),
locations: (),
mz_generic_long: (),
mz_generic_short: (),
Expand Down Expand Up @@ -545,6 +547,8 @@ pub(crate) struct RawDateTimeNames<R: DateTimeNamesMarker> {
>,
zone_essentials:
<R::ZoneEssentials as DateTimeNamesHolderTrait<tz::EssentialsV1Marker>>::Container<()>,
locations_root:
<R::ZoneLocations as DateTimeNamesHolderTrait<tz::LocationsV1Marker>>::Container<()>,
locations: <R::ZoneLocations as DateTimeNamesHolderTrait<tz::LocationsV1Marker>>::Container<()>,
mz_generic_long:
<R::ZoneGenericLong as DateTimeNamesHolderTrait<tz::MzGenericLongV1Marker>>::Container<()>,
Expand Down Expand Up @@ -589,6 +593,7 @@ pub(crate) struct RawDateTimeNamesBorrowed<'l> {
weekday_names: OptionalNames<(fields::Weekday, FieldLength), &'l LinearNamesV1<'l>>,
dayperiod_names: OptionalNames<FieldLength, &'l LinearNamesV1<'l>>,
zone_essentials: OptionalNames<(), &'l tz::EssentialsV1<'l>>,
locations_root: OptionalNames<(), &'l tz::LocationsV1<'l>>,
locations: OptionalNames<(), &'l tz::LocationsV1<'l>>,
mz_generic_long: OptionalNames<(), &'l tz::MzGenericV1<'l>>,
mz_generic_short: OptionalNames<(), &'l tz::MzGenericV1<'l>>,
Expand Down Expand Up @@ -1659,6 +1664,7 @@ impl<R: DateTimeNamesMarker> RawDateTimeNames<R> {
weekday_names: <R::WeekdayNames as DateTimeNamesHolderTrait<WeekdayNamesV1Marker>>::Container::<_>::new_empty(),
dayperiod_names: <R::DayPeriodNames as DateTimeNamesHolderTrait<DayPeriodNamesV1Marker>>::Container::<_>::new_empty(),
zone_essentials: <R::ZoneEssentials as DateTimeNamesHolderTrait<tz::EssentialsV1Marker>>::Container::<_>::new_empty(),
locations_root: <R::ZoneLocations as DateTimeNamesHolderTrait<tz::LocationsV1Marker>>::Container::<_>::new_empty(),
locations: <R::ZoneLocations as DateTimeNamesHolderTrait<tz::LocationsV1Marker>>::Container::<_>::new_empty(),
mz_generic_long: <R::ZoneGenericLong as DateTimeNamesHolderTrait<tz::MzGenericLongV1Marker>>::Container::<_>::new_empty(),
mz_generic_short: <R::ZoneGenericShort as DateTimeNamesHolderTrait<tz::MzGenericShortV1Marker>>::Container::<_>::new_empty(),
Expand All @@ -1677,6 +1683,7 @@ impl<R: DateTimeNamesMarker> RawDateTimeNames<R> {
weekday_names: self.weekday_names.get().inner,
dayperiod_names: self.dayperiod_names.get().inner,
zone_essentials: self.zone_essentials.get().inner,
locations_root: self.locations_root.get().inner,
locations: self.locations.get().inner,
mz_generic_long: self.mz_generic_long.get().inner,
mz_generic_short: self.mz_generic_short.get().inner,
Expand Down Expand Up @@ -1896,6 +1903,10 @@ impl<R: DateTimeNamesMarker> RawDateTimeNames<R> {
id: DataIdentifierBorrowed::for_locale(locale),
..Default::default()
};
self.locations_root
.load_put(provider, Default::default(), variables)
.map_err(|e| MaybePayloadError2::into_single_load_error(e, field))?
.map_err(SingleLoadError::Data)?;
self.locations
.load_put(provider, req, variables)
.map_err(|e| MaybePayloadError2::into_single_load_error(e, field))?
Expand Down Expand Up @@ -2626,6 +2637,7 @@ impl<'data> RawDateTimeNamesBorrowed<'data> {
pub(crate) fn get_payloads(&self) -> crate::time_zone::TimeZoneDataPayloadsBorrowed<'data> {
TimeZoneDataPayloadsBorrowed {
essentials: self.zone_essentials.get_option(),
locations_root: self.locations_root.get_option(),
locations: self.locations.get_option(),
mz_generic_long: self.mz_generic_long.get_option(),
mz_generic_short: self.mz_generic_short.get_option(),
Expand Down
32 changes: 28 additions & 4 deletions components/datetime/src/time_zone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ impl ResolvedNeoTimeZoneSkeleton {
pub(crate) struct TimeZoneDataPayloadsBorrowed<'a> {
/// The data that contains meta information about how to display content.
pub(crate) essentials: Option<&'a provider::time_zones::TimeZoneEssentialsV1<'a>>,
/// The location names, e.g. Germany Time
/// The root location names, e.g. Toronto
pub(crate) locations_root: Option<&'a provider::time_zones::LocationsV1<'a>>,
/// The language specific location names, e.g. Italy
pub(crate) locations: Option<&'a provider::time_zones::LocationsV1<'a>>,
/// The generic long metazone names, e.g. Pacific Time
pub(crate) mz_generic_long: Option<&'a provider::time_zones::MetazoneGenericNamesV1<'a>>,
Expand Down Expand Up @@ -564,7 +566,15 @@ impl FormatTimeZone for GenericLocationFormat {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};

let Some(location) = locations.locations.get(&time_zone_id) else {
let Some(locations_root) = data_payloads.locations_root else {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};

let Some(location) = locations
.locations
.get(&time_zone_id)
.or_else(|| locations_root.locations.get(&time_zone_id))
else {
return Ok(Err(FormatTimeZoneError::Fallback));
};

Expand Down Expand Up @@ -600,8 +610,15 @@ impl FormatTimeZone for SpecificLocationFormat {
let Some(locations) = data_payloads.locations else {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};
let Some(locations_root) = data_payloads.locations_root else {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};

let Some(location) = locations.locations.get(&time_zone_id) else {
let Some(location) = locations
.locations
.get(&time_zone_id)
.or_else(|| locations_root.locations.get(&time_zone_id))
else {
return Ok(Err(FormatTimeZoneError::Fallback));
};

Expand Down Expand Up @@ -642,6 +659,9 @@ impl FormatTimeZone for GenericPartialLocationFormat {
let Some(locations) = data_payloads.locations else {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};
let Some(locations_root) = data_payloads.locations_root else {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};
let Some(non_locations) = (match self.0 {
FieldLength::Wide => data_payloads.mz_generic_long.as_ref(),
_ => data_payloads.mz_generic_short.as_ref(),
Expand All @@ -651,7 +671,11 @@ impl FormatTimeZone for GenericPartialLocationFormat {
let Some(metazone_period) = data_payloads.mz_periods else {
return Ok(Err(FormatTimeZoneError::MissingZoneSymbols));
};
let Some(location) = locations.locations.get(&time_zone_id) else {
let Some(location) = locations
.locations
.get(&time_zone_id)
.or_else(|| locations_root.locations.get(&time_zone_id))
else {
return Ok(Err(FormatTimeZoneError::Fallback));
};
let Some(non_location) = non_locations.overrides.get(&time_zone_id).or_else(|| {
Expand Down
300 changes: 150 additions & 150 deletions provider/data/datetime/data/locations_v1_marker.rs.data

Large diffs are not rendered by default.

Loading

0 comments on commit 983ce33

Please sign in to comment.