diff --git a/.github/workflows/artifacts-build.yml b/.github/workflows/artifacts-build.yml index 90bed0c42de..5155fb0f729 100644 --- a/.github/workflows/artifacts-build.yml +++ b/.github/workflows/artifacts-build.yml @@ -159,6 +159,7 @@ jobs: run: | cd ffi/diplomat/js/examples/node make + cd ../wasm-demo npm ci - name: Run Webpack @@ -544,9 +545,13 @@ jobs: run: | mkdir -p benchmarks/datasize - - name: Measure size of selected data package provider/datagen/tests/data/testdata.postcard + - name: Generate testdata run: | - cargo run --package icu_benchmark_binsize -- provider/datagen/tests/data/testdata.postcard file | tee benchmarks/datasize/output.txt + cargo run --bin make-testdata-legacy + + - name: Measure size of selected data package provider/testdata/data/testdata.postcard + run: | + cargo run --package icu_benchmark_binsize -- provider/testdata/data/testdata.postcard file | tee benchmarks/datasize/output.txt - name: Download previous benchmark data run: | diff --git a/Cargo.lock b/Cargo.lock index 4a7b51c1a68..f75787ce723 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1078,6 +1078,12 @@ version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "half" version = "1.8.2" @@ -1453,6 +1459,7 @@ dependencies = [ "displaydoc", "elsa", "eyre", + "glob", "icu", "icu_calendar", "icu_casemap", @@ -1467,7 +1474,6 @@ dependencies = [ "icu_locid", "icu_locid_transform", "icu_normalizer", - "icu_personnames", "icu_plurals", "icu_properties", "icu_provider", @@ -1716,19 +1722,6 @@ dependencies = [ "writeable", ] -[[package]] -name = "icu_personnames" -version = "0.1.0" -dependencies = [ - "databake", - "icu_collections", - "icu_locid", - "icu_provider", - 
"litemap", - "serde", - "zerovec", -] - [[package]] name = "icu_plurals" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index d70a2dcbf0e..b51ad252f26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,7 +83,8 @@ exclude = [ "ffi/gn", # Testdata will still be published in the 1.x stream, but is deprecated # and we don't use it anymore. As we don't want to keep the actual data - # in the repo it doesn't build without running `cargo make testdata` first. + # in the repo it doesn't build without running `cargo make testdata-legacy` + # first. "provider/testdata", # Tutorials are tested in their own cargo workspace against released and # local crates diff --git a/Makefile.toml b/Makefile.toml index fc9aa9b251d..0fae50066c1 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -54,7 +54,7 @@ args = ["check", "--all-targets", "--all-features"] description = "Check ICU4X build with no features (covered in CI via cargo make check-all-features)" category = "ICU4X Development" command = "cargo" -args = ["check", "--all-targets", "--no-default-features"] +args = ["check", "--no-default-features"] [tasks.ci-job-msrv-check] description = "Run all tests for the CI 'check' job" diff --git a/components/calendar/src/astronomy.rs b/components/calendar/src/astronomy.rs index a52378d64b3..ac4f6859b45 100644 --- a/components/calendar/src/astronomy.rs +++ b/components/calendar/src/astronomy.rs @@ -12,7 +12,6 @@ use crate::iso::Iso; use crate::rata_die::RataDie; use crate::types::Moment; use crate::{Date, Gregorian}; -use alloc::vec; #[derive(Debug, Copy, Clone, Default)] /// A Location on the Earth given as a latitude, longitude, and elevation, @@ -20,11 +19,20 @@ use alloc::vec; /// longitude in degrees from -180 to 180, /// and elevation in meters. 
pub(crate) struct Location { - latitude: f64, // latitude from -90 to 90 - longitude: f64, // longitude from -180 to 180 - elevation: f64, // elevation in meters - zone: f64, // UTC timezone offset + pub(crate) latitude: f64, // latitude from -90 to 90 + pub(crate) longitude: f64, // longitude from -180 to 180 + pub(crate) elevation: f64, // elevation in meters + pub(crate) zone: f64, // UTC timezone offset } + +#[allow(dead_code)] +pub(crate) const MECCA: Location = Location { + latitude: 6427.0 / 300.0, + longitude: 11947.0 / 300.0, + elevation: 298.0, + zone: (1_f64 / 8_f64), +}; + #[allow(clippy::excessive_precision)] pub(crate) const PI: f64 = 3.14159265358979323846264338327950288_f64; @@ -47,7 +55,7 @@ pub(crate) const MAX_UTC_OFFSET: f64 = 14.0 / 24.0; impl Location { /// Create a location; latitude is from -90 to 90, and longitude is from -180 to 180; /// attempting to create a location outside of these bounds will result in a LocationError. - #[allow(dead_code)] // TODO: Remove dead_code tag after use + #[allow(dead_code)] pub(crate) fn try_new( latitude: f64, longitude: f64, @@ -83,19 +91,19 @@ impl Location { } /// Get the longitude of a Location - #[allow(dead_code)] // TODO: Remove dead_code tag after use + #[allow(dead_code)] pub(crate) fn longitude(&self) -> f64 { self.longitude } /// Get the latitude of a Location - #[allow(dead_code)] // TODO: Remove dead_code tag after use + #[allow(dead_code)] pub(crate) fn latitude(&self) -> f64 { self.latitude } /// Get the elevation of a Location - #[allow(dead_code)] // TODO: Remove dead_code tag after use + #[allow(dead_code)] pub(crate) fn elevation(&self) -> f64 { self.elevation } @@ -110,12 +118,11 @@ impl Location { /// this yields the difference in Moment given a longitude /// e.g. a longitude of 90 degrees is 0.25 (90 / 360) days ahead /// of a location with a longitude of 0 degrees. 
- #[allow(dead_code)] // TODO: Remove dead_code tag after use pub(crate) fn zone_from_longitude(longitude: f64) -> f64 { longitude / (360.0) } + // Convert standard time to local mean time given a location and a time zone with given offset - #[allow(dead_code)] pub(crate) fn standard_from_local(standard_time: Moment, location: Location) -> Moment { Self::standard_from_universal( Self::universal_from_local(standard_time, location), @@ -129,7 +136,7 @@ impl Location { } /// Convert from universal time to local time given a location - #[allow(dead_code)] // TODO: Remove dead_code tag after use + #[allow(dead_code)] pub(crate) fn local_from_universal(universal_time: Moment, location: Location) -> Moment { universal_time + Self::zone_from_longitude(location.longitude) } @@ -146,7 +153,6 @@ impl Location { /// return the Moment in standard time from the time zone with the given offset. /// The field utc_offset should be within the range of possible offsets given by /// the constand fields `MIN_UTC_OFFSET` and `MAX_UTC_OFFSET`. 
- #[allow(dead_code)] pub(crate) fn standard_from_universal(standard_time: Moment, location: Location) -> Moment { debug_assert!(location.zone > MIN_UTC_OFFSET && location.zone < MAX_UTC_OFFSET, "UTC offset {0} was not within the possible range of offsets (see astronomy::MIN_UTC_OFFSET and astronomy::MAX_UTC_OFFSET)", location.zone); standard_time + location.zone @@ -260,44 +266,43 @@ impl Astronomical { pub(crate) fn equation_of_time(moment: Moment) -> f64 { let c = Self::julian_centuries(moment); - let lambda = poly(c, vec![280.46645, 36000.76983, 0.0003032]); - let anomaly = poly(c, vec![357.52910, 35999.05030, -0.0001559, -0.00000048]); - let eccentricity = poly(c, vec![0.016708617, -0.000042037, -0.0000001236]); + let lambda = poly(c, &[280.46645, 36000.76983, 0.0003032]); + let anomaly = poly(c, &[357.52910, 35999.05030, -0.0001559, -0.00000048]); + let eccentricity = poly(c, &[0.016708617, -0.000042037, -0.0000001236]); let varepsilon = Self::obliquity(moment); let y = libm::pow(tan_degrees(varepsilon / 2.0), 2.0); - let equation = 1.0 / (2.0 * PI) - * (y * sin_degrees(2.0 * lambda) - 2.0 * eccentricity * sin_degrees(anomaly) - + 4.0 * eccentricity * y * sin_degrees(anomaly) * cos_degrees(2.0 * lambda) - - 0.5 * libm::pow(y, 2.0) * sin_degrees(4.0 * lambda) - - 1.25 * libm::pow(eccentricity, 2.0) * sin_degrees(2.0 * anomaly)); + let equation = (y * sin_degrees(2.0 * lambda) - 2.0 * eccentricity * sin_degrees(anomaly) + + 4.0 * eccentricity * y * sin_degrees(anomaly) * cos_degrees(2.0 * lambda) + - 0.5 * libm::pow(y, 2.0) * sin_degrees(4.0 * lambda) + - 1.25 * libm::pow(eccentricity, 2.0) * sin_degrees(2.0 * anomaly)) + / (2.0 * PI); signum(equation) * libm::fabs(equation).min(12.0 / 24.0) } - #[allow(dead_code, clippy::unwrap_used)] + #[allow(clippy::unwrap_used)] pub(crate) fn dusk(date: f64, location: Location, alpha: f64) -> Option { let evening = false; - let result = - Self::moment_of_depression(Moment::new(date + (18.0 / 24.0)), location, alpha, 
evening); - - result?; - Some(Location::standard_from_local(result.unwrap(), location)) + let moment_of_depression = Self::moment_of_depression( + Moment::new(date + (18.0 / 24.0)), + location, + alpha, + evening, + )?; + Some(Location::standard_from_local( + moment_of_depression, + location, + )) } // Calculates the obliquity of the ecliptic at a given moment pub fn obliquity(moment: Moment) -> f64 { let c = Self::julian_centuries(moment); let angle = 23.0 + 26.0 / 60.0 + 21.448 / 3600.0; - let list = [0.0, -46.8150 / 3600.0, -0.00059 / 3600.0, 0.001813 / 3600.0]; - - let mut result = angle; - let mut c_power = c; - for coef in list.into_iter().skip(1) { - result += coef * c_power; - c_power *= c; - } - result + let coefs = &[0.0, -46.8150 / 3600.0, -0.00059 / 3600.0, 0.001813 / 3600.0]; + angle + poly(c, coefs) } + // Calculates declination at a given Moment of UTC time for the latitude and longitude of an object lambda pub(crate) fn declination(moment: Moment, beta: f64, lambda: f64) -> f64 { let varepsilon = Self::obliquity(moment); @@ -306,6 +311,7 @@ impl Astronomical { + cos_degrees(beta) * sin_degrees(varepsilon) * sin_degrees(lambda), ) } + pub(crate) fn right_ascension( moment: Moment, beta: f64, @@ -329,8 +335,7 @@ impl Astronomical { alpha: f64, early: bool, ) -> Option { - let mut t = Self::sine_offset(moment, location, alpha); - let date = moment.as_rata_die().to_i64_date() as f64; + let date = libm::floor(moment.as_rata_die().to_f64_date()); let alt = if alpha >= 0.0 { if early { date @@ -341,11 +346,10 @@ impl Astronomical { date + 12.0 / 24.0 }; - let value = if libm::fabs(t) > 1.0 { - t = Self::sine_offset(Moment::new(alt), location, alpha); - t + let value = if libm::fabs(Self::sine_offset(moment, location, alpha)) > 1.0 { + Self::sine_offset(Moment::new(alt), location, alpha) } else { - t + Self::sine_offset(moment, location, alpha) }; if libm::fabs(value) <= 1.0 { @@ -367,6 +371,7 @@ impl Astronomical { None } } + #[allow(clippy::unwrap_used)] 
pub(crate) fn moment_of_depression( approx: Moment, @@ -374,14 +379,14 @@ impl Astronomical { alpha: f64, early: bool, ) -> Option { - let moment = Self::approx_moment_of_depression(approx, location, alpha, early); - moment?; - if libm::fabs(approx - moment.unwrap()) < 30.0 { - Some(moment.unwrap()) + let moment = Self::approx_moment_of_depression(approx, location, alpha, early)?; + if libm::fabs(approx - moment) < 30.0 { + Some(moment) } else { - Self::moment_of_depression(moment.unwrap(), location, alpha, early) + Self::moment_of_depression(moment, location, alpha, early) } } + // Refraction angle at given moment in given location pub(crate) fn refraction(location: Location) -> f64 { // The moment is not used. @@ -488,10 +493,9 @@ impl Astronomical { Self::universal_from_dynamical(approx + correction + extra + additional) } - #[allow(dead_code)] pub(crate) fn sidereal_from_moment(moment: Moment) -> f64 { let c = (moment - J2000) / 36525.0; - let coefficients = vec![ + let coefficients = &[ (280.46061837), (36525.0 * 360.98564736629), (0.000387933), @@ -506,7 +510,6 @@ impl Astronomical { /// Latitude of the moon (in degrees) at a given moment /// /// Reference code: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L4466 - #[allow(dead_code)] // TODO: Remove dead_code tag after use pub(crate) fn lunar_latitude(moment: Moment) -> f64 { let c = Self::julian_centuries(moment); let l = Self::mean_lunar_longitude(c); @@ -553,17 +556,19 @@ impl Astronomical { ]; let mut correction = 0.0; - let len = sine_coeff.len(); - #[allow(clippy::indexing_slicing)] - for i in 0..len { - let v = sine_coeff[i]; - let w = args_lunar_elongation[i]; - let x = args_solar_anomaly[i]; - let y = args_lunar_anomaly[i]; - let z = args_moon_node[i]; - + for (v, w, x, y, z) in sine_coeff + .iter() + .zip( + args_lunar_elongation.iter().zip( + args_solar_anomaly + .iter() + .zip(args_lunar_anomaly.iter().zip(args_moon_node.iter())), + ), + ) + .map(|(v, (w, (x, (y, z))))| (v, w, 
x, y, z)) + { correction += - v * libm::pow(e, libm::fabs(x)) * sin_degrees(w * d + x * ms + y * ml + z * f); + v * libm::pow(e, libm::fabs(*x)) * sin_degrees(w * d + x * ms + y * ml + z * f); } correction /= 1_000_000.0; @@ -661,6 +666,38 @@ impl Astronomical { div_rem_euclid_f64(n, 360.0).1 } + pub fn phasis_on_or_after(date: RataDie, location: Location) -> RataDie { + let moon = Self::lunar_phase_at_or_before(0.0, date.as_moment()); + let age = date - moon.as_rata_die(); + let tau = if age <= 4 || Self::visible_crescent((date - 1).as_moment(), location) { + moon + 29.0 // Next new moon + } else { + date.as_moment() + }; + next_moment(tau, location, Self::visible_crescent) + } + + pub fn phasis_on_or_before(date: RataDie, location: Location) -> RataDie { + let moon = Self::lunar_phase_at_or_before(0.0, date.as_moment()); + let age = date - moon.as_rata_die(); + let tau = if age <= 3 && !Self::visible_crescent((date).as_moment(), location) { + moon - 30.0 // Next new moon + } else { + moon + }; + next_moment(tau, location, Self::visible_crescent) + } + + #[allow(clippy::unwrap_used)] + pub fn month_length(date: RataDie, location: Location) -> u8 { + let moon = Self::phasis_on_or_after(date + 1, location); + let prev = Self::phasis_on_or_before(date, location); + + debug_assert!(moon > prev); + debug_assert!(moon - prev < u8::MAX.into()); + (moon - prev) as u8 + } + // Lunar elongation (the moon's angular distance east of the Sun) at a given Moment in Julian centuries // // Reference code: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L4160-L4170 @@ -673,11 +710,12 @@ impl Astronomical { ) .1 } + /// Altitude of the moon (in degrees) at a given moment /// /// Lisp code reference: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L4537 - #[allow(dead_code, clippy::unwrap_used)] + #[allow(clippy::unwrap_used)] pub(crate) fn lunar_altitude(moment: Moment, location: Location) -> f64 { let phi = location.latitude; let psi = 
location.longitude; @@ -696,6 +734,7 @@ impl Astronomical { mod3(altitude, -180.0, 180.0) } + #[allow(dead_code)] pub(crate) fn lunar_distance(moment: Moment) -> f64 { let c = Self::julian_centuries(moment); let cap_d = Self::lunar_elongation(c); @@ -813,6 +852,7 @@ impl Astronomical { /// Parallax of moon at tee at location. /// Adapted from "Astronomical Algorithms" by Jean Meeus, /// Willmann-Bell, 2nd edn., 1998. + #[allow(dead_code)] pub(crate) fn lunar_parallax(moment: Moment, location: Location) -> f64 { let geo = Self::lunar_altitude(moment, location); let cap_delta = Self::lunar_distance(moment); @@ -823,12 +863,14 @@ impl Astronomical { /// Topocentric altitude of moon at moment at location, /// as a small positive/negative angle in degrees. + #[allow(dead_code)] fn topocentric_lunar_altitude(moment: Moment, location: Location) -> f64 { Self::lunar_altitude(moment, location) - Self::lunar_parallax(moment, location) } /// Observed altitude of upper limb of moon at moment at location, /// as a small positive/negative angle in degrees. 
+ #[allow(dead_code)] fn observed_lunar_altitude(moment: Moment, location: Location) -> f64 { let r = Self::topocentric_lunar_altitude(moment, location); let y = Self::refraction(location); @@ -906,6 +948,7 @@ impl Astronomical { Location::standard_from_universal(set, location).inner(), date.inner(), )); + debug_assert!(std >= date, "std should not be less than date"); if std < date { return None; } @@ -916,25 +959,18 @@ impl Astronomical { } #[allow(dead_code)] - fn sunset(date: Moment, location: Location) -> Option { + pub(crate) fn sunset(date: Moment, location: Location) -> Option { let alpha = Self::refraction(location) + (16.0 / 60.0); Self::dusk(date.inner(), location, alpha) } #[allow(dead_code, clippy::unwrap_used, clippy::eq_op)] - fn moonlag(date: Moment, location: Location) -> Option { - let sun = Self::sunset(date, location); - let moon = Self::moonset(date, location); - - // Sunset does not occur - sun?; - if moon.is_none() { - Some(24.0 / 24.0) // Arbitrary value as stated by lisp code - } else { - #[allow(clippy::unnecessary_unwrap)] - Some(moon.unwrap() - sun.unwrap()) - } + pub(crate) fn moonlag(date: Moment, location: Location) -> Option { + let sun = Self::sunset(date, location)?; + let moon = Self::moonset(date, location)?; + + Some(moon - sun) } // Longitudinal nutation (periodic variation in the inclination of the Earth's axis) at a given Moment @@ -971,10 +1007,10 @@ impl Astronomical { a } } - #[allow(dead_code)] - pub(crate) fn lunar_phase_at_or_before(phase: f64, moment: Moment) -> f64 { + + pub(crate) fn lunar_phase_at_or_before(phase: f64, moment: Moment) -> Moment { let tau = moment.inner() - - MEAN_SYNODIC_MONTH / (360.0 / phase) * ((Self::lunar_phase(moment) - phase) % 360.0); + - (MEAN_SYNODIC_MONTH / 360.0) * ((Self::lunar_phase(moment) - phase) % 360.0); let a = tau - 2.0; let b = if moment.inner() <= (tau + 2.0) { moment.inner() @@ -984,7 +1020,7 @@ impl Astronomical { let lunar_phase_f64 = |x: f64| -> f64 { 
Self::lunar_phase(Moment::new(x)) }; - invert_angular(lunar_phase_f64, phase, (a, b)) + Moment::new(invert_angular(lunar_phase_f64, phase, (a, b))) } /// The longitude of the Sun at a given Moment in degrees @@ -1067,6 +1103,50 @@ impl Astronomical { div_rem_euclid_f64(lambda + Self::aberration(c) + Self::nutation(moment), 360.0).1 } + // Best viewing time (UT) in evening. + fn simple_best_view(date: RataDie, location: Location) -> Moment { + let dark = Self::dusk(date.to_f64_date(), location, 4.5); + let best = dark.unwrap_or((date + 1).as_moment()); + + Location::universal_from_standard(best, location) + } + + // Angular separation of sun and moon at a specific moment + fn arc_of_light(moment: Moment) -> f64 { + arccos_degrees( + cos_degrees(Self::lunar_latitude(moment)) * cos_degrees(Self::lunar_phase(moment)), + ) + } + + fn shaukat_criterion(date: Moment, location: Location) -> bool { + let tee = Self::simple_best_view((date - 1.0).as_rata_die(), location); + let phase = Self::lunar_phase(tee); + let h = Self::lunar_altitude(tee, location); + let cap_arcl = Self::arc_of_light(tee); + + let new = 0.0; + let first_quarter = 90.0; + let deg_10_6 = 10.6; + let deg_90 = 90.0; + let deg_4_1 = 4.1; + + if phase > new + && phase < first_quarter + && cap_arcl >= deg_10_6 + && cap_arcl <= deg_90 + && h > deg_4_1 + { + return true; + } + + false + } + + // Only for use in Islamic calendar + pub(crate) fn visible_crescent(date: Moment, location: Location) -> bool { + Self::shaukat_criterion(date, location) + } + // Given an angle and a Moment moment, approximate the Moment at or before moment // at which solar longitude exceeded the given angle. 
pub(crate) fn estimate_prior_solar_longitude(angle: f64, moment: Moment) -> Moment { @@ -1082,32 +1162,28 @@ impl Astronomical { } } - #[allow(dead_code)] // TODO: Remove dead_code tag after use - // This code differs from the lisp/book code by taking in a julian centuries value instead of - // a Moment; this is because aberration is only ever called in the fn solar_longitude, which - // already converts moment to julian centuries. Thus this function takes the julian centuries - // to avoid unnecessarily calculating the same value twice. + // This code differs from the lisp/book code by taking in a julian centuries value instead of + // a Moment; this is because aberration is only ever called in the fn solar_longitude, which + // already converts moment to julian centuries. Thus this function takes the julian centuries + // to avoid unnecessarily calculating the same value twice. fn aberration(c: f64) -> f64 { 0.0000974 * libm::cos((177.63 + 35999.01848 * c).to_radians()) - 0.005575 } /// Find the time of the new moon preceding a given Moment /// (the last new moon before moment) - #[allow(dead_code)] // TODO: Remove dead_code tag after use pub(crate) fn new_moon_before(moment: Moment) -> Moment { Self::nth_new_moon(Self::num_of_new_moon_at_or_after(moment) - 1) } /// Find the time of the new moon following a given Moment /// (the first new moon after moment) - #[allow(dead_code)] // TODO: Remove dead_code tag after use pub(crate) fn new_moon_at_or_after(moment: Moment) -> Moment { Self::nth_new_moon(Self::num_of_new_moon_at_or_after(moment)) } // Function to find the number of the new moon at or after a given moment; // helper function for new_moon_before and new_moon_at_or_after - #[allow(dead_code)] // TODO: Remove dead_code tag after use fn num_of_new_moon_at_or_after(moment: Moment) -> i32 { let t0: Moment = Self::nth_new_moon(0); let phi = Self::lunar_phase(moment); @@ -1127,7 +1203,7 @@ impl Astronomical { } result } - #[allow(dead_code)] + pub(crate) fn 
sine_offset(moment: Moment, location: Location, alpha: f64) -> f64 { let phi = location.latitude; let tee_prime = Location::universal_from_local(moment, location); @@ -1147,14 +1223,6 @@ mod tests { const TEST_LOWER_BOUND_FACTOR: f64 = 0.9999999; const TEST_UPPER_BOUND_FACTOR: f64 = 1.0000001; - // Location of mecca from the lisp code - const MECCA: Location = Location { - latitude: 6427.0 / 300.0, - longitude: 11947.0 / 300.0, - elevation: 298.0, - zone: (1_f64 / 8_f64), - }; - macro_rules! assert_eq_f64 { ($expected_value:expr, $value:expr, $moment:expr) => { if $expected_value > 0.0 { diff --git a/components/calendar/src/buddhist.rs b/components/calendar/src/buddhist.rs index a9af5679f15..a13c1b7ad76 100644 --- a/components/calendar/src/buddhist.rs +++ b/components/calendar/src/buddhist.rs @@ -75,7 +75,7 @@ impl Calendar for Buddhist { } let year = year - BUDDHIST_ERA_OFFSET; - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(IsoDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(IsoDateInner) } fn date_from_iso(&self, iso: Date) -> IsoDateInner { *iso.inner() @@ -220,3 +220,259 @@ fn iso_year_as_buddhist(year: i32) -> types::FormattableYear { related_iso: None, } } + +#[cfg(test)] +mod test { + use crate::rata_die::RataDie; + + use super::*; + + #[test] + fn test_buddhist_roundtrip_near_rd_zero() { + for i in -10000..=10000 { + let rd = RataDie::new(i); + let iso1 = Iso::iso_from_fixed(rd); + let buddhist = iso1.to_calendar(Buddhist); + let iso2 = buddhist.to_calendar(Iso); + let result = Iso::fixed_from_iso(iso2.inner); + assert_eq!(rd, result); + } + } + + #[test] + fn test_buddhist_roundtrip_near_epoch() { + // Buddhist epoch start RD: -198326 + for i in -208326..=-188326 { + let rd = RataDie::new(i); + let iso1 = Iso::iso_from_fixed(rd); + let buddhist = iso1.to_calendar(Buddhist); + let iso2 = buddhist.to_calendar(Iso); + let result = Iso::fixed_from_iso(iso2.inner); + assert_eq!(rd, result); + } + } + + 
#[test] + fn test_buddhist_directionality_near_rd_zero() { + for i in -100..=100 { + for j in -100..=100 { + let iso_i = Iso::iso_from_fixed(RataDie::new(i)); + let iso_j = Iso::iso_from_fixed(RataDie::new(j)); + + let buddhist_i = Date::new_from_iso(iso_i, Buddhist); + let buddhist_j = Date::new_from_iso(iso_j, Buddhist); + + assert_eq!( + i.cmp(&j), + iso_i.cmp(&iso_j), + "ISO directionality inconsistent with directionality for i: {i}, j: {j}" + ); + + assert_eq!( + i.cmp(&j), + buddhist_i.cmp(&buddhist_j), + "Buddhist directionality inconsistent with directionality for i: {i}, j: {j}" + ); + } + } + } + + #[test] + fn test_buddhist_directionality_near_epoch() { + // Buddhist epoch start RD: -198326 + for i in -198426..=-198226 { + for j in -198426..=-198226 { + let iso_i = Iso::iso_from_fixed(RataDie::new(i)); + let iso_j = Iso::iso_from_fixed(RataDie::new(j)); + + let buddhist_i = Date::new_from_iso(iso_i, Buddhist); + let buddhist_j = Date::new_from_iso(iso_j, Buddhist); + + assert_eq!( + i.cmp(&j), + iso_i.cmp(&iso_j), + "ISO directionality inconsistent with directionality for i: {i}, j: {j}" + ); + + assert_eq!( + i.cmp(&j), + buddhist_i.cmp(&buddhist_j), + "Buddhist directionality inconsistent with directionality for i: {i}, j: {j}" + ); + } + } + } + + #[derive(Debug)] + struct TestCase { + iso_year: i32, + iso_month: u8, + iso_day: u8, + buddhist_year: i32, + buddhist_month: u8, + buddhist_day: u8, + } + + fn check_test_case(case: TestCase) { + let iso_year = case.iso_year; + let iso_month = case.iso_month; + let iso_day = case.iso_day; + let buddhist_year = case.buddhist_year; + let buddhist_month = case.buddhist_month; + let buddhist_day = case.buddhist_day; + + let iso1 = Date::try_new_iso_date(iso_year, iso_month, iso_day).unwrap(); + let buddhist1 = iso1.to_calendar(Buddhist); + assert_eq!( + buddhist1.year().number, + buddhist_year, + "Iso -> Buddhist year check failed for case: {case:?}" + ); + assert_eq!( + buddhist1.month().ordinal, + 
buddhist_month as u32, + "Iso -> Buddhist month check failed for case: {case:?}" + ); + assert_eq!( + buddhist1.day_of_month().0, + buddhist_day as u32, + "Iso -> Buddhist day check failed for case: {case:?}" + ); + + let buddhist2 = + Date::try_new_buddhist_date(buddhist_year, buddhist_month, buddhist_day).unwrap(); + let iso2 = buddhist2.to_calendar(Iso); + assert_eq!( + iso2.year().number, + iso_year, + "Buddhist -> Iso year check failed for case: {case:?}" + ); + assert_eq!( + iso2.month().ordinal, + iso_month as u32, + "Buddhist -> Iso month check failed for case: {case:?}" + ); + assert_eq!( + iso2.day_of_month().0, + iso_day as u32, + "Buddhist -> Iso day check failed for case: {case:?}" + ); + } + + #[test] + fn test_buddhist_cases_near_rd_zero() { + let cases = [ + TestCase { + iso_year: -100, + iso_month: 2, + iso_day: 15, + buddhist_year: 443, + buddhist_month: 2, + buddhist_day: 15, + }, + TestCase { + iso_year: -3, + iso_month: 10, + iso_day: 29, + buddhist_year: 540, + buddhist_month: 10, + buddhist_day: 29, + }, + TestCase { + iso_year: 0, + iso_month: 12, + iso_day: 31, + buddhist_year: 543, + buddhist_month: 12, + buddhist_day: 31, + }, + TestCase { + iso_year: 1, + iso_month: 1, + iso_day: 1, + buddhist_year: 544, + buddhist_month: 1, + buddhist_day: 1, + }, + TestCase { + iso_year: 4, + iso_month: 2, + iso_day: 29, + buddhist_year: 547, + buddhist_month: 2, + buddhist_day: 29, + }, + ]; + + for case in cases { + check_test_case(case); + } + } + + #[test] + fn test_buddhist_cases_near_epoch() { + // 1 BE = 543 BCE = -542 ISO + let cases = [ + TestCase { + iso_year: -554, + iso_month: 12, + iso_day: 31, + buddhist_year: -11, + buddhist_month: 12, + buddhist_day: 31, + }, + TestCase { + iso_year: -553, + iso_month: 1, + iso_day: 1, + buddhist_year: -10, + buddhist_month: 1, + buddhist_day: 1, + }, + TestCase { + iso_year: -544, + iso_month: 8, + iso_day: 31, + buddhist_year: -1, + buddhist_month: 8, + buddhist_day: 31, + }, + TestCase { + iso_year: 
-543, + iso_month: 5, + iso_day: 12, + buddhist_year: 0, + buddhist_month: 5, + buddhist_day: 12, + }, + TestCase { + iso_year: -543, + iso_month: 12, + iso_day: 31, + buddhist_year: 0, + buddhist_month: 12, + buddhist_day: 31, + }, + TestCase { + iso_year: -542, + iso_month: 1, + iso_day: 1, + buddhist_year: 1, + buddhist_month: 1, + buddhist_day: 1, + }, + TestCase { + iso_year: -541, + iso_month: 7, + iso_day: 9, + buddhist_year: 2, + buddhist_month: 7, + buddhist_day: 9, + }, + ]; + + for case in cases { + check_test_case(case); + } + } +} diff --git a/components/calendar/src/calendar_arithmetic.rs b/components/calendar/src/calendar_arithmetic.rs index c977ec0576d..3e11deddfab 100644 --- a/components/calendar/src/calendar_arithmetic.rs +++ b/components/calendar/src/calendar_arithmetic.rs @@ -193,11 +193,12 @@ impl ArithmeticDate { /// The [`types::FormattableMonth`] for the current month (with month code) for a solar calendar /// Lunar calendars should not use this method and instead manually implement a month code /// resolver. + /// Originally "solar_month" but renamed because it can be used for some lunar calendars /// /// Returns "und" if run with months that are out of bounds for the current /// calendar. #[inline] - pub fn solar_month(&self) -> types::FormattableMonth { + pub fn month(&self) -> types::FormattableMonth { let code = match self.month { a if a > C::months_for_every_year(self.year) => tinystr!(4, "und"), 1 => tinystr!(4, "M01"), @@ -223,7 +224,8 @@ impl ArithmeticDate { /// Construct a new arithmetic date from a year, month code, and day, bounds checking /// the month and day - pub fn new_from_solar_codes( + /// Originally (new_from_solar_codes) but renamed because it works for some lunar calendars + pub fn new_from_codes( // Separate type since the debug_name() impl may differ when DateInner types // are nested (e.g. 
in GregorianDateInner) cal: &C2, @@ -231,7 +233,7 @@ impl ArithmeticDate { month_code: types::MonthCode, day: u8, ) -> Result { - let month = if let Some(ordinal) = ordinal_solar_month_from_code(month_code) { + let month = if let Some(ordinal) = ordinal_month_from_code(month_code) { ordinal } else { return Err(CalendarError::UnknownMonthCode( @@ -260,7 +262,8 @@ impl ArithmeticDate { /// Construct a new arithmetic date from a year, month ordinal, and day, bounds checking /// the month and day - pub fn new_from_solar_ordinals(year: i32, month: u8, day: u8) -> Result { + /// Originally (new_from_solar_ordinals) but renamed because it works for some lunar calendars + pub fn new_from_ordinals(year: i32, month: u8, day: u8) -> Result { let max_month = C::months_for_every_year(year); if month > max_month { return Err(CalendarError::Overflow { @@ -280,15 +283,15 @@ impl ArithmeticDate { Ok(Self::new_unchecked(year, month, day)) } - /// This fn currently just calls [`new_from_solar_ordinals`], but exists separately for + /// This fn currently just calls [`new_from_ordinals`], but exists separately for /// lunar calendars in case different logic needs to be implemented later. 
pub fn new_from_lunar_ordinals(year: i32, month: u8, day: u8) -> Result { - Self::new_from_solar_ordinals(year, month, day) + Self::new_from_ordinals(year, month, day) } } /// For solar calendars, get the month number from the month code -pub fn ordinal_solar_month_from_code(code: types::MonthCode) -> Option { +pub fn ordinal_month_from_code(code: types::MonthCode) -> Option { // Match statements on tinystrs are annoying so instead // we calculate it from the bytes directly if code.0.len() != 3 { diff --git a/components/calendar/src/coptic.rs b/components/calendar/src/coptic.rs index ef0c769f20a..2801b70639b 100644 --- a/components/calendar/src/coptic.rs +++ b/components/calendar/src/coptic.rs @@ -124,7 +124,7 @@ impl Calendar for Coptic { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); }; - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(CopticDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(CopticDateInner) } fn date_from_iso(&self, iso: Date) -> CopticDateInner { let fixed_iso = Iso::fixed_from_iso(*iso.inner()); @@ -173,7 +173,7 @@ impl Calendar for Coptic { } fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { - date.0.solar_month() + date.0.month() } fn day_of_month(&self, date: &Self::DateInner) -> types::DayOfMonth { @@ -266,7 +266,7 @@ impl Date { month: u8, day: u8, ) -> Result, CalendarError> { - ArithmeticDate::new_from_solar_ordinals(year, month, day) + ArithmeticDate::new_from_ordinals(year, month, day) .map(CopticDateInner) .map(|inner| Date::from_raw(inner, Coptic)) } diff --git a/components/calendar/src/error.rs b/components/calendar/src/error.rs index 5ae875e6314..6964883a445 100644 --- a/components/calendar/src/error.rs +++ b/components/calendar/src/error.rs @@ -60,6 +60,7 @@ pub enum CalendarError { /// A list of error outcomes for exceeding location bounds #[derive(Display, Debug, Copy, Clone, PartialEq)] +#[allow(dead_code)] // TODO: Delete after use pub enum 
LocationError { /// Latitude value was out of bounds #[displaydoc("Latitude {0} outside bounds of -90 to 90")] diff --git a/components/calendar/src/ethiopian.rs b/components/calendar/src/ethiopian.rs index bbf85edb668..fc778fc4942 100644 --- a/components/calendar/src/ethiopian.rs +++ b/components/calendar/src/ethiopian.rs @@ -146,7 +146,7 @@ impl Calendar for Ethiopian { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); }; - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(EthiopianDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(EthiopianDateInner) } fn date_from_iso(&self, iso: Date) -> EthiopianDateInner { let fixed_iso = Iso::fixed_from_iso(*iso.inner()); @@ -195,7 +195,7 @@ impl Calendar for Ethiopian { } fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { - date.0.solar_month() + date.0.month() } fn day_of_month(&self, date: &Self::DateInner) -> types::DayOfMonth { @@ -344,7 +344,7 @@ impl Date { if era_style == EthiopianEraStyle::AmeteAlem { year -= AMETE_ALEM_OFFSET; } - ArithmeticDate::new_from_solar_ordinals(year, month, day) + ArithmeticDate::new_from_ordinals(year, month, day) .map(EthiopianDateInner) .map(|inner| Date::from_raw(inner, Ethiopian::new_with_era_style(era_style))) } diff --git a/components/calendar/src/gregorian.rs b/components/calendar/src/gregorian.rs index 68880194f20..37cd37152e6 100644 --- a/components/calendar/src/gregorian.rs +++ b/components/calendar/src/gregorian.rs @@ -79,7 +79,7 @@ impl Calendar for Gregorian { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); }; - ArithmeticDate::new_from_solar_codes(self, year, month_code, day) + ArithmeticDate::new_from_codes(self, year, month_code, day) .map(IsoDateInner) .map(GregorianDateInner) } diff --git a/components/calendar/src/helpers.rs b/components/calendar/src/helpers.rs index 7b1cfb19f36..11da1a2bede 100644 --- a/components/calendar/src/helpers.rs +++ 
b/components/calendar/src/helpers.rs @@ -2,8 +2,11 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::astronomy::PI; -use alloc::vec::Vec; +use crate::{ + astronomy::{Location, PI}, + rata_die::RataDie, + types::Moment, +}; /// Calculate `(n / d, n % d)` such that the remainder is always positive. /// @@ -86,16 +89,19 @@ pub const fn quotient64(n: i64, d: i64) -> i64 { a - 1 } } + // cosine of x in radians pub fn cos_degrees(x: f64) -> f64 { let radians = x.to_radians(); libm::cos(radians) } + // sine of x in radians pub fn sin_degrees(x: f64) -> f64 { let radians = x.to_radians(); libm::sin(radians) } + // tan of x in radians pub fn tan_degrees(x: f64) -> f64 { let radians = x.to_radians(); @@ -146,13 +152,15 @@ pub fn arctan_degrees(y: f64, x: f64) -> Result { Ok(mod_degrees(if x >= 0.0 { alpha } else { alpha + 180.0 })) } } + // TODO: convert recursive into iterative -pub fn poly(x: f64, coeffs: Vec) -> f64 { +pub fn poly(x: f64, coeffs: &[f64]) -> f64 { match coeffs.split_first() { - Some((first, rest)) => first + x * poly(x, rest.to_vec()), + Some((first, rest)) => first + x * poly(x, rest), None => 0.0, } } + // A generic function that finds a value within an interval // where a certain condition is satisfied. pub fn binary_search(mut l: f64, mut h: f64, test: F, end: G) -> f64 @@ -177,6 +185,7 @@ where } } } + // Returns a number that represents the sign of `self`. 
// - `1.0` if the number is positive, `+0.0` or `INFINITY` // - `-1.0` if the number is negative, `-0.0` or `NEG_INFINITY` @@ -202,6 +211,32 @@ pub fn invert_angular f64>(f: F, y: f64, r: (f64, f64)) -> f64 { |u, l| (u - l) < varepsilon, ) } + +// Used for Umm-Al-Qura calculations +pub(crate) fn next_moment(mut index: Moment, location: Location, condition: F) -> RataDie +where + F: Fn(Moment, Location) -> bool, +{ + loop { + if condition(index, location) { + return index.as_rata_die(); + } + index += 1.0; + } +} +#[allow(dead_code)] +pub(crate) fn next(mut index: RataDie, condition: F) -> RataDie +where + F: Fn(RataDie) -> bool, +{ + loop { + if condition(index) { + return index; + } + index += 1; + } +} + #[test] fn test_binary_search() { struct TestCase { diff --git a/components/calendar/src/indian.rs b/components/calendar/src/indian.rs index 40aa7c73a3b..1a9dd9e6c9e 100644 --- a/components/calendar/src/indian.rs +++ b/components/calendar/src/indian.rs @@ -113,7 +113,7 @@ impl Calendar for Indian { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); } - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(IndianDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(IndianDateInner) } // @@ -193,7 +193,7 @@ impl Calendar for Indian { } fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { - date.0.solar_month() + date.0.month() } fn day_of_month(&self, date: &Self::DateInner) -> types::DayOfMonth { @@ -264,7 +264,7 @@ impl Date { month: u8, day: u8, ) -> Result, CalendarError> { - ArithmeticDate::new_from_solar_ordinals(year, month, day) + ArithmeticDate::new_from_ordinals(year, month, day) .map(IndianDateInner) .map(|inner| Date::from_raw(inner, Indian)) } diff --git a/components/calendar/src/islamic.rs b/components/calendar/src/islamic.rs new file mode 100644 index 00000000000..4a2651b117c --- /dev/null +++ b/components/calendar/src/islamic.rs @@ -0,0 +1,531 @@ +// This file is part of ICU4X. 
For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! This module contains types and implementations for the Islamic calendars. +//! +//! ```rust +//! use icu::calendar::{Date, DateTime}; +//! +//! // `Date` type +//! let islamic_date = Date::try_new_observational_islamic_date(1348, 10, 11) +//! .expect("Failed to initialize islamic Date instance."); +//! +//! // `DateTime` type +//! let islamic_datetime = DateTime::try_new_observational_islamic_datetime(1348, 10, 11, 13, 1, 0) +//! .expect("Failed to initialize islamic DateTime instance."); +//! +//! // `Date` checks +//! assert_eq!(islamic_date.year().number, 1348); +//! assert_eq!(islamic_date.month().ordinal, 10); +//! assert_eq!(islamic_date.day_of_month().0, 11); +//! +//! // `DateTime` checks +//! assert_eq!(islamic_datetime.date.year().number, 1348); +//! assert_eq!(islamic_datetime.date.month().ordinal, 10); +//! assert_eq!(islamic_datetime.date.day_of_month().0, 11); +//! assert_eq!(islamic_datetime.time.hour.number(), 13); +//! assert_eq!(islamic_datetime.time.minute.number(), 1); +//! assert_eq!(islamic_datetime.time.second.number(), 0); +//! 
``` + +use crate::calendar_arithmetic::{ArithmeticDate, CalendarArithmetic}; +use crate::helpers::div_rem_euclid; +use crate::julian::Julian; +use crate::rata_die::RataDie; +use crate::{astronomy::*, Iso}; +use crate::{types, Calendar, CalendarError, Date, DateDuration, DateDurationUnit, DateTime}; +use ::tinystr::tinystr; + +#[derive(Copy, Clone, Debug, Default, Hash, Eq, PartialEq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] +/// Islamic Observational Calendar (Default) +pub struct IslamicObservational; +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] +/// Civil / Arithmetical Islamic Calendar (Used for administrative purposes) +pub struct IslamicCivil; +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] +/// Umm-al-Qura Hijri Calendar (Used in Saudi Arabia) +pub struct UmmalQura; +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] +#[allow(clippy::exhaustive_structs)] +/// A Tabular version of the Arithmetical Islamic Calendar +pub struct IslamicTabular; + +// Lisp code reference: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L2066 +const FIXED_ISLAMIC_EPOCH_FRIDAY: RataDie = Julian::fixed_from_julian_integers(622, 7, 16); +// const FIXED_ISLAMIC_EPOCH_THURSDAY: RataDie = Julian::fixed_from_julian_integers(622, 7, 15); + +// Lisp code reference: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L6898 +const CAIRO: Location = Location { + latitude: 30.1, + longitude: 31.3, + elevation: 200.0, + zone: (1_f64 / 12_f64), +}; + +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)] +/// The inner date type used for representing [`Date`]s of [`IslamicObservational`]. See [`Date`] and [`IslamicObservational`] for more details. 
+pub struct IslamicDateInner(ArithmeticDate); + +// OBSERVATIONAL CALENDAR + +impl CalendarArithmetic for IslamicObservational { + fn month_days(year: i32, month: u8) -> u8 { + let midmonth = FIXED_ISLAMIC_EPOCH_FRIDAY.to_f64_date() + + (((year - 1) as f64) * 12.0 + month as f64 - 0.5) * MEAN_SYNODIC_MONTH; + + let f_date = Astronomical::phasis_on_or_before(RataDie::new(midmonth as i64), CAIRO); + + Astronomical::month_length(f_date, CAIRO) + } + + fn months_for_every_year(_year: i32) -> u8 { + 12 + } + + fn days_in_provided_year(_year: i32) -> u32 { + 355 + } + + // As an observational-lunar calendar, it does not have leap years. + fn is_leap_year(_year: i32) -> bool { + false + } + + fn last_month_day_in_year(year: i32) -> (u8, u8) { + let days = Self::month_days(year, 12); + + (12, days) + } +} + +impl Calendar for IslamicObservational { + type DateInner = IslamicDateInner; + fn date_from_codes( + &self, + era: types::Era, + year: i32, + month_code: types::MonthCode, + day: u8, + ) -> Result { + let year = if era.0 == tinystr!(16, "ah") { + year + } else { + return Err(CalendarError::UnknownEra(era.0, self.debug_name())); + }; + + ArithmeticDate::new_from_codes(self, year, month_code, day).map(IslamicDateInner) + } + + fn date_from_iso(&self, iso: Date) -> Self::DateInner { + let fixed_iso = Iso::fixed_from_iso(*iso.inner()); + Self::islamic_from_fixed(fixed_iso).inner + } + + fn date_to_iso(&self, date: &Self::DateInner) -> Date { + let fixed_islamic = Self::fixed_from_islamic(*date); + Iso::iso_from_fixed(fixed_islamic) + } + + fn months_in_year(&self, date: &Self::DateInner) -> u8 { + date.0.months_in_year() + } + + fn days_in_year(&self, date: &Self::DateInner) -> u32 { + date.0.days_in_year() + } + + fn days_in_month(&self, date: &Self::DateInner) -> u8 { + date.0.days_in_month() + } + + fn day_of_week(&self, date: &Self::DateInner) -> types::IsoWeekday { + Iso.day_of_week(self.date_to_iso(date).inner()) + } + + fn offset_date(&self, date: &mut 
Self::DateInner, offset: DateDuration) { + date.0.offset_date(offset) + } + + fn until( + &self, + date1: &Self::DateInner, + date2: &Self::DateInner, + _calendar2: &Self, + _largest_unit: DateDurationUnit, + _smallest_unit: DateDurationUnit, + ) -> DateDuration { + date1.0.until(date2.0, _largest_unit, _smallest_unit) + } + + fn debug_name(&self) -> &'static str { + "IslamicObservational" + } + + fn year(&self, date: &Self::DateInner) -> types::FormattableYear { + Self::year_as_islamic(date.0.year) + } + + fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { + date.0.month() + } + + fn day_of_month(&self, date: &Self::DateInner) -> types::DayOfMonth { + date.0.day_of_month() + } + + fn day_of_year_info(&self, date: &Self::DateInner) -> types::DayOfYearInfo { + let prev_year = date.0.year.saturating_sub(1); + let next_year = date.0.year.saturating_add(1); + types::DayOfYearInfo { + day_of_year: date.0.day_of_year(), + days_in_year: date.0.days_in_year(), + prev_year: Self::year_as_islamic(prev_year), + days_in_prev_year: Self::days_in_provided_year(prev_year), + next_year: Self::year_as_islamic(next_year), + } + } + + // fn days_in_month(&self, date: &Self::DateInner) -> u8 { + // todo!() + // } + // TODO: ADD TO ANYCALENDAR + // fn any_calendar_kind(&self) -> Option { + // Some(AnyCalendarKind::IslamicObservational) + // } +} + +impl IslamicObservational { + /// Constructs a new Islamic Observational Calendar + pub fn new() -> Self { + Self + } + + // "Fixed" is a day count representation of calendars starting from Jan 1st of year 1 of the Gregorian Calendar. + // The fixed date algorithms are from + // Dershowitz, Nachum, and Edward M. Reingold. _Calendrical calculations_. Cambridge University Press, 2008.
+ // + // Lisp code reference: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L6904 + fn fixed_from_islamic(i_date: IslamicDateInner) -> RataDie { + let year = i64::from(i_date.0.year); + let month = i64::from(i_date.0.month); + let day = i64::from(i_date.0.day); + + let midmonth = FIXED_ISLAMIC_EPOCH_FRIDAY.to_f64_date() + + (((year - 1) as f64) * 12.0 + month as f64 - 0.5) * MEAN_SYNODIC_MONTH; + // Midmonth can be cast down because we just want a date within the 30-day interval; precision is not important. + Astronomical::phasis_on_or_before(RataDie::new(midmonth as i64), CAIRO) + day - 1 + } + + // Lisp code reference: https://github.com/EdReingold/calendar-code2/blob/main/calendar.l#L6920 + #[allow(clippy::unwrap_used)] + fn islamic_from_fixed(date: RataDie) -> Date { + let crescent = Astronomical::phasis_on_or_before(date, CAIRO); + let elapsed_months = + (libm::round((crescent - FIXED_ISLAMIC_EPOCH_FRIDAY) as f64 / MEAN_SYNODIC_MONTH)) + as i32; + let year = div_rem_euclid(elapsed_months, 12).0 + 1; + let month = div_rem_euclid(elapsed_months, 12).1 + 1; + let day = (date - crescent + 1) as u8; + + Date::try_new_observational_islamic_date(year, month as u8, day).unwrap() + } + + // pub(crate) fn fixed_from_islamic_integers(year: i32, month: u8, day: u8) -> Option { + // Date::try_new_observational_islamic_date(year, month, day) + // .ok() + // .map(|d| *d.inner()) + // .map(Self::fixed_from_islamic) + // } + + fn year_as_islamic(year: i32) -> types::FormattableYear { + types::FormattableYear { + era: types::Era(tinystr!(16, "ah")), + number: year, + cyclic: None, + related_iso: None, + } + } +} + +impl Date { + /// Construct new Islamic Observational Date. + /// + /// Has no negative years; the only era is AH.
+ /// + /// ```rust + /// use icu::calendar::Date; + /// + /// let date_islamic = Date::try_new_observational_islamic_date(1392, 4, 25) + /// .expect("Failed to initialize Islamic Date instance."); + /// + /// assert_eq!(date_islamic.year().number, 1392); + /// assert_eq!(date_islamic.month().ordinal, 4); + /// assert_eq!(date_islamic.day_of_month().0, 25); + /// ``` + pub fn try_new_observational_islamic_date( + year: i32, + month: u8, + day: u8, + ) -> Result, CalendarError> { + ArithmeticDate::new_from_lunar_ordinals(year, month, day) + .map(IslamicDateInner) + .map(|inner| Date::from_raw(inner, IslamicObservational)) + } +} + +impl DateTime { + /// Construct a new Islamic Observational datetime from integers. + /// + /// ```rust + /// use icu::calendar::DateTime; + /// + /// let datetime_islamic = DateTime::try_new_observational_islamic_datetime(474, 10, 11, 13, 1, 0) + /// .expect("Failed to initialize Islamic DateTime instance."); + /// + /// assert_eq!(datetime_islamic.date.year().number, 474); + /// assert_eq!(datetime_islamic.date.month().ordinal, 10); + /// assert_eq!(datetime_islamic.date.day_of_month().0, 11); + /// assert_eq!(datetime_islamic.time.hour.number(), 13); + /// assert_eq!(datetime_islamic.time.minute.number(), 1); + /// assert_eq!(datetime_islamic.time.second.number(), 0); + /// ``` + pub fn try_new_observational_islamic_datetime( + year: i32, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: u8, + ) -> Result, CalendarError> { + Ok(DateTime { + date: Date::try_new_observational_islamic_date(year, month, day)?, + time: types::Time::try_new(hour, minute, second, 0)?, + }) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[derive(Debug)] + struct DateCase { + year: i32, + month: u8, + day: u8, + } + + static TEST_FIXED_DATE: [i64; 33] = [ + -214193, -61387, 25469, 49217, 171307, 210155, 253427, 369740, 400085, 434355, 452605, + 470160, 473837, 507850, 524156, 544676, 567118, 569477, 601716, 613424, 626596, 645554, + 664224, 
671401, 694799, 704424, 708842, 709409, 709580, 727274, 728714, 744313, 764652, + ]; + + static ASTRONOMICAL_CASES: [DateCase; 33] = [ + DateCase { + year: -1245, + month: 12, + day: 11, + }, + DateCase { + year: -813, + month: 2, + day: 25, + }, + DateCase { + year: -568, + month: 4, + day: 2, + }, + DateCase { + year: -501, + month: 4, + day: 7, + }, + DateCase { + year: -157, + month: 10, + day: 18, + }, + DateCase { + year: -47, + month: 6, + day: 3, + }, + DateCase { + year: 75, + month: 7, + day: 13, + }, + DateCase { + year: 403, + month: 10, + day: 5, + }, + DateCase { + year: 489, + month: 5, + day: 22, + }, + DateCase { + year: 586, + month: 2, + day: 7, + }, + DateCase { + year: 637, + month: 8, + day: 7, + }, + DateCase { + year: 687, + month: 2, + day: 21, + }, + DateCase { + year: 697, + month: 7, + day: 7, + }, + DateCase { + year: 793, + month: 6, + day: 30, + }, + DateCase { + year: 839, + month: 7, + day: 6, + }, + DateCase { + year: 897, + month: 6, + day: 2, + }, + DateCase { + year: 960, + month: 9, + day: 30, + }, + DateCase { + year: 967, + month: 5, + day: 27, + }, + DateCase { + year: 1058, + month: 5, + day: 18, + }, + DateCase { + year: 1091, + month: 6, + day: 3, + }, + DateCase { + year: 1128, + month: 8, + day: 4, + }, + DateCase { + year: 1182, + month: 2, + day: 4, + }, + DateCase { + year: 1234, + month: 10, + day: 10, + }, + DateCase { + year: 1255, + month: 1, + day: 11, + }, + DateCase { + year: 1321, + month: 1, + day: 20, + }, + DateCase { + year: 1348, + month: 3, + day: 19, + }, + DateCase { + year: 1360, + month: 9, + day: 7, + }, + DateCase { + year: 1362, + month: 4, + day: 14, + }, + DateCase { + year: 1362, + month: 10, + day: 7, + }, + DateCase { + year: 1412, + month: 9, + day: 12, + }, + DateCase { + year: 1416, + month: 10, + day: 5, + }, + DateCase { + year: 1460, + month: 10, + day: 12, + }, + DateCase { + year: 1518, + month: 3, + day: 5, + }, + ]; + + #[test] + fn test_observational_islamic_from_fixed() { + for 
(case, f_date) in ASTRONOMICAL_CASES.iter().zip(TEST_FIXED_DATE.iter()) { + let date = + Date::try_new_observational_islamic_date(case.year, case.month, case.day).unwrap(); + assert_eq!( + IslamicObservational::islamic_from_fixed(RataDie::new(*f_date)), + date, + "{case:?}" + ); + } + } + + #[test] + fn test_fixed_from_observational_islamic() { + for (case, f_date) in ASTRONOMICAL_CASES.iter().zip(TEST_FIXED_DATE.iter()) { + let date = IslamicDateInner(ArithmeticDate::new_unchecked( + case.year, case.month, case.day, + )); + assert_eq!( + IslamicObservational::fixed_from_islamic(date), + RataDie::new(*f_date), + "{case:?}" + ); + } + } + + #[test] + fn test_islamic_epoch() { + let epoch = FIXED_ISLAMIC_EPOCH_FRIDAY.to_i64_date(); + // Iso year of Islamic Epoch + let epoch_year_from_fixed = Iso::iso_from_fixed(RataDie::new(epoch)).inner.0.year; + // 622 is the correct ISO year for the Islamic Epoch + assert_eq!(epoch_year_from_fixed, 622); + } +} diff --git a/components/calendar/src/iso.rs b/components/calendar/src/iso.rs index 05e577ce4d8..43ebd2cf484 100644 --- a/components/calendar/src/iso.rs +++ b/components/calendar/src/iso.rs @@ -107,7 +107,7 @@ impl Calendar for Iso { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); } - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(IsoDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(IsoDateInner) } fn date_from_iso(&self, iso: Date) -> IsoDateInner { @@ -199,7 +199,7 @@ impl Calendar for Iso { /// The calendar-specific month represented by `date` fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { - date.0.solar_month() + date.0.month() } /// The calendar-specific day-of-month represented by `date` @@ -242,7 +242,7 @@ impl Date { /// assert_eq!(date_iso.day_of_month().0, 2); /// ``` pub fn try_new_iso_date(year: i32, month: u8, day: u8) -> Result, CalendarError> { - ArithmeticDate::new_from_solar_ordinals(year, month, day) + 
ArithmeticDate::new_from_ordinals(year, month, day) .map(IsoDateInner) .map(|inner| Date::from_raw(inner, Iso)) } diff --git a/components/calendar/src/japanese.rs b/components/calendar/src/japanese.rs index 0edecacf647..267f06a04c4 100644 --- a/components/calendar/src/japanese.rs +++ b/components/calendar/src/japanese.rs @@ -151,7 +151,7 @@ impl Japanese { day: u8, debug_name: &'static str, ) -> Result { - let month = crate::calendar_arithmetic::ordinal_solar_month_from_code(month_code); + let month = crate::calendar_arithmetic::ordinal_month_from_code(month_code); let month = if let Some(month) = month { month } else { diff --git a/components/calendar/src/julian.rs b/components/calendar/src/julian.rs index 839c5f39c2c..29082a75259 100644 --- a/components/calendar/src/julian.rs +++ b/components/calendar/src/julian.rs @@ -118,7 +118,7 @@ impl Calendar for Julian { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); }; - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(JulianDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(JulianDateInner) } fn date_from_iso(&self, iso: Date) -> JulianDateInner { let fixed_iso = Iso::fixed_from_iso(*iso.inner()); @@ -170,7 +170,7 @@ impl Calendar for Julian { /// The calendar-specific month represented by `date` fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { - date.0.solar_month() + date.0.month() } /// The calendar-specific day-of-month represented by `date` @@ -296,7 +296,7 @@ impl Date { month: u8, day: u8, ) -> Result, CalendarError> { - ArithmeticDate::new_from_solar_ordinals(year, month, day) + ArithmeticDate::new_from_ordinals(year, month, day) .map(JulianDateInner) .map(|inner| Date::from_raw(inner, Julian)) } diff --git a/components/calendar/src/lib.rs b/components/calendar/src/lib.rs index 6dfb881a881..2704a964c1a 100644 --- a/components/calendar/src/lib.rs +++ b/components/calendar/src/lib.rs @@ -128,6 +128,7 @@ pub mod ethiopian; pub mod 
gregorian; mod helpers; pub mod indian; +pub mod islamic; pub mod iso; pub mod japanese; pub mod julian; diff --git a/components/calendar/src/persian.rs b/components/calendar/src/persian.rs index b3074094f19..d86ec27b5f2 100644 --- a/components/calendar/src/persian.rs +++ b/components/calendar/src/persian.rs @@ -122,7 +122,7 @@ impl Calendar for Persian { return Err(CalendarError::UnknownEra(era.0, self.debug_name())); }; - ArithmeticDate::new_from_solar_codes(self, year, month_code, day).map(PersianDateInner) + ArithmeticDate::new_from_codes(self, year, month_code, day).map(PersianDateInner) } fn date_from_iso(&self, iso: Date) -> PersianDateInner { @@ -172,7 +172,7 @@ impl Calendar for Persian { } fn month(&self, date: &Self::DateInner) -> types::FormattableMonth { - date.0.solar_month() + date.0.month() } fn day_of_month(&self, date: &Self::DateInner) -> types::DayOfMonth { @@ -319,7 +319,7 @@ impl Date { month: u8, day: u8, ) -> Result, CalendarError> { - ArithmeticDate::new_from_solar_ordinals(year, month, day) + ArithmeticDate::new_from_ordinals(year, month, day) .map(PersianDateInner) .map(|inner| Date::from_raw(inner, Persian)) } diff --git a/components/calendar/src/rata_die.rs b/components/calendar/src/rata_die.rs index d5c63256166..0f92859f5aa 100644 --- a/components/calendar/src/rata_die.rs +++ b/components/calendar/src/rata_die.rs @@ -21,6 +21,7 @@ impl RataDie { result.check(); result } + #[cfg(debug_assertions)] pub const fn check(&self) { if self.0 > i64::MAX / 256 { @@ -36,14 +37,21 @@ impl RataDie { ); } } + /// A valid RataDie that is intended to be below all dates representable in calendars #[cfg(test)] pub const fn big_negative() -> Self { Self::new(i64::MIN / 256 / 256) } + pub const fn to_i64_date(self) -> i64 { self.0 } + + pub const fn to_f64_date(self) -> f64 { + self.0 as f64 + } + /// Calculate the number of days between two RataDie in a const-friendly way pub const fn const_diff(self, rhs: Self) -> i64 { self.0 - rhs.0 diff --git 
a/components/calendar/src/types.rs b/components/calendar/src/types.rs index 6e9f67afe0e..70673b33c09 100644 --- a/components/calendar/src/types.rs +++ b/components/calendar/src/types.rs @@ -766,6 +766,7 @@ impl Moment { pub const fn inner(&self) -> f64 { self.0 } + /// Get the RataDie of a Moment pub fn as_rata_die(&self) -> RataDie { RataDie::new(libm::floor(self.0) as i64) diff --git a/docs/process/release.md b/docs/process/release.md index 3fed110f3eb..6c871e48738 100644 --- a/docs/process/release.md +++ b/docs/process/release.md @@ -29,6 +29,7 @@ Once the release is complete, the assigned release driver will: * This will only update crates that have changed, and will ask you which version number to bump for each crate * You can use commands like `git log icu@1.0.0..@ -- components/plurals/src/` and `cargo public-api -p icu_list diff 1.0.0` to figure out whether to do a major, minor, or patch release * Get this reviewed and checked in before continuing +* `cargo make testdata-legacy` to generate data for `icu_testdata` (which is gitignored) * Use `cargo workspaces publish --from-git` to automatically publish the crates in the correct order * Add `icu4x-release` group as owners to each new component you're publishing * `cargo owner -a github:unicode-org:icu4x-release` diff --git a/docs/tutorials/writing_a_new_data_struct.md b/docs/tutorials/writing_a_new_data_struct.md index 79d2db78bf1..7e584662428 100644 --- a/docs/tutorials/writing_a_new_data_struct.md +++ b/docs/tutorials/writing_a_new_data_struct.md @@ -126,7 +126,7 @@ When finished, run from the top level: $ cargo make testdata ``` -If everything is hooked together properly, JSON files for your new data struct should appear under *provider/testdata/data/json*, and the file *provider/testdata/data/testdata.postcard* should have changed. +If everything is hooked together properly, JSON files for your new data struct should appear under *provider/datagen/tests/data/json*. 
## Example diff --git a/ffi/diplomat/tests/missing_apis.txt b/ffi/diplomat/tests/missing_apis.txt index 9377485715c..1d8934234c4 100644 --- a/ffi/diplomat/tests/missing_apis.txt +++ b/ffi/diplomat/tests/missing_apis.txt @@ -15,14 +15,22 @@ icu::calendar::Date::try_new_chinese_date#FnInStruct +icu::calendar::Date::try_new_observational_islamic_date#FnInStruct icu::calendar::Date::try_new_persian_date#FnInStruct icu::calendar::DateTime::try_new_chinese_datetime#FnInStruct +icu::calendar::DateTime::try_new_observational_islamic_datetime#FnInStruct icu::calendar::DateTime::try_new_persian_datetime#FnInStruct icu::calendar::chinese::Chinese#Struct icu::calendar::chinese::Chinese::chinese_new_year_on_or_before_iso#FnInStruct icu::calendar::chinese::Chinese::major_solar_term_from_iso#FnInStruct icu::calendar::chinese::Chinese::minor_solar_term_from_iso#FnInStruct icu::calendar::chinese::ChineseDateInner#Struct +icu::calendar::islamic::IslamicCivil#Struct +icu::calendar::islamic::IslamicDateInner#Struct +icu::calendar::islamic::IslamicObservational#Struct +icu::calendar::islamic::IslamicObservational::new#FnInStruct +icu::calendar::islamic::IslamicTabular#Struct +icu::calendar::islamic::UmmalQura#Struct icu::calendar::persian::Persian#Struct icu::calendar::persian::Persian::new#FnInStruct icu::calendar::persian::PersianDateInner#Struct diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 84b72708ea6..c875b62d9cb 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -36,19 +36,18 @@ all-features = true # ICU components icu_calendar = { version = "1.2.0", path = "../../components/calendar", default-features = false, features = ["datagen"] } -icu_casemap = { version = "0.7.1", path = "../../experimental/casemap", default-features = false, features = ["datagen"] } +icu_casemap = { version = "0.7.1", path = "../../experimental/casemap", default-features = false, features = ["datagen"], optional = true } icu_collator = { version = 
"1.2.0", path = "../../components/collator", default-features = false, features = ["datagen"] } -icu_compactdecimal = { version = "0.2.0", path = "../../experimental/compactdecimal", default-features = false, features = ["datagen"] } +icu_compactdecimal = { version = "0.2.0", path = "../../experimental/compactdecimal", default-features = false, features = ["datagen"], optional = true } icu_datetime = { version = "1.2.0", path = "../../components/datetime", default-features = false, features = ["datagen"] } icu_decimal = { version = "1.2.0", path = "../../components/decimal", default-features = false, features = ["datagen"] } -icu_displaynames = { version = "0.10.0", path = "../../experimental/displaynames", default-features = false, features = ["datagen"] } +icu_displaynames = { version = "0.10.0", path = "../../experimental/displaynames", default-features = false, features = ["datagen"], optional = true } icu_list = { version = "1.2.0", path = "../../components/list", default-features = false, features = ["datagen"]} icu_locid_transform = { version = "1.2.0", path = "../../components/locid_transform", default-features = false, features = ["datagen"] } icu_normalizer = { version = "1.2.0", path = "../../components/normalizer", default-features = false, features = ["datagen"] } -icu_personnames = { version = "0.1.0", path = "../../experimental/personnames", default-features = false, features = ["datagen"] } icu_plurals = { version = "1.2.0", path = "../../components/plurals", default-features = false, features = ["datagen"] } icu_properties = { version = "1.2.0", path = "../../components/properties", default-features = false, features = ["datagen"]} -icu_relativetime = { version = "0.1.0", path = "../../experimental/relativetime", default-features = false, features = ["datagen"] } +icu_relativetime = { version = "0.1.0", path = "../../experimental/relativetime", default-features = false, features = ["datagen"], optional = true } icu_segmenter = { version = "1.2.0", 
path = "../../components/segmenter", default-features = false, features = ["datagen", "lstm"] } icu_timezone = { version = "1.2.0", path = "../../components/timezone", default-features = false, features = ["datagen"] } @@ -93,11 +92,9 @@ simple_logger = { version = "4.1.0", default-features = false, optional = true } [dev-dependencies] icu = { path = "../../components/icu" } -icu_provider_blob = { path = "../blob" } - -[target.'cfg(not(target_os = "windows"))'.dev-dependencies] -# The verify-zero-copy test is causing problems on Windows +glob = "0.3.1" dhat = "0.3.0" +simple_logger = { version = "4.1.0", default-features = false } [features] default = ["bin", "use_wasm", "networking", "legacy_api", "rayon"] @@ -121,9 +118,12 @@ path = "src/bin/datagen/mod.rs" required-features = ["bin"] [[test]] -name = "icu4x-verify-zero-copy" -path = "tests/verify-zero-copy.rs" +name = "make-testdata" +path = "tests/make-testdata.rs" +required-features = ["provider_fs", "use_wasm"] [package.metadata.cargo-all-features] # We don't need working CPT builders for check skip_feature_sets = [["use_icu4c"], ["use_wasm"]] +always_include_features = ["icu_casemap", "icu_compactdecimal", "icu_displaynames", "icu_relativetime"] +max_combination_size = 3 diff --git a/provider/datagen/src/registry.rs b/provider/datagen/src/registry.rs index f0f175db550..2af680709f5 100644 --- a/provider/datagen/src/registry.rs +++ b/provider/datagen/src/registry.rs @@ -109,11 +109,15 @@ macro_rules! registry { } )+ )+ - unreachable!("unregistered marker") + unreachable!("unregistered key {key:?}") } #[doc(hidden)] pub fn deserialize_and_discard(key: DataKey, buf: DataPayload, r: impl Fn() -> R) -> Result { + if key.path() == icu_provider::hello_world::HelloWorldV1Marker::KEY.path() { + let _reified_data: DataPayload = buf.into_deserialized(icu_provider::buf::BufferFormat::Postcard1)?; + return Ok(r()); + } $( $( #[cfg($feature)] @@ -123,7 +127,7 @@ macro_rules! 
registry { } )+ )+ - unreachable!("unregistered marker") + unreachable!("unregistered key {key:?}") } } } @@ -133,7 +137,7 @@ registry!( icu_calendar::provider::JapaneseErasV1Marker = "calendar/japanese@1", icu_calendar::provider::JapaneseExtendedErasV1Marker = "calendar/japanext@1", icu_calendar::provider::WeekDataV1Marker = "datetime/week_data@1", - #[cfg(any(all(), feature = "icu_casemap"))] + #[cfg(feature = "icu_casemap")] icu_casemap::provider::CaseMapV1Marker = "props/casemap@1", #[cfg(any(all(), feature = "icu_collator"))] icu_collator::provider::CollationDataV1Marker = "collator/data@1", @@ -142,7 +146,7 @@ registry!( icu_collator::provider::CollationMetadataV1Marker = "collator/meta@1", icu_collator::provider::CollationReorderingV1Marker = "collator/reord@1", icu_collator::provider::CollationSpecialPrimariesV1Marker = "collator/prim@1", - #[cfg(any(all(), feature = "icu_compactdecimal"))] + #[cfg(feature = "icu_compactdecimal")] icu_compactdecimal::provider::LongCompactDecimalFormatDataV1Marker = "compactdecimal/long@1", icu_compactdecimal::provider::ShortCompactDecimalFormatDataV1Marker = "compactdecimal/short@1", #[cfg(any(all(), feature = "icu_datetime"))] @@ -185,7 +189,7 @@ registry!( icu_datetime::provider::time_zones::ExemplarCitiesV1Marker = "time_zone/exemplar_cities@1", #[cfg(any(all(), feature = "icu_decimal"))] icu_decimal::provider::DecimalSymbolsV1Marker = "decimal/symbols@1", - #[cfg(any(all(), feature = "icu_displaynames"))] + #[cfg(feature = "icu_displaynames")] icu_displaynames::provider::RegionDisplayNamesV1Marker = "displaynames/regions@1", icu_displaynames::provider::LanguageDisplayNamesV1Marker = "displaynames/languages@1", icu_displaynames::provider::LocaleDisplayNamesV1Marker = "displaynames/locales@1", @@ -343,7 +347,7 @@ registry!( icu_properties::provider::VariationSelectorV1Marker = "props/VS@1", icu_properties::provider::WhiteSpaceV1Marker = "props/WSpace@1", icu_properties::provider::WordBreakV1Marker = "props/WB@1", - 
#[cfg(any(all(), feature = "icu_relativetime"))] + #[cfg(feature = "icu_relativetime")] icu_relativetime::provider::LongSecondRelativeTimeFormatDataV1Marker = "relativetime/long/second@1", icu_relativetime::provider::ShortSecondRelativeTimeFormatDataV1Marker = diff --git a/provider/datagen/src/transform/cldr/decimal/mod.rs b/provider/datagen/src/transform/cldr/decimal/mod.rs index 21ab15b10a9..e8adaad7c26 100644 --- a/provider/datagen/src/transform/cldr/decimal/mod.rs +++ b/provider/datagen/src/transform/cldr/decimal/mod.rs @@ -9,7 +9,9 @@ use icu_locid::LanguageIdentifier; use icu_provider::prelude::*; use tinystr::TinyAsciiStr; +#[cfg(feature = "icu_compactdecimal")] mod compact; +#[cfg(feature = "icu_compactdecimal")] mod compact_decimal_pattern; mod decimal_pattern; mod symbols; diff --git a/provider/datagen/src/transform/cldr/mod.rs b/provider/datagen/src/transform/cldr/mod.rs index 60191cb4fdb..d2178622f50 100644 --- a/provider/datagen/src/transform/cldr/mod.rs +++ b/provider/datagen/src/transform/cldr/mod.rs @@ -9,11 +9,13 @@ pub mod characters; pub mod cldr_serde; pub mod datetime; pub mod decimal; +#[cfg(feature = "icu_displaynames")] pub mod displaynames; pub mod fallback; pub mod list; pub mod locale_canonicalizer; pub mod plurals; +#[cfg(feature = "icu_relativetime")] pub mod relativetime; pub mod source; pub mod time_zones; diff --git a/provider/datagen/src/transform/cldr/source.rs b/provider/datagen/src/transform/cldr/source.rs index 60c8816d97e..8dcfd69467c 100644 --- a/provider/datagen/src/transform/cldr/source.rs +++ b/provider/datagen/src/transform/cldr/source.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+#![allow(dead_code)] // features + use crate::source::SerdeCache; use icu_locid::LanguageIdentifier; use icu_provider::DataError; diff --git a/provider/datagen/src/transform/icuexport/mod.rs b/provider/datagen/src/transform/icuexport/mod.rs index 13bfdb8e971..41d0133a394 100644 --- a/provider/datagen/src/transform/icuexport/mod.rs +++ b/provider/datagen/src/transform/icuexport/mod.rs @@ -7,5 +7,6 @@ pub mod collator; pub mod normalizer; +#[cfg(feature = "icu_casemap")] pub mod ucase; pub mod uprops; diff --git a/provider/datagen/tests/data/json/core/helloworld@1/bn.json b/provider/datagen/tests/data/json/core/helloworld@1/bn.json deleted file mode 100644 index 2d210b2803d..00000000000 --- a/provider/datagen/tests/data/json/core/helloworld@1/bn.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "message": "ওহে বিশ্ব" -} diff --git a/provider/datagen/tests/data/json/core/helloworld@1/en.json b/provider/datagen/tests/data/json/core/helloworld@1/en.json deleted file mode 100644 index 5ea152e675d..00000000000 --- a/provider/datagen/tests/data/json/core/helloworld@1/en.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "message": "Hello World" -} diff --git a/provider/datagen/tests/data/json/core/helloworld@1/ja.json b/provider/datagen/tests/data/json/core/helloworld@1/ja.json deleted file mode 100644 index c74eb649b04..00000000000 --- a/provider/datagen/tests/data/json/core/helloworld@1/ja.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "message": "こんにちは世界" -} diff --git a/provider/datagen/tests/data/json/core/helloworld@1/ru.json b/provider/datagen/tests/data/json/core/helloworld@1/ru.json deleted file mode 100644 index 1245ffbacbb..00000000000 --- a/provider/datagen/tests/data/json/core/helloworld@1/ru.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "message": "Привет, мир" -} diff --git a/provider/datagen/tests/data/json/fingerprints.csv b/provider/datagen/tests/data/json/fingerprints.csv index d23e984cda6..34b9ce5f8e8 100644 --- a/provider/datagen/tests/data/json/fingerprints.csv +++ 
b/provider/datagen/tests/data/json/fingerprints.csv @@ -94,10 +94,6 @@ compactdecimal/short@1, th, 481B, a95d6a755c3d7ed5 compactdecimal/short@1, th-u-nu-thai, 481B, a95d6a755c3d7ed5 compactdecimal/short@1, tr, 492B, 828289fdc150803c compactdecimal/short@1, und, 481B, 9c88371991f25ca7 -core/helloworld@1, bn, 45B, e8884de30df5c979 -core/helloworld@1, en, 31B, 9a9ba4e85fe8531 -core/helloworld@1, ja, 41B, 8de6a6a3531edc6a -core/helloworld@1, ru, 40B, 2334c18558b88018 datetime/buddhist/datelengths@1, ar, 278B, 3eb0af54334b9b7f datetime/buddhist/datelengths@1, ar-EG, 278B, 3eb0af54334b9b7f datetime/buddhist/datelengths@1, bn, 261B, 4b0ab29537b75244 diff --git a/tools/testdata-scripts/locales.rs.data b/provider/datagen/tests/data/locales.rs.data similarity index 100% rename from tools/testdata-scripts/locales.rs.data rename to provider/datagen/tests/data/locales.rs.data diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index 7e981c68111..4dd74a9a80a 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -94,10 +94,6 @@ compactdecimal/short@1, th, 52B, f0a39a85493a674d compactdecimal/short@1, th-u-nu-thai, 52B, f0a39a85493a674d compactdecimal/short@1, tr, 63B, 38574745ff1e12e3 compactdecimal/short@1, und, 52B, c10b79e54779e6bd -core/helloworld@1, bn, 26B, 219e744e649d8150 -core/helloworld@1, en, 12B, 6a847ba13c479232 -core/helloworld@1, ja, 22B, 9c8d10466cdc04cd -core/helloworld@1, ru, 21B, d84d0724633c97c6 datetime/buddhist/datelengths@1, ar, 165B, c768a2600c7063f2 datetime/buddhist/datelengths@1, ar-EG, 165B, c768a2600c7063f2 datetime/buddhist/datelengths@1, bn, 150B, bc5c367f3d0719cd diff --git a/provider/datagen/tests/data/testdata.postcard b/provider/datagen/tests/data/testdata.postcard deleted file mode 100644 index b342fff5646..00000000000 Binary files a/provider/datagen/tests/data/testdata.postcard and /dev/null differ 
diff --git a/provider/datagen/tests/make-testdata.rs b/provider/datagen/tests/make-testdata.rs new file mode 100644 index 00000000000..f373a7a4b02 --- /dev/null +++ b/provider/datagen/tests/make-testdata.rs @@ -0,0 +1,135 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use icu_datagen::fs_exporter::serializers::{Json, Postcard}; +use icu_datagen::fs_exporter::*; +use icu_datagen::prelude::*; +use icu_provider::datagen::*; +use icu_provider::prelude::*; +use std::collections::BTreeSet; +use std::path::Path; + +#[global_allocator] +static ALLOC: dhat::Alloc = dhat::Alloc; + +include!("data/locales.rs.data"); + +#[test] +fn generate_fs_and_verify_zero_copy() { + simple_logger::SimpleLogger::new() + .env() + .with_level(log::LevelFilter::Info) + .init() + .unwrap(); + + let data_root = Path::new(concat!(core::env!("CARGO_MANIFEST_DIR"), "/tests/data/")); + + let source = SourceData::offline() + .with_cldr(data_root.join("cldr"), Default::default()) + .unwrap() + .with_icuexport(data_root.join("icuexport")) + .unwrap(); + + let json_out = Box::new( + FilesystemExporter::try_new(Box::new(Json::pretty()), { + let mut options = ExporterOptions::default(); + options.root = data_root.join("json"); + options.overwrite = OverwriteOption::RemoveAndReplace; + options.fingerprint = true; + options + }) + .unwrap(), + ); + + let postcard_out = Box::new( + FilesystemExporter::try_new(Box::::default(), { + let mut options = ExporterOptions::default(); + options.root = data_root.join("postcard"); + options.overwrite = OverwriteOption::RemoveAndReplace; + options.fingerprint = true; + options + }) + .unwrap(), + ); + + let mut options = options::Options::default(); + options.locales = options::LocaleInclude::Explicit(LOCALES.iter().cloned().collect()); + + DatagenProvider::try_new(options, source) + .unwrap() + .export( + 
icu_datagen::all_keys().into_iter().collect(), + MultiExporter::new(vec![json_out, postcard_out]), + ) + .unwrap(); + + // don't drop to avoid dhat from printing stats at the end + core::mem::forget(dhat::Profiler::new_heap()); + + // violations for net_bytes_allocated + let mut net_violations = BTreeSet::new(); + // violations for total_bytes_allocated (but not net_bytes_allocated) + let mut total_violations = BTreeSet::new(); + + for key in icu_datagen::all_keys() { + for entry in glob::glob( + &data_root + .join("postcard") + .join(key.path().get()) + .join("**/*.postcard") + .display() + .to_string(), + ) + .unwrap() + { + let payload = DataPayload::from_owned_buffer( + std::fs::read(&entry.unwrap()).unwrap().into_boxed_slice(), + ); + + let stats_before = dhat::HeapStats::get(); + + // We need to generate the stats before the deserialized struct gets dropped, in order + // to distinguish between a temporary and permanent allocation. + let stats_after = + icu_datagen::deserialize_and_discard(key, payload, dhat::HeapStats::get).unwrap(); + + if stats_after.total_bytes != stats_before.total_bytes { + if stats_after.curr_bytes != stats_before.curr_bytes { + net_violations.insert(key.path().get()); + } else { + total_violations.insert(key.path().get()); + } + } + } + } + + // Types in this list cannot be zero-copy deserialized. + // + // Such types contain some data that was allocated during deserializations + // + // Every entry in this list is a bug that needs to be addressed before ICU4X 1.0. + const EXPECTED_NET_VIOLATIONS: &[&str] = &[ + // https://github.com/unicode-org/icu4x/issues/1678 + "datetime/skeletons@1", + ]; + + // Types in this list can be zero-copy deserialized (and do not contain allocated data), + // however there is some allocation that occurs during deserialization for validation. 
+ // + // Entries in this list represent a less-than-ideal state of things, however ICU4X is shippable with violations + // in this list since it does not affect databake. + const EXPECTED_TOTAL_VIOLATIONS: &[&str] = &[ + // Regex DFAs need to be validated, which involved creating a BTreeMap + "list/and@1", + "list/or@1", + "list/unit@1", + ]; + + assert!(total_violations.iter().eq(EXPECTED_TOTAL_VIOLATIONS.iter()) && net_violations.iter().eq(EXPECTED_NET_VIOLATIONS.iter()), + "Expected violations list does not match found violations!\n\ + If the new list is smaller, please update EXPECTED_VIOLATIONS in make-testdata.rs\n\ + If it is bigger and that was unexpected, please make sure the key remains zero-copy, or ask ICU4X team members if it is okay\ + to temporarily allow for this key to be allowlisted.\n\ + Expected (net):\n{EXPECTED_NET_VIOLATIONS:?}\nFound (net):\n{net_violations:?}\nExpected (total):\n{EXPECTED_TOTAL_VIOLATIONS:?}\nFound (total):\n{total_violations:?}"); +} diff --git a/provider/datagen/tests/verify-zero-copy.rs b/provider/datagen/tests/verify-zero-copy.rs deleted file mode 100644 index 68ea2a6f8f2..00000000000 --- a/provider/datagen/tests/verify-zero-copy.rs +++ /dev/null @@ -1,122 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -#[cfg(not(target_os = "windows"))] -#[global_allocator] -static ALLOC: dhat::Alloc = dhat::Alloc; - -// Something is broken wrt Windows and this test on CI. Disable for now. -#[cfg(not(target_os = "windows"))] -pub mod test { - use icu_datagen::{all_keys, DatagenProvider, SourceData}; - use icu_provider::datagen::IterableDynamicDataProvider; - use icu_provider::prelude::*; - use std::cmp; - use std::collections::BTreeSet; - - // Types in this list cannot be zero-copy deserialized. 
- // - // Such types contain some data that was allocated during deserializations - // - // Every entry in this list is a bug that needs to be addressed before ICU4X 1.0. - static EXPECTED_NET_VIOLATIONS: &[&str] = &[ - // https://github.com/unicode-org/icu4x/issues/1678 - "datetime/skeletons@1", - ]; - - // Types in this list can be zero-copy deserialized (and do not contain allocated data), - // however there is some allocation that occurs during deserialization for validation. - // - // Entries in this list represent a less-than-ideal state of things, however ICU4X is shippable with violations - // in this list since it does not affect databake. - static EXPECTED_TOTAL_VIOLATIONS: &[&str] = &[ - // Regex DFAs need to be validated, which involved creating a BTreeMap - "list/and@1", - "list/or@1", - "list/unit@1", - ]; - - #[test] - fn test_zero_copy() { - // don't drop to avoid dhat from printing stats at the end - core::mem::forget(dhat::Profiler::new_heap()); - - let data_root = std::path::Path::new(std::env!("CARGO_MANIFEST_DIR")).join("tests/data"); - - // Actual data is only needed to determine included locales. 
- let locale_provider = DatagenProvider::try_new( - Default::default(), - SourceData::offline() - .with_cldr(data_root.join("cldr"), Default::default()) - .unwrap() - .with_icuexport(data_root.join("icuexport")) - .unwrap(), - ) - .unwrap(); - - let postcard_provider = icu_provider_blob::BlobDataProvider::try_new_from_static_blob( - include_bytes!("data/testdata.postcard"), - ) - .unwrap(); - - // violations for net_bytes_allocated - let mut net_violations = BTreeSet::new(); - // violations for total_bytes_allocated (but not net_bytes_allocated) - let mut total_violations = BTreeSet::new(); - - for key in all_keys().into_iter() { - let mut max_total_violation = 0; - let mut max_net_violation = 0; - - for locale in locale_provider.supported_locales_for_key(key).unwrap() { - let payload = match postcard_provider.load_buffer( - key, - DataRequest { - locale: &locale, - metadata: Default::default(), - }, - ) { - Err(DataError { - kind: DataErrorKind::MissingLocale, - .. - }) => continue, - r => r.unwrap().take_payload().unwrap(), - }; - - let stats_before = dhat::HeapStats::get(); - - // We need to generate the stats before the deserialized struct gets dropped, in order - // to distinguish between a temporary and permanent allocation. 
- let stats_after = - icu_datagen::deserialize_and_discard(key, payload, dhat::HeapStats::get) - .unwrap(); - - let vio_total = stats_after.total_bytes - stats_before.total_bytes; - let vio_net = stats_after.curr_bytes - stats_before.curr_bytes; - max_total_violation = cmp::max(vio_total, max_total_violation); - max_net_violation = cmp::max(vio_net, max_net_violation); - } - if max_total_violation != 0 { - if max_net_violation != 0 { - net_violations.insert(key.path().get()); - } else { - total_violations.insert(key.path().get()); - } - } - } - - assert!(total_violations.iter().eq(EXPECTED_TOTAL_VIOLATIONS.iter()) && net_violations.iter().eq(EXPECTED_NET_VIOLATIONS.iter()), - "Expected violations list does not match found violations!\n\ - If the new list is smaller, please update EXPECTED_VIOLATIONS in verify-zero-copy.rs\n\ - If it is bigger and that was unexpected, please make sure the key remains zero-copy, or ask ICU4X team members if it is okay\ - to temporarily allow for this key to be allowlisted.\n\ - Expected (net):\n{EXPECTED_NET_VIOLATIONS:?}\nFound (net):\n{net_violations:?}\nExpected (total):\n{EXPECTED_TOTAL_VIOLATIONS:?}\nFound (total):\n{total_violations:?}") - } -} - -#[cfg(target_os = "windows")] -#[test] -fn test_zero_copy() { - // do nothing -} diff --git a/tools/make/data.toml b/tools/make/data.toml index f0f4637dc8c..d5dd9ebe057 100644 --- a/tools/make/data.toml +++ b/tools/make/data.toml @@ -7,21 +7,36 @@ ###### CI TASKS [tasks.testdata] -description = "Rebuild all ICU4X testdata from source data checked into the repository" +description = "Run the testdata generation test in icu_datagen" +category = "ICU4X Data" +command = "cargo" +args = [ + "test", + "-p=icu_datagen", + "--no-default-features", + "--features=provider_fs,use_wasm,rayon,icu_casemap,icu_compactdecimal,icu_displaynames,icu_relativetime", + "generate_fs_and_verify_zero_copy", + "--", + "--nocapture" +] + +[tasks.testdata-legacy] +description = "Build the data for the legacy 
icu_testdata crate" category = "ICU4X Data" command = "cargo" args = [ "run", - "--bin=make-testdata", + "--bin=make-testdata-legacy", ] + [tasks.testdata-check] description = "Rebuild ICU4X testdata and ensure that the working copy is clean" category = "ICU4X Data" dependencies = [ + "download-repo-sources", "testdata", "testdata-hello-world", - "download-repo-sources", ] script_runner = "@duckscript" script = ''' @@ -129,7 +144,7 @@ end root = pwd for component in ${components} cd "${component}/data" - exec --fail-on-error cargo run -p icu_datagen --no-default-features --features rayon,provider_baked,bin,use_wasm,networking ${mode} -- --config config.json + exec --fail-on-error cargo run -p icu_datagen --no-default-features --features rayon,provider_baked,bin,use_wasm,networking,icu_casemap,icu_displaynames,icu_relativetime,icu_compactdecimal ${mode} -- --config config.json cd "${root}" end ''' diff --git a/tools/make/ffi.toml b/tools/make/ffi.toml index e5dc90a6dae..ef18bef850f 100644 --- a/tools/make/ffi.toml +++ b/tools/make/ffi.toml @@ -193,10 +193,7 @@ dependencies = ["install-nightly", "install-wasm"] script_runner = "@duckscript" script = ''' cd ffi/diplomat/js/examples/node -exec --fail-on-error make icu_capi.wasm -exec --fail-on-error make lib -# Copy testdata instead of generating full data from scratch -cp ../../../../../provider/datagen/tests/data/testdata.postcard data.postcard +exec --fail-on-error make # --foreground-scripts makes npm forward the output of make exec --fail-on-error npm ci --foreground-scripts exec --fail-on-error npm test diff --git a/tools/make/tidy.toml b/tools/make/tidy.toml index 6d4515b9396..bdf76aab6ea 100644 --- a/tools/make/tidy.toml +++ b/tools/make/tidy.toml @@ -28,7 +28,7 @@ exit_on_error true glob_pattern_array = array "./**/*.rs" "./**/*.yml" "./**/*.toml" "./**/*.rst" "./**/*.bat" "./**/*.c" "./**/*.cpp" # Skip the files matching these patterns. 
-glob_skip_pattern_array = array "**/target/**/*" "ffi/diplomat/*/docs/**/*" "**/node_modules/**/*" "**/data/**/*" "ffi/gn/vendor/**/*" "ffi/gn/third_party_tools/**/*" +glob_skip_pattern_array = array "**/target/**/*" "ffi/diplomat/*/docs/**/*" "**/node_modules/**/*" "**/data/**/*" "ffi/gn/vendor/**/*" "ffi/gn/third_party_tools/**/*" "ffi/diplomat/c/examples/fixeddecimal_tiny/baked_data/**/*" blank_line = set "" diff --git a/tools/testdata-scripts/Cargo.toml b/tools/testdata-scripts/Cargo.toml index f173bb05016..623e0db1dc8 100644 --- a/tools/testdata-scripts/Cargo.toml +++ b/tools/testdata-scripts/Cargo.toml @@ -11,7 +11,7 @@ edition = "2021" [dependencies] crlify = { path = "../../utils/crlify" } databake = { path = "../../utils/databake" } -icu_datagen = { path = "../../provider/datagen", default-features = false, features = ["provider_baked", "provider_fs", "provider_blob", "use_wasm", "rayon"] } +icu_datagen = { path = "../../provider/datagen", default-features = false, features = ["legacy_api", "networking", "icu_compactdecimal", "icu_relativetime", "icu_displaynames", "icu_casemap"] } icu_locid = { path = "../../components/locid", features = ["databake"] } icu_provider = { path = "../../provider/core" } @@ -22,10 +22,3 @@ quote = "1" simple_logger = { version = "4.1.0", default-features = false } ureq = "2" zip = ">=0.5, <0.7" - -[features] -make-testdata-legacy = ["icu_datagen/legacy_api"] - -[[bin]] -name = "make-testdata-legacy" -required-features = ["make-testdata-legacy"] \ No newline at end of file diff --git a/tools/testdata-scripts/src/bin/download-repo-sources.rs b/tools/testdata-scripts/src/bin/download-repo-sources.rs index 66de784859f..16565bbae01 100644 --- a/tools/testdata-scripts/src/bin/download-repo-sources.rs +++ b/tools/testdata-scripts/src/bin/download-repo-sources.rs @@ -14,7 +14,7 @@ use std::path::PathBuf; use zip::ZipArchive; include!("../../globs.rs.data"); -include!("../../locales.rs.data"); 
+include!("../../../../provider/datagen/tests/data/locales.rs.data"); #[derive(Parser)] #[command( diff --git a/tools/testdata-scripts/src/bin/make-testdata-legacy.rs b/tools/testdata-scripts/src/bin/make-testdata-legacy.rs index f7aed4dd1e6..5a167b93d70 100644 --- a/tools/testdata-scripts/src/bin/make-testdata-legacy.rs +++ b/tools/testdata-scripts/src/bin/make-testdata-legacy.rs @@ -4,9 +4,10 @@ use icu_datagen::prelude::*; use std::fs::File; +use std::io::Write; use std::path::Path; -include!("../../locales.rs.data"); +include!("../../../../provider/datagen/tests/data/locales.rs.data"); fn main() { #![allow(deprecated)] // want to keep old datagen code path covered @@ -19,31 +20,17 @@ fn main() { let data_root = Path::new(concat!( core::env!("CARGO_MANIFEST_DIR"), - "/../../provider/datagen/tests/data/" + "/../../provider/testdata/data/" )); + std::fs::create_dir_all(data_root).unwrap(); + let source = SourceData::offline() - .with_cldr(data_root.join("cldr"), Default::default()) - .unwrap() - .with_icuexport(data_root.join("icuexport")) + .with_cldr_latest(Default::default()) .unwrap() - .with_segmenter_lstm(data_root.join("lstm")) + .with_icuexport_latest() .unwrap(); - let json_out = Out::Fs { - output_path: data_root.join("json"), - serializer: Box::new(syntax::Json::pretty()), - overwrite: true, - fingerprint: true, - }; - - let postcard_out = Out::Fs { - output_path: data_root.join("postcard"), - serializer: Box::::default(), - overwrite: true, - fingerprint: true, - }; - let blob_out = Out::Blob(Box::new( File::create(data_root.join("testdata.postcard")).unwrap(), )); @@ -54,7 +41,7 @@ fn main() { options.overwrite = true; options.pretty = true; let mod_out = Out::Baked { - mod_directory: Path::new("/dev/null/").into(), + mod_directory: data_root.join("baked"), options, }; @@ -65,7 +52,40 @@ fn main() { .chain([icu_provider::hello_world::HelloWorldV1Marker::KEY]) .collect::>(), &source, - vec![json_out, blob_out, mod_out, postcard_out], + vec![blob_out, 
mod_out], ) .unwrap(); + + let mut metadata = File::create(data_root.join("metadata.rs.data")).unwrap(); + + metadata + .write_all( + "\ + // DO NOT EDIT\n\ + // This file is generated by `make-testdata` from\n\ + // * locales.rs.data,\n\ + // * `icu_datagen::SourceData::LATEST_TESTED_*`.\n\ + \n\ + " + .as_bytes(), + ) + .unwrap(); + + let locales = databake::Bake::bake(LOCALES, &Default::default()); + let cldr_tag = SourceData::LATEST_TESTED_CLDR_TAG; + let icu_tag = SourceData::LATEST_TESTED_ICUEXPORT_TAG; + let lstm_tag = SourceData::LATEST_TESTED_SEGMENTER_LSTM_TAG; + + metadata + .write_all( + quote::quote! { + pub const LOCALES: &[icu_locid::LanguageIdentifier] = &#locales; + pub const CLDR_TAG: &str = #cldr_tag; + pub const ICUEXPORT_TAG: &str = #icu_tag; + pub const SEGMENTER_LSTM_TAG: &str = #lstm_tag; + } + .to_string() + .as_bytes(), + ) + .unwrap(); } diff --git a/tools/testdata-scripts/src/bin/make-testdata.rs b/tools/testdata-scripts/src/bin/make-testdata.rs deleted file mode 100644 index 9165b82ccb4..00000000000 --- a/tools/testdata-scripts/src/bin/make-testdata.rs +++ /dev/null @@ -1,132 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
- -use icu_datagen::baked_exporter::*; -use icu_datagen::blob_exporter::*; -use icu_datagen::fs_exporter::serializers::*; -use icu_datagen::fs_exporter::*; -use icu_datagen::prelude::*; -use icu_provider::datagen::MultiExporter; -use std::fs::File; -use std::io::Write; -use std::path::Path; - -include!("../../locales.rs.data"); - -fn main() { - #![allow(deprecated)] // want to keep old datagen code path covered - - simple_logger::SimpleLogger::new() - .env() - .with_level(log::LevelFilter::Info) - .init() - .unwrap(); - - let data_root = Path::new(concat!( - core::env!("CARGO_MANIFEST_DIR"), - "/../../provider/datagen/tests/data/" - )); - - let testdata_data_root = Path::new(concat!( - core::env!("CARGO_MANIFEST_DIR"), - "/../../provider/testdata/data/" - )); - - let source = SourceData::offline() - .with_cldr(data_root.join("cldr"), Default::default()) - .unwrap() - .with_icuexport(data_root.join("icuexport")) - .unwrap(); - - let json_out = Box::new( - FilesystemExporter::try_new(Box::new(Json::pretty()), { - let mut options = ExporterOptions::default(); - options.root = data_root.join("json"); - options.overwrite = OverwriteOption::RemoveAndReplace; - options.fingerprint = true; - options - }) - .unwrap(), - ); - - let postcard_out = Box::new( - FilesystemExporter::try_new(Box::::default(), { - let mut options = ExporterOptions::default(); - options.root = data_root.join("postcard"); - options.overwrite = OverwriteOption::RemoveAndReplace; - options.fingerprint = true; - options - }) - .unwrap(), - ); - - let blob_out = Box::new(BlobExporter::new_with_sink(Box::new( - File::create(data_root.join("testdata.postcard")).unwrap(), - ))); - - let mod_out = Box::new( - BakedExporter::new(testdata_data_root.join("baked"), { - let mut options = Options::default(); - options.insert_feature_gates = true; - options.use_separate_crates = true; - options.overwrite = true; - options.pretty = true; - options - }) - .unwrap(), - ); - - let mut options = 
options::Options::default(); - options.locales = options::LocaleInclude::Explicit(LOCALES.iter().cloned().collect()); - - DatagenProvider::try_new(options, source) - .unwrap() - .export( - icu_datagen::all_keys_with_experimental() - .into_iter() - .chain([icu_provider::hello_world::HelloWorldV1Marker::KEY]) - .collect(), - MultiExporter::new(vec![json_out, blob_out, mod_out, postcard_out]), - ) - .unwrap(); - - std::fs::copy( - data_root.join("testdata.postcard"), - testdata_data_root.join("testdata.postcard"), - ) - .unwrap(); - - let mut metadata = File::create(testdata_data_root.join("metadata.rs.data")).unwrap(); - - metadata - .write_all( - "\ - // DO NOT EDIT\n\ - // This file is generated by `make-testdata` from\n\ - // * tools/testdata-scripts/locales.rs.data,\n\ - // * `icu_datagen::SourceData::LATEST_TESTED_*`.\n\ - \n\ - " - .as_bytes(), - ) - .unwrap(); - - let locales = databake::Bake::bake(LOCALES, &Default::default()); - let cldr_tag = SourceData::LATEST_TESTED_CLDR_TAG; - let icu_tag = SourceData::LATEST_TESTED_ICUEXPORT_TAG; - let lstm_tag = SourceData::LATEST_TESTED_SEGMENTER_LSTM_TAG; - - metadata - .write_all( - quote::quote! { - pub const LOCALES: &[icu_locid::LanguageIdentifier] = &#locales; - pub const CLDR_TAG: &str = #cldr_tag; - pub const ICUEXPORT_TAG: &str = #icu_tag; - pub const SEGMENTER_LSTM_TAG: &str = #lstm_tag; - } - .to_string() - .as_bytes(), - ) - .unwrap(); -}