Skip to content

Commit

Permalink
Switch locid Value to use Subtag
Browse files (browse the repository at this point in the history)
  • Loading branch information
zbraniecki committed Jun 2, 2024
1 parent 77da96b commit 77c2fa2
Show file tree
Hide file tree
Showing 16 changed files with 130 additions and 129 deletions.
2 changes: 1 addition & 1 deletion components/calendar/src/any_calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ impl AnyCalendarKind {
/// Returns `None` if the calendar is unknown. If you prefer an error, use
/// [`CalendarError::unknown_any_calendar_kind`].
pub fn get_for_bcp47_value(x: &Value) -> Option<Self> {
match *x.as_tinystr_slice() {
match x.as_subtags_slice() {
[first] if first == "buddhist" => Some(AnyCalendarKind::Buddhist),
[first] if first == "chinese" => Some(AnyCalendarKind::Chinese),
[first] if first == "coptic" => Some(AnyCalendarKind::Coptic),
Expand Down
18 changes: 10 additions & 8 deletions components/datetime/src/calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ use icu_calendar::{
islamic::IslamicUmmAlQura, japanese::Japanese, japanese::JapaneseExtended, persian::Persian,
Gregorian,
};
use icu_locale_core::extensions::unicode::{value, Value};
use icu_locale_core::{
extensions::unicode::{value, Value},
subtags::{subtag, Subtag},
};
use icu_provider::prelude::*;
use tinystr::{tinystr, TinyAsciiStr};

#[cfg(any(feature = "datagen", feature = "experimental"))]
use crate::provider::neo::*;
Expand Down Expand Up @@ -84,9 +86,9 @@ pub trait CldrCalendar: InternalCldrCalendar {
}

/// Check if the provided value is of the form `islamic-{subcal}`
fn is_islamic_subcal(value: &Value, subcal: TinyAsciiStr<8>) -> bool {
if let &[first, second] = value.as_tinystr_slice() {
first == "islamic" && second == subcal
fn is_islamic_subcal(value: &Value, subcal: Subtag) -> bool {
if let &[first, second] = value.as_subtags_slice() {
first == *"islamic" && second == subcal
} else {
false
}
Expand Down Expand Up @@ -243,7 +245,7 @@ impl CldrCalendar for IslamicCivil {
#[cfg(any(feature = "datagen", feature = "experimental"))]
type SkeletaV1Marker = IslamicDateNeoSkeletonPatternsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
*value == value!("islamicc") || is_islamic_subcal(value, tinystr!(8, "civil"))
*value == value!("islamicc") || is_islamic_subcal(value, subtag!("civil"))
}
}

Expand Down Expand Up @@ -277,7 +279,7 @@ impl CldrCalendar for IslamicTabular {
#[cfg(any(feature = "datagen", feature = "experimental"))]
type SkeletaV1Marker = IslamicDateNeoSkeletonPatternsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
is_islamic_subcal(value, tinystr!(8, "tbla"))
is_islamic_subcal(value, subtag!("tbla"))
}
}

Expand All @@ -297,7 +299,7 @@ impl CldrCalendar for IslamicUmmAlQura {
#[cfg(any(feature = "datagen", feature = "experimental"))]
type SkeletaV1Marker = IslamicDateNeoSkeletonPatternsV1Marker;
fn is_identifier_allowed_for_calendar(value: &Value) -> bool {
is_islamic_subcal(value, tinystr!(8, "umalqura"))
is_islamic_subcal(value, subtag!("umalqura"))
}
}

Expand Down
15 changes: 8 additions & 7 deletions components/datetime/src/options/preferences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ use crate::fields;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use icu_locale_core::extensions::unicode::key;
use icu_locale_core::{
extensions::unicode::key,
subtags::{subtag, Subtag},
};
use icu_provider::DataLocale;
use tinystr::tinystr;
use tinystr::TinyAsciiStr;

/// Stores user preferences which may affect the result of date and time formatting.
///
Expand Down Expand Up @@ -81,10 +82,10 @@ impl Bag {

/// Construct a [`Bag`] from a given [`DataLocale`]
pub(crate) fn from_data_locale(data_locale: &DataLocale) -> Self {
const H11: TinyAsciiStr<8> = tinystr!(8, "h11");
const H12: TinyAsciiStr<8> = tinystr!(8, "h12");
const H23: TinyAsciiStr<8> = tinystr!(8, "h23");
const H24: TinyAsciiStr<8> = tinystr!(8, "h24");
const H11: Subtag = subtag!("h11");
const H12: Subtag = subtag!("h12");
const H23: Subtag = subtag!("h23");
const H24: Subtag = subtag!("h24");
let hour_cycle = match data_locale
.get_unicode_ext(&key!("hc"))
.and_then(|v| v.as_single_subtag().copied())
Expand Down
8 changes: 5 additions & 3 deletions components/datetime/src/provider/date_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,10 @@ where
fn skeleton_data_payload(
&self,
) -> Result<DataPayload<DateSkeletonPatternsV1Marker>, DataError> {
use icu_locale_core::extensions::unicode::{key, value};
use tinystr::tinystr;
use icu_locale_core::{
extensions::unicode::{key, value},
subtags::subtag,
};
let mut locale = self.locale.clone();
#[allow(clippy::expect_used)] // experimental
let cal_val = self.cal_val.expect("should be present for components bag");
Expand All @@ -356,7 +358,7 @@ where
locale.set_unicode_ext(key!("ca"), value!("ethiopic"));
} else if cal_val == &value!("islamic")
|| cal_val == &value!("islamicc")
|| cal_val.as_tinystr_slice().first() == Some(&tinystr!(8, "islamic"))
|| cal_val.as_subtags_slice().first() == Some(&subtag!("islamic"))
{
// All Islamic calendars store skeleton data under `islamic`, not their individual extension keys
locale.set_unicode_ext(key!("ca"), value!("islamic"));
Expand Down
4 changes: 2 additions & 2 deletions components/locale/src/canonicalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,12 @@ impl LocaleCanonicalizer {
if !extensions.unicode.keywords.is_empty() {
for key in [key!("rg"), key!("sd")] {
if let Some(value) = extensions.unicode.keywords.get_mut(&key) {
if let &[only_value] = value.as_tinystr_slice() {
if let Some(only_value) = value.as_single_subtag() {
if let Some(modified_value) = self
.aliases
.get()
.subdivision
.get(&only_value.resize().to_unvalidated())
.get(&only_value.into_tinystr().resize().to_unvalidated())
{
if let Ok(modified_value) = modified_value.parse() {
*value = modified_value;
Expand Down
39 changes: 24 additions & 15 deletions components/locale_core/src/extensions/transform/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

use crate::parser::{ParserError, SubtagIterator};
use crate::shortvec::ShortBoxSlice;
use crate::subtags::{subtag, Subtag};
use core::ops::RangeInclusive;
use core::str::FromStr;
use tinystr::TinyAsciiStr;

/// A value used in a list of [`Fields`](super::Fields).
///
Expand All @@ -27,10 +27,10 @@ use tinystr::TinyAsciiStr;
/// "no".parse::<Value>().expect_err("Invalid Value.");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>);
pub struct Value(ShortBoxSlice<Subtag>);

const TYPE_LENGTH: RangeInclusive<usize> = 3..=8;
const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
const TRUE_TVALUE: Subtag = subtag!("true");

impl Value {
/// A constructor which takes a utf8 slice, parses it and
Expand All @@ -52,8 +52,7 @@ impl Value {
return Err(ParserError::InvalidExtension);
}
has_value = true;
let val =
TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?;
let val = Subtag::try_from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?;
if val != TRUE_TVALUE {
v.push(val);
}
Expand All @@ -65,23 +64,19 @@ impl Value {
Ok(Self(v))
}

pub(crate) fn from_short_slice_unchecked(
input: ShortBoxSlice<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>,
) -> Self {
pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<Subtag>) -> Self {
Self(input)
}

pub(crate) fn is_type_subtag(t: &[u8]) -> bool {
TYPE_LENGTH.contains(&t.len()) && t.iter().all(u8::is_ascii_alphanumeric)
}

pub(crate) fn parse_subtag(
t: &[u8],
) -> Result<Option<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>, ParserError> {
let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;
if !TYPE_LENGTH.contains(&t.len()) || !s.is_ascii_alphanumeric() {
pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<Subtag>, ParserError> {
if !TYPE_LENGTH.contains(&t.len()) {
return Err(ParserError::InvalidExtension);
}
let s = Subtag::try_from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;

let s = s.to_ascii_lowercase();

Expand All @@ -97,9 +92,9 @@ impl Value {
F: FnMut(&str) -> Result<(), E>,
{
if self.0.is_empty() {
f("true")?;
f(TRUE_TVALUE.as_str())?;
} else {
self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)?;
self.0.iter().map(Subtag::as_str).try_for_each(f)?;
}
Ok(())
}
Expand Down Expand Up @@ -132,3 +127,17 @@ fn test_writeable() {
"hybrid-foobar"
);
}

/// Checks that a transform-extension [`Value`] parses subtags at the boundary
/// lengths used here (3 and 8 characters) and rejects a 2-character subtag.
#[test]
fn test_short_tvalue() {
    // `expect` surfaces the parser error in the failure message instead of
    // a bare "assertion failed: value.is_ok()".
    let value = Value::from_str("foo-longstag").expect("`foo-longstag` should parse");

    // Name the expected subtags once so the length check and the element-wise
    // comparison cannot drift apart (`zip` alone would silently truncate).
    let expected = [subtag!("foo"), subtag!("longstag")];
    assert_eq!(value.0.len(), expected.len());
    for (actual, reference) in value.0.iter().zip(&expected) {
        assert_eq!(actual, reference);
    }

    // "ba" is two characters long, below the lower bound of `TYPE_LENGTH` (3..=8).
    Value::from_str("foo-ba").expect_err("a two-character subtag should be rejected");
}
91 changes: 28 additions & 63 deletions components/locale_core/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@

use crate::parser::{ParserError, SubtagIterator};
use crate::shortvec::ShortBoxSlice;
use crate::subtags::{subtag, Subtag};
use alloc::vec::Vec;
use core::ops::RangeInclusive;
use core::str::FromStr;
use tinystr::TinyAsciiStr;

/// A value used in a list of [`Keywords`](super::Keywords).
///
Expand All @@ -33,10 +32,9 @@ use tinystr::TinyAsciiStr;
/// assert_eq!(value!("true").to_string(), "");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
pub struct Value(ShortBoxSlice<Subtag>);

const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
const TRUE_VALUE: Subtag = subtag!("true");

impl Value {
/// A constructor which takes a utf8 slice, parses it and
Expand All @@ -53,53 +51,31 @@ impl Value {
let mut v = ShortBoxSlice::new();

if !input.is_empty() {
for subtag in SubtagIterator::new(input) {
let val = Self::subtag_from_bytes(subtag)?;
if let Some(val) = val {
v.push(val);
for chunk in SubtagIterator::new(input) {
let subtag = Subtag::try_from_bytes(chunk)?;
if subtag != TRUE_VALUE {
v.push(subtag);
}
}
}
Ok(Self(v))
}

/// Const constructor for when the value contains only a single subtag.
///
/// # Examples
///
/// ```
/// use icu::locale::extensions::unicode::Value;
///
/// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
/// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
/// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
/// ```
pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
match Self::subtag_from_bytes(subtag) {
Err(_) => Err(ParserError::InvalidExtension),
Ok(option) => Ok(Self::from_tinystr(option)),
}
}

#[doc(hidden)]
pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] {
&self.0
pub const fn as_single_subtag(&self) -> Option<&Subtag> {
self.0.single()
}

#[doc(hidden)]
pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> {
self.0.single()
pub fn as_subtags_slice(&self) -> &[Subtag] {
&self.0
}

#[doc(hidden)]
pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
pub const fn from_subtag(subtag: Option<Subtag>) -> Self {
match subtag {
None => Self(ShortBoxSlice::new()),
Some(val) => {
debug_assert!(val.is_ascii_alphanumeric());
debug_assert!(!matches!(val, TRUE_VALUE));
Self(ShortBoxSlice::new_single(val))
}
None | Some(TRUE_VALUE) => Self(ShortBoxSlice::new()),
Some(val) => Self(ShortBoxSlice::new_single(val)),
}
}

Expand All @@ -110,11 +86,11 @@ impl Value {
///
/// ```
/// use icu::locale::extensions::unicode::Value;
/// use tinystr::{TinyAsciiStr, tinystr};
/// use icu::locale::subtags::subtag;
///
/// let tinystr1: TinyAsciiStr<8> = tinystr!(8, "foobar");
/// let tinystr2: TinyAsciiStr<8> = tinystr!(8, "testing");
/// let mut v = vec![tinystr1, tinystr2];
/// let subtag1 = subtag!("foobar");
/// let subtag2 = subtag!("testing");
/// let mut v = vec![subtag1, subtag2];
/// v.sort();
/// v.dedup();
///
Expand All @@ -124,37 +100,26 @@ impl Value {
/// Notice: For performance- and memory-constrained environments, it is recommended
/// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort)
/// and [`dedup`](Vec::dedup()).
pub fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self {
pub fn from_vec_unchecked(input: Vec<Subtag>) -> Self {
Self(input.into())
}

pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<TinyAsciiStr<8>>) -> Self {
pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<Subtag>) -> Self {
Self(input)
}

#[doc(hidden)]
pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
}

pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<Subtag>, ParserError> {
Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
}

pub(crate) const fn parse_subtag_from_bytes_manual_slice(
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
let slice_len = end - start;
if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
return Err(ParserError::InvalidExtension);
}

match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
) -> Result<Option<Subtag>, ParserError> {
match Subtag::try_from_bytes_manual_slice(bytes, start, end) {
Ok(TRUE_VALUE) => Ok(None),
Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())),
Ok(_) => Err(ParserError::InvalidExtension),
Ok(s) => Ok(Some(s)),
Err(_) => Err(ParserError::InvalidSubtag),
}
}
Expand All @@ -163,7 +128,7 @@ impl Value {
where
F: FnMut(&str) -> Result<(), E>,
{
self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f)
self.0.iter().map(Subtag::as_str).try_for_each(f)
}
}

Expand Down Expand Up @@ -209,9 +174,9 @@ macro_rules! extensions_unicode_value {
// };
// Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
const R: $crate::extensions::unicode::Value =
$crate::extensions::unicode::Value::from_tinystr(
match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
Ok(r) => r,
$crate::extensions::unicode::Value::from_subtag(
match $crate::subtags::Subtag::try_from_bytes($value.as_bytes()) {
Ok(r) => Some(r),
_ => panic!(concat!("Invalid Unicode extension value: ", $value)),
},
);
Expand Down
Loading

0 comments on commit 77c2fa2

Please sign in to comment.