diff --git a/core/engine/src/string/mod.rs b/core/engine/src/string/mod.rs
index 9133e45f9ed..71d6c0e7c73 100644
--- a/core/engine/src/string/mod.rs
+++ b/core/engine/src/string/mod.rs
@@ -175,11 +175,10 @@ impl CodePoint {
 /// The raw representation of a [`JsString`] in the heap.
 #[repr(C)]
 struct RawJsString {
-    /// The UTF-16 length.
-    len: usize,
-
-    // TODO: Put flag in len.
-    flags: usize,
+    /// Contains the flags and Latin1/UTF-16 length.
+    ///
+    /// The latin1 flag is stored in the bottom bit.
+    flags_and_len: usize,
 
     /// The number of references to the string.
     ///
@@ -190,6 +189,23 @@ struct RawJsString {
     data: [u16; 0],
 }
 
+impl RawJsString {
+    const LATIN1_BITFLAG: usize = 1 << 0;
+    const BITFLAG_COUNT: usize = 1;
+
+    const fn is_latin1(&self) -> bool {
+        (self.flags_and_len & Self::LATIN1_BITFLAG) != 0
+    }
+
+    const fn len(&self) -> usize {
+        self.flags_and_len >> Self::BITFLAG_COUNT
+    }
+
+    const fn encode_flags_and_len(len: usize, latin1: bool) -> usize {
+        (len << Self::BITFLAG_COUNT) | (latin1 as usize)
+    }
+}
+
 const DATA_OFFSET: usize = std::mem::size_of::<RawJsString>();
 
 /// A Latin1 or UTF-16–encoded, reference counted, immutable string.
@@ -260,15 +276,15 @@ impl JsString {
                 unsafe {
                     let h = h.as_ptr();
 
-                    if (*h).flags == 0 {
-                        JsStr::utf16(std::slice::from_raw_parts(
+                    if (*h).is_latin1() {
+                        JsStr::latin1(std::slice::from_raw_parts(
                             addr_of!((*h).data).cast(),
-                            (*h).len,
+                            (*h).len(),
                         ))
                     } else {
-                        JsStr::latin1(std::slice::from_raw_parts(
+                        JsStr::utf16(std::slice::from_raw_parts(
                             addr_of!((*h).data).cast(),
-                            (*h).len,
+                            (*h).len(),
                         ))
                     }
                 }
@@ -276,8 +292,6 @@ impl JsString {
             UnwrappedTagged::Tag(index) => {
                 // SAFETY: all static strings are valid indices on `STATIC_JS_STRINGS`, so `get` should always
                 // return `Some`.
-                //
-                // SAFETY: All strings in `StaticJsStrings` are ascii strings, so this is safe.
                 unsafe { StaticJsStrings::get(index).unwrap_unchecked() }
             }
         }
@@ -518,7 +532,6 @@ impl JsString {
         assert!(position < size);
 
         match self.as_str().variant() {
-            // Fast path for ascii strings.
             JsStrVariant::Latin1(v) => {
                 let code_point = v.get(position).expect("Already checked the size");
                 CodePoint::Unicode(*code_point as char)
@@ -664,9 +677,8 @@ impl JsString {
         unsafe {
             // Write the first part, the `RawJsString`.
             inner.as_ptr().write(RawJsString {
-                len: str_len,
+                flags_and_len: RawJsString::encode_flags_and_len(str_len, latin1),
                 refcount: Cell::new(1),
-                flags: usize::from(latin1),
                 data: [0; 0],
             });
         }
@@ -750,7 +762,7 @@ impl JsString {
                 // by its signature, so this doesn't outlive `self`.
                 unsafe {
                     let h = h.as_ptr();
-                    (*h).len
+                    (*h).len()
                 }
             }
             UnwrappedTagged::Tag(index) => {
@@ -869,15 +881,15 @@ impl Drop for JsString {
             // All the checks for the validity of the layout have already been made on `alloc_inner`,
             // so we can skip the unwrap.
             let layout = unsafe {
-                if inner.flags == 0 {
+                if inner.is_latin1() {
                     Layout::for_value(inner)
-                        .extend(Layout::array::<u16>(inner.len).unwrap_unchecked())
+                        .extend(Layout::array::<u8>(inner.len()).unwrap_unchecked())
                         .unwrap_unchecked()
                         .0
                         .pad_to_align()
                 } else {
                     Layout::for_value(inner)
-                        .extend(Layout::array::<u8>(inner.len).unwrap_unchecked())
+                        .extend(Layout::array::<u16>(inner.len()).unwrap_unchecked())
                         .unwrap_unchecked()
                         .0
                         .pad_to_align()
@@ -1023,10 +1035,6 @@ impl<const N: usize> PartialEq<[u16; N]> for JsString {
 
 impl PartialEq<str> for JsString {
     fn eq(&self, other: &str) -> bool {
-        // if let Some(s) = self.as_str().as_ascii() {
-        //     return s == other.as_bytes();
-        // }
-
         let utf16 = self.code_points();
         let mut utf8 = other.chars();
 