Skip to content

Commit

Permalink
Put latin1 flag in len field
Browse files Browse the repository at this point in the history
  • Loading branch information
HalidOdat committed Mar 31, 2024
1 parent bb7193a commit d08482e
Showing 1 changed file with 31 additions and 23 deletions.
54 changes: 31 additions & 23 deletions core/engine/src/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,10 @@ impl CodePoint {
/// The raw representation of a [`JsString`] in the heap.
#[repr(C)]
struct RawJsString {
/// The UTF-16 length.
len: usize,

// TODO: Put flag in len.
flags: usize,
/// Contains the flags and Latin1/UTF-16 length.
///
/// The latin1 flag is stored in the bottom bit.
flags_and_len: usize,

/// The number of references to the string.
///
Expand All @@ -190,6 +189,23 @@ struct RawJsString {
data: [u16; 0],
}

impl RawJsString {
    /// Mask selecting the Latin1 flag, which occupies the lowest bit of `flags_and_len`.
    const LATIN1_BITFLAG: usize = 1 << 0;

    /// Number of low bits of `flags_and_len` reserved for flags; the length is stored above them.
    const BITFLAG_COUNT: usize = 1;

    /// Returns `true` when the Latin1 flag bit is set in `flags_and_len`.
    const fn is_latin1(&self) -> bool {
        let flag_bits = self.flags_and_len & Self::LATIN1_BITFLAG;
        flag_bits == Self::LATIN1_BITFLAG
    }

    /// Returns the length stored in `flags_and_len`, with the flag bits shifted away.
    const fn len(&self) -> usize {
        let packed = self.flags_and_len;
        packed >> Self::BITFLAG_COUNT
    }

    /// Packs `len` and the `latin1` flag into a single word suitable for `flags_and_len`.
    ///
    /// The length is shifted up by [`Self::BITFLAG_COUNT`] bits and the flag is placed in the
    /// freed low bit. The topmost `BITFLAG_COUNT` bits of `len` are shifted out and discarded.
    const fn encode_flags_and_len(len: usize, latin1: bool) -> usize {
        let flag = if latin1 { Self::LATIN1_BITFLAG } else { 0 };
        let shifted_len = len << Self::BITFLAG_COUNT;
        shifted_len | flag
    }
}

/// Size in bytes of the [`RawJsString`] header, as reported by `size_of`.
///
/// NOTE(review): presumably used as the byte offset from the start of an allocation to the
/// string data that follows the header — confirm against the allocation code.
const DATA_OFFSET: usize = std::mem::size_of::<RawJsString>();

/// A Latin1 or UTF-16–encoded, reference counted, immutable string.
Expand Down Expand Up @@ -260,24 +276,22 @@ impl JsString {
unsafe {
let h = h.as_ptr();

if (*h).flags == 0 {
JsStr::utf16(std::slice::from_raw_parts(
if (*h).is_latin1() {
JsStr::latin1(std::slice::from_raw_parts(
addr_of!((*h).data).cast(),
(*h).len,
(*h).len(),
))
} else {
JsStr::latin1(std::slice::from_raw_parts(
JsStr::utf16(std::slice::from_raw_parts(
addr_of!((*h).data).cast(),
(*h).len,
(*h).len(),
))
}
}
}
UnwrappedTagged::Tag(index) => {
// SAFETY: all static strings are valid indices on `STATIC_JS_STRINGS`, so `get` should always
// return `Some`.
//
// SAFETY: All strings in `StaticJsStrings` are ascii strings, so this is safe.
unsafe { StaticJsStrings::get(index).unwrap_unchecked() }
}
}
Expand Down Expand Up @@ -518,7 +532,6 @@ impl JsString {
assert!(position < size);

match self.as_str().variant() {
// Fast path for ascii strings.
JsStrVariant::Latin1(v) => {
let code_point = v.get(position).expect("Already checked the size");
CodePoint::Unicode(*code_point as char)
Expand Down Expand Up @@ -664,9 +677,8 @@ impl JsString {
unsafe {
// Write the first part, the `RawJsString`.
inner.as_ptr().write(RawJsString {
len: str_len,
flags_and_len: RawJsString::encode_flags_and_len(str_len, latin1),
refcount: Cell::new(1),
flags: usize::from(latin1),
data: [0; 0],
});
}
Expand Down Expand Up @@ -750,7 +762,7 @@ impl JsString {
// by its signature, so this doesn't outlive `self`.
unsafe {
let h = h.as_ptr();
(*h).len
(*h).len()
}
}
UnwrappedTagged::Tag(index) => {
Expand Down Expand Up @@ -869,15 +881,15 @@ impl Drop for JsString {
// All the checks for the validity of the layout have already been made on `alloc_inner`,
// so we can skip the unwrap.
let layout = unsafe {
if inner.flags == 0 {
if inner.is_latin1() {
Layout::for_value(inner)
.extend(Layout::array::<u16>(inner.len).unwrap_unchecked())
.extend(Layout::array::<u8>(inner.len()).unwrap_unchecked())
.unwrap_unchecked()
.0
.pad_to_align()
} else {
Layout::for_value(inner)
.extend(Layout::array::<u8>(inner.len).unwrap_unchecked())
.extend(Layout::array::<u16>(inner.len()).unwrap_unchecked())
.unwrap_unchecked()
.0
.pad_to_align()
Expand Down Expand Up @@ -1023,10 +1035,6 @@ impl<const N: usize> PartialEq<[u16; N]> for JsString {

impl PartialEq<str> for JsString {
fn eq(&self, other: &str) -> bool {
// if let Some(s) = self.as_str().as_ascii() {
// return s == other.as_bytes();
// }

let utf16 = self.code_points();
let mut utf8 = other.chars();

Expand Down

0 comments on commit d08482e

Please sign in to comment.