From cea4f3874c9bc2967ff1053050e55731b1bf0235 Mon Sep 17 00:00:00 2001
From: Haled Odat <8566042+HalidOdat@users.noreply.github.com>
Date: Sun, 31 Mar 2024 08:35:01 +0200
Subject: [PATCH] Preserve latin1 strings during bytecode compilation

---
 Cargo.lock                                    |   1 -
 core/engine/Cargo.toml                        |   1 -
 .../src/builtins/intl/segmenter/iterator.rs   |  14 +-
 .../engine/src/builtins/intl/segmenter/mod.rs |   6 +-
 core/engine/src/builtins/number/globals.rs    | 225 +++++++++++-------
 core/engine/src/bytecompiler/declarations.rs  |  14 +-
 .../engine/src/bytecompiler/expression/mod.rs |  20 +-
 core/engine/src/bytecompiler/mod.rs           |  18 +-
 core/engine/src/object/property_map.rs        |   1 -
 .../src/optimizer/pass/constant_folding.rs    |   7 +-
 core/engine/src/string/mod.rs                 |  23 --
 core/engine/src/string/str.rs                 |  28 ++-
 12 files changed, 195 insertions(+), 163 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index bf090b41ad7..4b0fbb420cf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -425,7 +425,6 @@ dependencies = [
  "num_enum",
  "once_cell",
  "paste",
- "phf",
  "pollster",
  "portable-atomic",
  "rand",
diff --git a/core/engine/Cargo.toml b/core/engine/Cargo.toml
index 8c0bb8e37cc..9436ef0d059 100644
--- a/core/engine/Cargo.toml
+++ b/core/engine/Cargo.toml
@@ -104,7 +104,6 @@ intrusive-collections = "0.9.6"
 cfg-if = "1.0.0"
 time.workspace = true
 hashbrown.workspace = true
-phf.workspace = true
 
 # intl deps
 boa_icu_provider = {workspace = true, features = ["std"], optional = true }
diff --git a/core/engine/src/builtins/intl/segmenter/iterator.rs b/core/engine/src/builtins/intl/segmenter/iterator.rs
index 3917324b92c..af440efb8ee 100644
--- a/core/engine/src/builtins/intl/segmenter/iterator.rs
+++ b/core/engine/src/builtins/intl/segmenter/iterator.rs
@@ -21,9 +21,9 @@ pub(crate) enum NativeSegmentIterator<'l, 's> {
     GraphemeUtf16(GraphemeClusterBreakIteratorUtf16<'l, 's>),
     WordUtf16(WordBreakIteratorUtf16<'l, 's>),
     SentenceUtf16(SentenceBreakIteratorUtf16<'l, 's>),
-    GraphemeUtf8(GraphemeClusterBreakIteratorLatin1<'l, 's>),
-    WordUtf8(WordBreakIteratorLatin1<'l, 's>),
-    SentenceUtf8(SentenceBreakIteratorLatin1<'l, 's>),
+    GraphemeLatin1(GraphemeClusterBreakIteratorLatin1<'l, 's>),
+    WordLatin1(WordBreakIteratorLatin1<'l, 's>),
+    SentenceLatin1(SentenceBreakIteratorLatin1<'l, 's>),
 }
 
 impl Iterator for NativeSegmentIterator<'_, '_> {
@@ -34,9 +34,9 @@ impl Iterator for NativeSegmentIterator<'_, '_> {
             NativeSegmentIterator::GraphemeUtf16(g) => g.next(),
             NativeSegmentIterator::WordUtf16(w) => w.next(),
             NativeSegmentIterator::SentenceUtf16(s) => s.next(),
-            NativeSegmentIterator::GraphemeUtf8(g) => g.next(),
-            NativeSegmentIterator::WordUtf8(w) => w.next(),
-            NativeSegmentIterator::SentenceUtf8(s) => s.next(),
+            NativeSegmentIterator::GraphemeLatin1(g) => g.next(),
+            NativeSegmentIterator::WordLatin1(w) => w.next(),
+            NativeSegmentIterator::SentenceLatin1(s) => s.next(),
         }
     }
 }
@@ -46,7 +46,7 @@ impl NativeSegmentIterator<'_, '_> {
     /// the current boundary is word-like.
     pub(crate) fn is_word_like(&self) -> Option<bool> {
         match self {
-            Self::WordUtf8(w) => Some(w.is_word_like()),
+            Self::WordLatin1(w) => Some(w.is_word_like()),
             Self::WordUtf16(w) => Some(w.is_word_like()),
             _ => None,
         }
diff --git a/core/engine/src/builtins/intl/segmenter/mod.rs b/core/engine/src/builtins/intl/segmenter/mod.rs
index 14f8e7e2387..53b10a01130 100644
--- a/core/engine/src/builtins/intl/segmenter/mod.rs
+++ b/core/engine/src/builtins/intl/segmenter/mod.rs
@@ -65,9 +65,9 @@ impl NativeSegmenter {
     pub(crate) fn segment<'l, 's>(&'l self, input: JsStr<'s>) -> NativeSegmentIterator<'l, 's> {
         match input.variant() {
             crate::string::JsStrVariant::Latin1(input) => match self {
-                Self::Grapheme(g) => NativeSegmentIterator::GraphemeUtf8(g.segment_latin1(input)),
-                Self::Word(w) => NativeSegmentIterator::WordUtf8(w.segment_latin1(input)),
-                Self::Sentence(s) => NativeSegmentIterator::SentenceUtf8(s.segment_latin1(input)),
+                Self::Grapheme(g) => NativeSegmentIterator::GraphemeLatin1(g.segment_latin1(input)),
+                Self::Word(w) => NativeSegmentIterator::WordLatin1(w.segment_latin1(input)),
+                Self::Sentence(s) => NativeSegmentIterator::SentenceLatin1(s.segment_latin1(input)),
             },
             crate::string::JsStrVariant::Utf16(input) => match self {
                 Self::Grapheme(g) => NativeSegmentIterator::GraphemeUtf16(g.segment_utf16(input)),
diff --git a/core/engine/src/builtins/number/globals.rs b/core/engine/src/builtins/number/globals.rs
index d448ad71afc..1900fa850c0 100644
--- a/core/engine/src/builtins/number/globals.rs
+++ b/core/engine/src/builtins/number/globals.rs
@@ -4,11 +4,10 @@ use crate::{
     object::JsObject,
     realm::Realm,
     string::common::StaticJsStrings,
-    Context, JsArgs, JsResult, JsString, JsValue,
+    Context, JsArgs, JsResult, JsStr, JsString, JsValue,
 };
 
-use boa_macros::utf16;
-use num_traits::Num;
+use boa_macros::js_str;
 
 /// Builtin javascript 'isFinite(number)' function.
 ///
@@ -94,6 +93,55 @@ impl BuiltInObject for IsNaN {
     const NAME: JsString = StaticJsStrings::IS_NAN;
 }
 
+fn from_js_str_radix(src: JsStr<'_>, radix: u8) -> Option<f64> {
+    /// Determines whether a digit string of the given length and radix is guaranteed to
+    /// fit in a `u64` without overflowing.
+    /// Note that if the radix is known to the compiler, only the check of `digits_len` is
+    /// done at runtime.
+    fn can_not_overflow(radix: u8, digits_len: usize) -> bool {
+        usize::from(radix) <= 16 && digits_len <= std::mem::size_of::<u64>() * 2
+    }
+
+    const fn to_digit(input: u8, radix: u8) -> Option<u8> {
+        // If not a digit, a number greater than radix will be created.
+        let mut digit = input.wrapping_sub(b'0');
+        if radix > 10 {
+            debug_assert!(radix <= 36, "to_digit: radix is too high (maximum 36)");
+            if digit < 10 {
+                return Some(digit);
+            }
+            // Force the 6th bit to be set to ensure ascii is lower case.
+            digit = (input | 0b10_0000).wrapping_sub(b'a').saturating_add(10);
+        }
+        // FIXME: once then_some is const fn, use it here
+        if digit < radix {
+            Some(digit)
+        } else {
+            None
+        }
+    }
+
+    let src = src
+        .iter()
+        .map(|x| u8::try_from(x).expect("should be ascii string"));
+
+    let result = if can_not_overflow(radix, src.len()) {
+        let mut result = 0;
+        for c in src {
+            result = result * u64::from(radix) + u64::from(to_digit(c, radix)?);
+        }
+        result as f64
+    } else {
+        let mut result = 0f64;
+        for c in src {
+            result = result * f64::from(radix) + f64::from(to_digit(c, radix)?);
+        }
+        result
+    };
+
+    Some(result)
+}
+
 /// Builtin javascript 'parseInt(str, radix)' function.
 ///
 /// Parses the given string as an integer using the given radix as a base.
@@ -109,107 +157,108 @@ impl BuiltInObject for IsNaN {
 /// [spec]: https://tc39.es/ecma262/#sec-parseint-string-radix
 /// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseInt
 pub(crate) fn parse_int(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
-    if let (Some(val), radix) = (args.first(), args.get_or_undefined(1)) {
-        // 1. Let inputString be ? ToString(string).
-        let input_string = val.to_string(context)?;
-
-        // 2. Let S be ! TrimString(inputString, start).
-        let mut var_s = &input_string.trim_start().iter().collect::<Vec<_>>()[..];
-
-        // 3. Let sign be 1.
-        // 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
-        //    set sign to -1.
-        let sign = if !var_s.is_empty() && var_s.starts_with(utf16!("-")) {
-            -1
-        } else {
-            1
-        };
+    let (Some(val), radix) = (args.first(), args.get_or_undefined(1)) else {
+        // Not enough arguments to parseInt.
+        return Ok(JsValue::nan());
+    };
 
-        // 5. If S is not empty and the first code unit of S is the code unit 0x002B (PLUS SIGN) or
-        //    the code unit 0x002D (HYPHEN-MINUS), remove the first code unit from S.
-        if !var_s.is_empty() && (var_s.starts_with(utf16!("+")) || var_s.starts_with(utf16!("-"))) {
-            var_s = &var_s[1..];
-        }
+    // 1. Let inputString be ? ToString(string).
+    let input_string = val.to_string(context)?;
 
-        // 6. Let R be ℝ(? ToInt32(radix)).
-        let mut var_r = radix.to_i32(context)?;
+    // 2. Let S be ! TrimString(inputString, start).
+    let mut s = input_string.trim_start();
+    // NOTE: `s` is re-sliced in place below as the sign and the "0x" prefix are stripped.
 
-        // 7. Let stripPrefix be true.
-        let mut strip_prefix = true;
+    // 3. Let sign be 1.
+    // 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
+    //    set sign to -1.
+    let sign = if !s.is_empty() && s.starts_with(js_str!("-")) {
+        -1
+    } else {
+        1
+    };
 
-        // 8. If R ≠ 0, then
-        #[allow(clippy::if_not_else)]
-        if var_r != 0 {
-            //     a. If R < 2 or R > 36, return NaN.
-            if !(2..=36).contains(&var_r) {
-                return Ok(JsValue::nan());
-            }
+    // 5. If S is not empty and the first code unit of S is the code unit 0x002B (PLUS SIGN) or
+    //    the code unit 0x002D (HYPHEN-MINUS), remove the first code unit from S.
+    if !s.is_empty() && (s.starts_with(js_str!("+")) || s.starts_with(js_str!("-"))) {
+        s = s.get(1..).expect("already checked that it's not empty");
+    }
 
-            //     b. If R ≠ 16, set stripPrefix to false.
-            if var_r != 16 {
-                strip_prefix = false;
-            }
-        } else {
-            // 9. Else,
-            //     a. Set R to 10.
-            var_r = 10;
-        }
+    // 6. Let R be ℝ(? ToInt32(radix)).
+    let r = radix.to_i32(context)?;
 
-        // 10. If stripPrefix is true, then
-        //     a. If the length of S is at least 2 and the first two code units of S are either "0x" or "0X", then
-        //         i. Remove the first two code units from S.
-        //         ii. Set R to 16.
-        if strip_prefix
-            && var_s.len() >= 2
-            && (var_s.starts_with(utf16!("0x")) || var_s.starts_with(utf16!("0X")))
-        {
-            var_s = &var_s[2..];
+    // 7. Let stripPrefix be true.
+    let mut strip_prefix = true;
 
-            var_r = 16;
+    // 8. If R ≠ 0, then
+    #[allow(clippy::if_not_else)]
+    let mut r = if r != 0 {
+        //     a. If R < 2 or R > 36, return NaN.
+        if !(2..=36).contains(&r) {
+            return Ok(JsValue::nan());
         }
 
-        // 11. If S contains a code unit that is not a radix-R digit, let end be the index within S of the
-        //     first such code unit; otherwise, let end be the length of S.
-        let end = char::decode_utf16(var_s.iter().copied())
-            .position(|code| !code.is_ok_and(|c| c.is_digit(var_r as u32)))
-            .unwrap_or(var_s.len());
+        //     b. If R ≠ 16, set stripPrefix to false.
+        if r != 16 {
+            strip_prefix = false;
+        }
+        r as u8
+    } else {
+        // 9. Else,
+        //     a. Set R to 10.
+        10
+    };
+
+    // 10. If stripPrefix is true, then
+    //     a. If the length of S is at least 2 and the first two code units of S are either "0x" or "0X", then
+    //         i. Remove the first two code units from S.
+    //         ii. Set R to 16.
+    if strip_prefix
+        && s.len() >= 2
+        && (s.starts_with(js_str!("0x")) || s.starts_with(js_str!("0X")))
+    {
+        s = s
+            .get(2..)
+            .expect("already checked that it contains at least two chars");
+
+        r = 16;
+    }
 
-        // 12. Let Z be the substring of S from 0 to end.
-        let var_z = String::from_utf16_lossy(&var_s[..end]);
+    // 11. If S contains a code unit that is not a radix-R digit, let end be the index within S of the
+    //     first such code unit; otherwise, let end be the length of S.
+    let end = char::decode_utf16(s.iter())
+        .position(|code| !code.is_ok_and(|c| c.is_digit(u32::from(r))))
+        .unwrap_or(s.len());
 
-        // 13. If Z is empty, return NaN.
-        if var_z.is_empty() {
-            return Ok(JsValue::nan());
-        }
+    // 12. Let Z be the substring of S from 0 to end.
+    let z = s.get(..end).expect("should be in range");
 
-        // 14. Let mathInt be the integer value that is represented by Z in radix-R notation, using the
-        //     letters A-Z and a-z for digits with values 10 through 35. (However, if R is 10 and Z contains
-        //     more than 20 significant digits, every significant digit after the 20th may be replaced by a
-        //     0 digit, at the option of the implementation; and if R is not 2, 4, 8, 10, 16, or 32, then
-        //     mathInt may be an implementation-approximated value representing the integer value that is
-        //     represented by Z in radix-R notation.)
-        let math_int = u64::from_str_radix(&var_z, var_r as u32).map_or_else(
-            |_| f64::from_str_radix(&var_z, var_r as u32).expect("invalid_float_conversion"),
-            |i| i as f64,
-        );
-
-        // 15. If mathInt = 0, then
-        //     a. If sign = -1, return -0𝔽.
-        //     b. Return +0𝔽.
-        if math_int == 0_f64 {
-            if sign == -1 {
-                return Ok(JsValue::new(-0_f64));
-            }
+    // 13. If Z is empty, return NaN.
+    if z.is_empty() {
+        return Ok(JsValue::nan());
+    }
 
-            return Ok(JsValue::new(0_f64));
+    // 14. Let mathInt be the integer value that is represented by Z in radix-R notation, using the
+    //     letters A-Z and a-z for digits with values 10 through 35. (However, if R is 10 and Z contains
+    //     more than 20 significant digits, every significant digit after the 20th may be replaced by a
+    //     0 digit, at the option of the implementation; and if R is not 2, 4, 8, 10, 16, or 32, then
+    //     mathInt may be an implementation-approximated value representing the integer value that is
+    //     represented by Z in radix-R notation.)
+    let math_int = from_js_str_radix(z, r).expect("Already checked");
+
+    // 15. If mathInt = 0, then
+    //     a. If sign = -1, return -0𝔽.
+    //     b. Return +0𝔽.
+    if math_int == 0_f64 {
+        if sign == -1 {
+            return Ok(JsValue::new(-0_f64));
         }
 
-        // 16. Return 𝔽(sign × mathInt).
-        Ok(JsValue::new(f64::from(sign) * math_int))
-    } else {
-        // Not enough arguments to parseInt.
-        Ok(JsValue::nan())
+        return Ok(JsValue::new(0_f64));
     }
+
+    // 16. Return 𝔽(sign × mathInt).
+    Ok(JsValue::new(f64::from(sign) * math_int))
 }
 
 pub(crate) struct ParseInt;
diff --git a/core/engine/src/bytecompiler/declarations.rs b/core/engine/src/bytecompiler/declarations.rs
index 11de1994ed8..343af1af936 100644
--- a/core/engine/src/bytecompiler/declarations.rs
+++ b/core/engine/src/bytecompiler/declarations.rs
@@ -44,12 +44,7 @@ impl ByteCompiler<'_> {
 
         // 3. For each element name of lexNames, do
         for name in lex_names {
-            let name = self
-                .context
-                .interner()
-                .resolve_expect(name.sym())
-                .utf16()
-                .into();
+            let name = name.to_js_string(self.interner());
 
             // Note: Our implementation differs from the spec here.
             // a. If env.HasVarDeclaration(name) is true, throw a SyntaxError exception.
@@ -73,12 +68,7 @@ impl ByteCompiler<'_> {
 
         // 4. For each element name of varNames, do
         for name in var_names {
-            let name = self
-                .context
-                .interner()
-                .resolve_expect(name.sym())
-                .utf16()
-                .into();
+            let name = name.to_js_string(self.interner());
 
             // a. If env.HasLexicalDeclaration(name) is true, throw a SyntaxError exception.
             if env.has_lex_binding(&name) {
diff --git a/core/engine/src/bytecompiler/expression/mod.rs b/core/engine/src/bytecompiler/expression/mod.rs
index 27a226874f1..fb229b41c60 100644
--- a/core/engine/src/bytecompiler/expression/mod.rs
+++ b/core/engine/src/bytecompiler/expression/mod.rs
@@ -4,7 +4,7 @@ mod object_literal;
 mod unary;
 mod update;
 
-use super::{Access, Callable, NodeKind, Operand};
+use super::{Access, Callable, NodeKind, Operand, ToJsString};
 use crate::{
     bytecompiler::{ByteCompiler, Literal},
     vm::{GeneratorResumeKind, Opcode},
@@ -22,9 +22,9 @@ use boa_ast::{
 impl ByteCompiler<'_> {
     fn compile_literal(&mut self, lit: &AstLiteral, use_expr: bool) {
         match lit {
-            AstLiteral::String(v) => self.emit_push_literal(Literal::String(
-                self.interner().resolve_expect(*v).into_common(false),
-            )),
+            AstLiteral::String(v) => {
+                self.emit_push_literal(Literal::String(v.to_js_string(self.interner())));
+            }
             AstLiteral::Int(v) => self.emit_push_integer(*v),
             AstLiteral::Num(v) => self.emit_push_rational(*v),
             AstLiteral::BigInt(v) => {
@@ -58,9 +58,9 @@ impl ByteCompiler<'_> {
     fn compile_template_literal(&mut self, template_literal: &TemplateLiteral, use_expr: bool) {
         for element in template_literal.elements() {
             match element {
-                TemplateElement::String(s) => self.emit_push_literal(Literal::String(
-                    self.interner().resolve_expect(*s).into_common(false),
-                )),
+                TemplateElement::String(s) => {
+                    self.emit_push_literal(Literal::String(s.to_js_string(self.interner())));
+                }
                 TemplateElement::Expr(expr) => {
                     self.compile_expr(expr, true);
                 }
@@ -268,14 +268,12 @@ impl ByteCompiler<'_> {
                 for (cooked, raw) in template.cookeds().iter().zip(template.raws()) {
                     if let Some(cooked) = cooked {
                         self.emit_push_literal(Literal::String(
-                            self.interner().resolve_expect(*cooked).into_common(false),
+                            cooked.to_js_string(self.interner()),
                         ));
                     } else {
                         self.emit_opcode(Opcode::PushUndefined);
                     }
-                    self.emit_push_literal(Literal::String(
-                        self.interner().resolve_expect(*raw).into_common(false),
-                    ));
+                    self.emit_push_literal(Literal::String(raw.to_js_string(self.interner())));
                 }
 
                 self.emit(
diff --git a/core/engine/src/bytecompiler/mod.rs b/core/engine/src/bytecompiler/mod.rs
index 13b8867882f..7bcbd77dec9 100644
--- a/core/engine/src/bytecompiler/mod.rs
+++ b/core/engine/src/bytecompiler/mod.rs
@@ -21,7 +21,7 @@ use crate::{
         BindingOpcode, CodeBlock, CodeBlockFlags, Constant, GeneratorResumeKind, Handler,
         InlineCache, Opcode, VaryingOperandKind,
     },
-    Context, JsBigInt, JsString,
+    Context, JsBigInt, JsStr, JsString,
 };
 use boa_ast::{
     declaration::{Binding, LexicalDeclaration, VarDeclaration},
@@ -52,7 +52,15 @@ pub(crate) trait ToJsString {
 
 impl ToJsString for Sym {
     fn to_js_string(&self, interner: &Interner) -> JsString {
-        js_string!(interner.resolve_expect(*self).utf16())
+        // TODO: Identify latin1 encodeable strings during parsing to avoid this check.
+        let string = interner.resolve_expect(*self).utf16();
+        for c in string {
+            if u8::try_from(*c).is_err() {
+                return js_string!(string);
+            }
+        }
+        let string = string.iter().map(|c| *c as u8).collect::<Vec<_>>();
+        js_string!(JsStr::latin1(&string))
     }
 }
 
@@ -391,9 +399,9 @@ impl<'ctx> ByteCompiler<'ctx> {
             return *index;
         }
 
-        let string = self.interner().resolve_expect(name.sym()).utf16();
         let index = self.constants.len() as u32;
-        self.constants.push(Constant::String(js_string!(string)));
+        let string = name.to_js_string(self.interner());
+        self.constants.push(Constant::String(string));
         self.names_map.insert(name, index);
         index
     }
@@ -734,7 +742,7 @@ impl<'ctx> ByteCompiler<'ctx> {
     }
 
     fn resolve_identifier_expect(&self, identifier: Identifier) -> JsString {
-        js_string!(self.interner().resolve_expect(identifier.sym()).utf16())
+        identifier.to_js_string(self.interner())
     }
 
     fn access_get(&mut self, access: Access<'_>, use_expr: bool) {
diff --git a/core/engine/src/object/property_map.rs b/core/engine/src/object/property_map.rs
index a6b00215081..a480653df23 100644
--- a/core/engine/src/object/property_map.rs
+++ b/core/engine/src/object/property_map.rs
@@ -538,7 +538,6 @@ impl PropertyMap {
         self.shape = self.shape.insert_property_transition(transition_key);
 
         // Make Sure that if we are inserting, it has the correct slot index.
-        // dbg!(key);
         debug_assert_eq!(
             self.shape.lookup(key),
             Some(Slot {
diff --git a/core/engine/src/optimizer/pass/constant_folding.rs b/core/engine/src/optimizer/pass/constant_folding.rs
index 21cbc787612..c1725ddfa38 100644
--- a/core/engine/src/optimizer/pass/constant_folding.rs
+++ b/core/engine/src/optimizer/pass/constant_folding.rs
@@ -1,5 +1,6 @@
 use crate::{
-    builtins::Number, optimizer::PassAction, value::Numeric, Context, JsBigInt, JsString, JsValue,
+    builtins::Number, bytecompiler::ToJsString, optimizer::PassAction, value::Numeric, Context,
+    JsBigInt, JsValue,
 };
 use boa_ast::{
     expression::{
@@ -16,9 +17,7 @@ use boa_interner::JStrRef;
 
 fn literal_to_js_value(literal: &Literal, context: &mut Context) -> JsValue {
     match literal {
-        Literal::String(v) => JsValue::new(JsString::from(
-            context.interner().resolve_expect(*v).utf16(),
-        )),
+        Literal::String(v) => JsValue::new(v.to_js_string(context.interner())),
         Literal::Num(v) => JsValue::new(*v),
         Literal::Int(v) => JsValue::new(*v),
         Literal::BigInt(v) => JsValue::new(JsBigInt::new(v.clone())),
diff --git a/core/engine/src/string/mod.rs b/core/engine/src/string/mod.rs
index 71d6c0e7c73..de658250eeb 100644
--- a/core/engine/src/string/mod.rs
+++ b/core/engine/src/string/mod.rs
@@ -1262,29 +1262,6 @@ mod tests {
         assert_eq!(&y, s.trim_start());
     }
 
-    #[test]
-    fn hash_js_str_u8_and_u16() {
-        const STR_U8: &[u8] = b"Hello";
-        const STR_U16: &[u16] = utf16!("Hello");
-
-        let str_u8_hash = hash_value(&STR_U8);
-        let str_u16_hash = hash_value(&STR_U16);
-
-        assert_ne!(str_u8_hash, str_u16_hash);
-
-        let js_str_u8 = JsStr::utf16(STR_U16);
-        let js_str_u16 = JsStr::latin1(STR_U8);
-
-        assert_eq!(*STR_U16, js_str_u8);
-        assert_eq!(js_str_u8, js_str_u16);
-
-        let js_str_u8_hash = hash_value(&js_str_u8);
-        let js_str_u16_hash = hash_value(&js_str_u16);
-
-        // TODO: assert_eq!(js_str_u16_hash, str_u16_hash);
-        assert_eq!(js_str_u8_hash, js_str_u16_hash);
-    }
-
     #[test]
     fn conversion_to_known_static_js_string() {
         const JS_STR_U8: &JsStr<'_> = &js_str!("length");
diff --git a/core/engine/src/string/str.rs b/core/engine/src/string/str.rs
index 20604e58e8e..6ce3c4cabd4 100644
--- a/core/engine/src/string/str.rs
+++ b/core/engine/src/string/str.rs
@@ -187,11 +187,11 @@ impl<'a> JsStr<'a> {
     /// Returns an element or subslice depending on the type of index, otherwise [`None`].
     #[inline]
     #[must_use]
-    pub fn get<I>(&'a self, index: I) -> Option<I::Value>
+    pub fn get<I>(self, index: I) -> Option<I::Value>
     where
         I: JsSliceIndex<'a>,
     {
-        I::get(*self, index)
+        I::get(self, index)
     }
 
     /// Convert the [`JsStr`] into a [`Vec<U16>`].
@@ -201,22 +201,36 @@ impl<'a> JsStr<'a> {
             JsStrVariant::Utf16(v) => v.to_vec(),
         }
     }
+
+    /// Returns `true` if `needle` is a prefix of the [`JsStr`].
+    #[must_use]
+    pub fn starts_with(&self, needle: JsStr<'_>) -> bool {
+        let n = needle.len();
+        self.len() >= n && needle == self.get(..n).expect("already checked size")
+    }
+    /// Returns `true` if `needle` is a suffix of the [`JsStr`].
+    #[must_use]
+    pub fn ends_with(&self, needle: JsStr<'_>) -> bool {
+        let (m, n) = (self.len(), needle.len());
+        m >= n && needle == self.get(m - n..).expect("already checked size")
+    }
 }
 
 impl Hash for JsStr<'_> {
     fn hash<H: Hasher>(&self, state: &mut H) {
-        // https://github.com/rust-lang/rust/issues/96762
+        // NOTE: The hash function has been inlined to ensure that a hash of latin1 and U16
+        // encoded strings remains the same if they have the same characters
         match self.variant() {
             JsStrVariant::Latin1(s) => {
-                s.len().hash(state);
+                state.write_usize(s.len());
                 for elem in s {
-                    u16::from(*elem).hash(state);
+                    state.write_u16(u16::from(*elem));
                 }
             }
             JsStrVariant::Utf16(s) => {
-                s.len().hash(state);
+                state.write_usize(s.len());
                 for elem in s {
-                    elem.hash(state);
+                    state.write_u16(*elem);
                 }
             }
         }