Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Implement escape and unescape #2768

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions boa_cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ repository.workspace = true
rust-version.workspace = true

[dependencies]
boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "trace"] }
boa_ast = { workspace = true, features = ["serde"]}
boa_engine = { workspace = true, features = ["deser", "console", "flowgraph", "trace", "annex-b"] }
boa_ast = { workspace = true, features = ["serde"] }
boa_parser.workspace = true
rustyline = { version = "11.0.0", features = ["derive"]}
clap = { version = "4.2.1", features = ["derive"] }
Expand Down
3 changes: 3 additions & 0 deletions boa_engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ trace = []
# Enable Boa's WHATWG console object implementation.
console = []

# Enable Boa's additional ECMAScript features for web browsers.
annex-b = []

[dependencies]
boa_interner.workspace = true
boa_gc = { workspace = true, features = [ "thinvec" ] }
Expand Down
256 changes: 256 additions & 0 deletions boa_engine/src/builtins/escape/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
//! Boa's implementation of ECMAScript's string escaping functions.
//!
//! The `escape()` function replaces all characters with escape sequences, with the exception of ASCII
//! word characters (A–Z, a–z, 0–9, _) and the punctuation characters `@*+-./`.
//!
//! The `unescape()` function replaces any escape sequence with the character that it represents.
//!
//! More information:
//! - [ECMAScript reference][spec]
//!
//! [spec]: https://tc39.es/ecma262/#sec-additional-properties-of-the-global-object

use crate::{
context::intrinsics::Intrinsics, js_string, Context, JsArgs, JsObject, JsResult, JsValue,
};

use super::{BuiltInBuilder, BuiltInObject, IntrinsicObject};

/// The `escape` function
///
/// Zero-sized marker type used to register the Annex B `escape` builtin through the
/// `IntrinsicObject` and `BuiltInObject` traits.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Escape;

impl IntrinsicObject for Escape {
// Builds the intrinsic as a callable backed by the `escape` function below, named
// `Self::NAME` and reporting a `length` of 1 (its single `string` parameter).
fn init(intrinsics: &Intrinsics) {
BuiltInBuilder::with_intrinsic::<Self>(intrinsics)
.callable(escape)
.name(Self::NAME)
.length(1)
.build();
}
// Returns the `escape` function object stored in the intrinsics, converted into a `JsObject`.
fn get(intrinsics: &Intrinsics) -> JsObject {
intrinsics.objects().escape().into()
}
}

impl BuiltInObject for Escape {
// Name passed to the builder in `init` above.
const NAME: &'static str = "escape";
}

/// Builtin JavaScript `escape ( string )` function.
///
/// Converts the first argument to a string and returns a copy in which every UTF-16 code unit
/// outside of the `unescapedSet` (ASCII letters, ASCII digits and `_@*+-./`) is replaced by an
/// escape sequence:
///
/// - `%XX` (two uppercase hex digits) for code units below 256;
/// - `%uXXXX` (four uppercase hex digits) for every other code unit.
///
/// Returns an error only if the `ToString` conversion of the argument fails.
fn escape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
    /// Uppercase hexadecimal digits, indexed by nibble value.
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    /// Punctuation characters that are part of the `unescapedSet`.
    const UNESCAPED_PUNCTUATION: &[u8; 7] = b"_@*+-./";

    /// Returns `true` if the code unit `cp` is part of the `unescapedSet`.
    fn is_unescaped(cp: u16) -> bool {
        // 4. Let unescapedSet be the string-concatenation of the ASCII word characters and "@*+-./".
        matches!(cp, 0x30..=0x39 | 0x41..=0x5A | 0x61..=0x7A)
            || UNESCAPED_PUNCTUATION.iter().any(|&b| u16::from(b) == cp)
    }

    /// Returns the code unit of the uppercase hex digit for the lowest nibble of `value`.
    fn hex_digit(value: u16) -> u16 {
        u16::from(HEX_DIGITS[usize::from(value & 0xF)])
    }

    // 1. Set string to ? ToString(string).
    let string = args.get_or_undefined(0).to_string(context)?;

    // 3. Let R be the empty String.
    // The input length is only a lower bound for the output: escaped units grow to 3 or 6 units.
    let mut vec = Vec::with_capacity(string.len());

    // 2. Let len be the length of string.
    // 5. Let k be 0.
    // 6. Repeat, while k < len,
    //     a. Let C be the code unit at index k within string.
    for &cp in &*string {
        // b. If unescapedSet contains C, then
        if is_unescaped(cp) {
            // i. Let S be C.
            vec.push(cp);
            continue;
        }

        // c. Else,
        //     i. Let n be the numeric value of C.
        // The digits are pushed directly as code units instead of going through `format!`, which
        // would allocate a temporary `String` for every escaped code unit.
        vec.push(u16::from(b'%'));

        // ii. If n < 256, S is "%" followed by 2 uppercase hex digits.
        // iii. Else, S is "%u" followed by 4 uppercase hex digits.
        if cp >= 256 {
            vec.push(u16::from(b'u'));
            vec.push(hex_digit(cp >> 12));
            vec.push(hex_digit(cp >> 8));
        }
        vec.push(hex_digit(cp >> 4));
        vec.push(hex_digit(cp));

        // d. Set R to the string-concatenation of R and S.
        // e. Set k to k + 1.
    }

    // 7. Return R.
    Ok(js_string!(vec).into())
}

/// The `unescape` function
///
/// Zero-sized marker type used to register the Annex B `unescape` builtin through the
/// `IntrinsicObject` and `BuiltInObject` traits.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Unescape;

impl IntrinsicObject for Unescape {
// Builds the intrinsic as a callable backed by the `unescape` function below, named
// `Self::NAME` and reporting a `length` of 1 (its single `string` parameter).
fn init(intrinsics: &Intrinsics) {
BuiltInBuilder::with_intrinsic::<Self>(intrinsics)
.callable(unescape)
.name(Self::NAME)
.length(1)
.build();
}
// Returns the `unescape` function object stored in the intrinsics, converted into a `JsObject`.
fn get(intrinsics: &Intrinsics) -> JsObject {
intrinsics.objects().unescape().into()
}
}

impl BuiltInObject for Unescape {
// Name passed to the builder in `init` above.
const NAME: &'static str = "unescape";
}

/// Builtin JavaScript `unescape ( string )` function.
///
/// Converts the first argument to a string and returns a copy in which every well-formed escape
/// sequence (`%XX` or `%uXXXX`, with hexadecimal digits of any case) is replaced by the code unit
/// it denotes. A `%` that does not start a well-formed sequence is copied through unchanged.
///
/// Returns an error only if the `ToString` conversion of the argument fails.
fn unescape(_: &JsValue, args: &[JsValue], context: &mut Context<'_>) -> JsResult<JsValue> {
    /// Code unit 0x0025 (PERCENT SIGN), which introduces every escape sequence.
    const PERCENT: u16 = 0x0025;

    /// Maps the code unit of a hexadecimal digit to its numeric value.
    fn to_hex_digit(cp: u16) -> Option<u16> {
        let digit = char::from_u32(u32::from(cp))?.to_digit(16)?;
        digit.try_into().ok()
    }

    /// Tries to decode the escape sequence formed by the code units that follow a `%`.
    ///
    /// `window` holds at most the next five code units. On success, returns the decoded code
    /// unit together with the number of code units of `window` that belong to the sequence.
    fn decode_escape(window: &[u16]) -> Option<(u16, usize)> {
        match *window {
            // iii. If k + 5 < len and the code unit at index k + 1 within string is the code unit
            //      0x0075 (LATIN SMALL LETTER U), hexDigits is the substring from k + 2 to k + 6
            //      and optionalAdvance is 5.
            [marker, d1, d2, d3, d4] if marker == u16::from(b'u') => {
                let value = [d1, d2, d3, d4]
                    .iter()
                    .try_fold(0u16, |acc, &digit| Some((acc << 4) + to_hex_digit(digit)?))?;
                Some((value, 5))
            }
            // iv. Else if k + 3 ≤ len, hexDigits is the substring from k + 1 to k + 3 and
            //     optionalAdvance is 2.
            [d1, d2, ..] => {
                let high = to_hex_digit(d1)?;
                let low = to_hex_digit(d2)?;
                Some(((high << 4) + low, 2))
            }
            // Fewer than two code units remain after the `%`.
            _ => None,
        }
    }

    // 1. Set string to ? ToString(string).
    let string = args.get_or_undefined(0).to_string(context)?;

    // 3. Let R be the empty String.
    let mut unescaped = Vec::with_capacity(string.len());

    let mut code_units = <PeekableN<_, 6>>::new(string.iter().copied());

    // 2. Let len be the length of string.
    // 4. Let k be 0.
    // 5. Repeat, while k < len,
    //     a. Let C be the code unit at index k within string.
    while let Some(unit) = code_units.next() {
        // b. Only the code unit 0x0025 (PERCENT SIGN) can start an escape sequence; anything
        //    else is appended as is.
        if unit != PERCENT {
            unescaped.push(unit);
            continue;
        }

        // i. - vi. Look ahead without consuming, so that a malformed sequence leaves the code
        // units after the `%` to be processed by the following iterations.
        let decoded = decode_escape(code_units.peek_n(5));

        match decoded {
            Some((value, advance)) => {
                // 3. Set k to k + optionalAdvance.
                for _ in 0..advance {
                    code_units.next();
                }
                // 2. Set C to the code unit whose numeric value is n.
                unescaped.push(value);
            }
            // parseResult is not a Parse Node: C stays the percent sign.
            None => unescaped.push(PERCENT),
        }
        // c. Set R to the string-concatenation of R and C.
        // d. Set k to k + 1.
    }

    // 6. Return R.
    Ok(js_string!(unescaped).into())
}

/// An iterator adaptor that can look ahead at up to `N` upcoming items without consuming them.
///
/// Items pulled from the wrapped iterator while peeking are kept in a fixed-size buffer and are
/// handed out, in order, by subsequent calls to [`Iterator::next`].
struct PeekableN<I, const N: usize>
where
    I: Iterator,
{
    /// The wrapped iterator.
    iterator: I,
    /// Storage for peeked items; only `buffer[..buffered_end]` holds live items.
    buffer: [I::Item; N],
    /// Number of live items currently stored at the front of `buffer`.
    buffered_end: usize,
}

impl<I, const N: usize> PeekableN<I, N>
where
    I: Iterator,
    I::Item: Default + Copy,
{
    /// Creates a new `PeekableN` wrapping `iterator`, with an empty look-ahead buffer.
    fn new(iterator: I) -> Self {
        Self {
            iterator,
            buffer: [I::Item::default(); N],
            buffered_end: 0,
        }
    }

    /// Peeks up to `count` items from the iterator without consuming them.
    ///
    /// The returned slice is shorter than `count` if the iterator runs out of items, or if
    /// `count` exceeds the buffer capacity `N`, in which case at most `N` items are returned
    /// instead of indexing past the end of the buffer.
    fn peek_n(&mut self, count: usize) -> &[I::Item] {
        // Never look further ahead than the buffer can hold.
        let count = count.min(N);

        while self.buffered_end < count {
            let Some(next) = self.iterator.next() else {
                break;
            };
            self.buffer[self.buffered_end] = next;
            self.buffered_end += 1;
        }

        &self.buffer[..count.min(self.buffered_end)]
    }
}

impl<I, const N: usize> Iterator for PeekableN<I, N>
where
    I: Iterator,
    I::Item: Copy,
{
    type Item = I::Item;

    /// Yields buffered (previously peeked) items first, then falls back to the wrapped iterator.
    fn next(&mut self) -> Option<Self::Item> {
        if self.buffered_end == 0 {
            return self.iterator.next();
        }

        let item = self.buffer[0];
        // Shift the remaining live items to the front; slots past them are never read.
        self.buffer.copy_within(1..self.buffered_end, 0);
        self.buffered_end -= 1;
        Some(item)
    }
}
16 changes: 16 additions & 0 deletions boa_engine/src/builtins/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ pub mod weak;
pub mod weak_map;
pub mod weak_set;

#[cfg(feature = "annex-b")]
pub mod escape;

#[cfg(feature = "intl")]
pub mod intl;

Expand Down Expand Up @@ -253,6 +256,13 @@ impl Intrinsics {
WeakRef::init(&intrinsics);
WeakMap::init(&intrinsics);
WeakSet::init(&intrinsics);

#[cfg(feature = "annex-b")]
{
escape::Escape::init(&intrinsics);
escape::Unescape::init(&intrinsics);
}

#[cfg(feature = "intl")]
{
intl::Intl::init(&intrinsics);
Expand Down Expand Up @@ -354,6 +364,12 @@ pub(crate) fn set_default_global_bindings(context: &mut Context<'_>) -> JsResult
global_binding::<WeakMap>(context)?;
global_binding::<WeakSet>(context)?;

#[cfg(feature = "annex-b")]
{
global_binding::<escape::Escape>(context)?;
global_binding::<escape::Unescape>(context)?;
}

#[cfg(feature = "intl")]
global_binding::<intl::Intl>(context)?;

Expand Down
28 changes: 28 additions & 0 deletions boa_engine/src/context/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,14 @@ pub struct IntrinsicObjects {
/// [`%parseInt%`](https://tc39.es/ecma262/#sec-parseint-string-radix)
parse_int: JsFunction,

/// [`%escape%`](https://tc39.es/ecma262/#sec-escape-string)
#[cfg(feature = "annex-b")]
escape: JsFunction,
jedel1043 marked this conversation as resolved.
Show resolved Hide resolved

/// [`%unescape%`](https://tc39.es/ecma262/#sec-unescape-string)
#[cfg(feature = "annex-b")]
unescape: JsFunction,

/// [`%Intl%`](https://tc39.es/ecma402/#intl-object)
#[cfg(feature = "intl")]
intl: JsObject,
Expand All @@ -786,6 +794,10 @@ impl Default for IntrinsicObjects {
is_nan: JsFunction::from_object_unchecked(JsObject::default()),
parse_float: JsFunction::from_object_unchecked(JsObject::default()),
parse_int: JsFunction::from_object_unchecked(JsObject::default()),
#[cfg(feature = "annex-b")]
escape: JsFunction::from_object_unchecked(JsObject::default()),
#[cfg(feature = "annex-b")]
unescape: JsFunction::from_object_unchecked(JsObject::default()),
#[cfg(feature = "intl")]
intl: JsObject::default(),
}
Expand Down Expand Up @@ -892,6 +904,22 @@ impl IntrinsicObjects {
self.parse_int.clone()
}

/// Gets the [`%escape%`][spec] intrinsic function.
///
/// Returns a clone of the stored function handle. Only compiled when the `annex-b` feature is
/// enabled.
///
/// [spec]: https://tc39.es/ecma262/#sec-escape-string
#[cfg(feature = "annex-b")]
pub fn escape(&self) -> JsFunction {
self.escape.clone()
}

/// Gets the [`%unescape%`][spec] intrinsic function.
///
/// Returns a clone of the stored function handle. Only compiled when the `annex-b` feature is
/// enabled.
///
/// [spec]: https://tc39.es/ecma262/#sec-unescape-string
#[cfg(feature = "annex-b")]
pub fn unescape(&self) -> JsFunction {
self.unescape.clone()
}

/// Gets the [`%Intl%`][spec] intrinsic object.
///
/// [spec]: https://tc39.es/ecma402/#intl-object
Expand Down
2 changes: 1 addition & 1 deletion boa_tester/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repository.workspace = true
rust-version.workspace = true

[dependencies]
boa_engine.workspace = true
boa_engine = { workspace = true, features = ["annex-b"] }
boa_gc.workspace = true
clap = { version = "4.2.1", features = ["derive"] }
serde = { version = "1.0.159", features = ["derive"] }
Expand Down
2 changes: 1 addition & 1 deletion boa_tester/src/edition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ impl SpecEdition {
pub(crate) fn from_test_metadata(metadata: &MetaData) -> Result<Self, Vec<&str>> {
let mut min_edition = if metadata.flags.contains(&TestFlag::Async) {
Self::ES8
} else if metadata.es6id.is_some() || metadata.flags.contains(&TestFlag::Module) {
} else if metadata.flags.contains(&TestFlag::Module) {
Self::ES6
} else {
Self::ES5
Expand Down
2 changes: 1 addition & 1 deletion boa_wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ repository.workspace = true
rust-version.workspace = true

[dependencies]
boa_engine = { workspace = true, features = ["console"] }
boa_engine = { workspace = true, features = ["console", "annex-b"] }
wasm-bindgen = "0.2.84"
getrandom = { version = "0.2.8", features = ["js"] }

Expand Down