Skip to content

Commit

Permalink
Implement String.prototype.codePointAt
Browse files Browse the repository at this point in the history
Closes boa-dev#751.
  • Loading branch information
devinus committed Nov 18, 2020
1 parent 7f5868b commit 5838d85
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 12 deletions.
4 changes: 3 additions & 1 deletion boa/src/builtins/date/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,9 @@ fn date_proto_to_string() -> Result<(), Box<dyn std::error::Error>> {

assert_eq!(
Some(Value::string(
Local::now()
Local
.ymd(2020, 6, 8)
.and_hms(9, 16, 15)
.format("Wed Jul 08 2020 09:16:15 GMT%:z")
.to_string()
)),
Expand Down
70 changes: 59 additions & 11 deletions boa/src/builtins/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ pub(crate) fn code_point_at(string: RcString, position: i32) -> Option<(u32, u8,
}

/// Returns `true` when `value` is a UTF-16 leading (high) surrogate code unit,
/// i.e. when it lies in the inclusive range `0xD800..=0xDBFF`.
fn is_leading_surrogate(value: u16) -> bool {
    // The upper bound must be inclusive: 0xDBFF is itself a leading surrogate,
    // and a half-open `0xD800..0xDBFF` range would wrongly reject it.
    (0xD800..=0xDBFF).contains(&value)
}

/// Returns `true` when `value` is a UTF-16 trailing (low) surrogate code unit,
/// i.e. when it lies in the inclusive range `0xDC00..=0xDFFF`.
fn is_trailing_surrogate(value: u16) -> bool {
    // The upper bound must be inclusive: 0xDFFF is itself a trailing surrogate,
    // and a half-open `0xDC00..0xDFFF` range would wrongly reject it.
    (0xDC00..=0xDFFF).contains(&value)
}

/// JavaScript `String` implementation.
Expand Down Expand Up @@ -84,6 +84,7 @@ impl BuiltIn for String {
.property("length", 0, attribute)
.method(Self::char_at, "charAt", 1)
.method(Self::char_code_at, "charCodeAt", 1)
.method(Self::code_point_at, "codePointAt", 1)
.method(Self::to_string, "toString", 0)
.method(Self::concat, "concat", 1)
.method(Self::repeat, "repeat", 1)
Expand Down Expand Up @@ -197,14 +198,17 @@ impl String {
.unwrap_or_else(Value::undefined)
.to_integer(context)? as i32;

// Fast path returning empty string when pos is obviously out of range
if pos < 0 || pos >= primitive_val.len() as i32 {
return Ok("".into());
}

// Calling .len() on a string would give the wrong result, as they are bytes not the number of
// unicode code points
// Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of
// bytes is an O(1) operation.
let length = primitive_val.chars().count();

// We should return an empty string if pos is out of range
if pos >= length as i32 || pos < 0 {
let length = primitive_val.chars().count() as i32;
if pos >= length {
return Ok("".into());
}

Expand All @@ -216,6 +220,46 @@ impl String {
))
}

/// `String.prototype.codePointAt( index )`
///
/// The `codePointAt()` method returns an integer between `0` and `65535` representing the UTF-16 code unit at the given index.
///
/// If no UTF-16 surrogate pair begins at the index, the code point at the index is returned.
///
/// `codePointAt()` returns `undefined` if the given index is less than `0`, or if it is equal to or greater than the `length` of the string.
///
/// More information:
/// - [ECMAScript reference][spec]
/// - [MDN documentation][mdn]
///
/// [spec]: https://tc39.es/ecma262/#sec-string.prototype.codepointat
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/codePointAt
pub(crate) fn code_point_at(
this: &Value,
args: &[Value],
context: &mut Context,
) -> Result<Value> {
// First we get it the actual string a private field stored on the object only the context has access to.
// Then we convert it into a Rust String by wrapping it in from_value
let primitive_val = this.to_string(context)?;
let pos = args
.get(0)
.cloned()
.unwrap_or_else(Value::undefined)
.to_integer(context)? as i32;

// Fast path returning undefined when pos is obviously out of range
if pos < 0 || pos >= primitive_val.len() as i32 {
return Ok(Value::undefined());
}

if let Some((code_point, _, _)) = code_point_at(primitive_val, pos) {
Ok(Value::from(code_point))
} else {
Ok(Value::undefined())
}
}

/// `String.prototype.charCodeAt( index )`
///
/// The `charCodeAt()` method returns an integer between `0` and `65535` representing the UTF-16 code unit at the given index.
Expand All @@ -238,17 +282,21 @@ impl String {
// First we get the actual string, a private field stored on the object that only the context has access to.
// Then we convert it into a Rust String by wrapping it in from_value.
let primitive_val = this.to_string(context)?;

// Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points
// Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation.
let length = primitive_val.chars().count();
let pos = args
.get(0)
.cloned()
.unwrap_or_else(Value::undefined)
.to_integer(context)? as i32;

if pos >= length as i32 || pos < 0 {
// Fast path returning NaN when pos is obviously out of range
if pos < 0 || pos >= primitive_val.len() as i32 {
return Ok(Value::from(NAN));
}

// Calling .len() on a string would give the wrong result, as they are bytes not the number of unicode code points
// Note that this is an O(N) operation (because UTF-8 is complex) while getting the number of bytes is an O(1) operation.
let length = primitive_val.chars().count() as i32;
if pos >= length {
return Ok(Value::from(NAN));
}

Expand Down
16 changes: 16 additions & 0 deletions boa/src/builtins/string/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ fn last_index_non_integer_position_argument() {
#[test]
fn char_at() {
let mut context = Context::new();
assert_eq!(forward(&mut context, "'abc'.charAt(-1)"), "\"\"");
assert_eq!(forward(&mut context, "'abc'.charAt(1)"), "\"b\"");
assert_eq!(forward(&mut context, "'abc'.charAt(9)"), "\"\"");
assert_eq!(forward(&mut context, "'abc'.charAt()"), "\"a\"");
Expand All @@ -784,12 +785,27 @@ fn char_at() {
/// Checks `String.prototype.charCodeAt` for in-range, out-of-range, negative,
/// missing and `null` index arguments.
#[test]
fn char_code_at() {
    let mut context = Context::new();

    // (script, expected display output) pairs, evaluated in order on one shared context.
    let cases = [
        ("'abc'.charCodeAt(-1)", "NaN"),
        ("'abc'.charCodeAt(1)", "98"),
        ("'abc'.charCodeAt(9)", "NaN"),
        ("'abc'.charCodeAt()", "97"),
        ("'abc'.charCodeAt(null)", "97"),
    ];

    for &(script, expected) in cases.iter() {
        assert_eq!(forward(&mut context, script), expected);
    }
}

/// Checks `String.prototype.codePointAt` for in-range, out-of-range, negative,
/// missing and `null` index arguments, plus a surrogate pair that must be
/// combined into a single code point.
#[test]
fn code_point_at() {
    let mut context = Context::new();

    // (script, expected display output) pairs, evaluated in order on one shared context.
    let cases = [
        ("'abc'.codePointAt(-1)", "undefined"),
        ("'abc'.codePointAt(1)", "98"),
        ("'abc'.codePointAt(9)", "undefined"),
        ("'abc'.codePointAt()", "97"),
        ("'abc'.codePointAt(null)", "97"),
        ("'\\uD800\\uDC00'.codePointAt(0)", "65536"),
    ];

    for &(script, expected) in cases.iter() {
        assert_eq!(forward(&mut context, script), expected);
    }
}

#[test]
fn slice() {
let mut context = Context::new();
Expand Down
4 changes: 4 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Manual smoke checks for String.prototype.codePointAt.
// Each call prints its result; the trailing comment on each line is the expected output.
console.log("☃★♲".codePointAt(1)); // 9733
console.log('ABC'.codePointAt(1)); // 66
console.log('\uD800\uDC00'.codePointAt(0)); // 65536
console.log('XYZ'.codePointAt(42)); // undefined

0 comments on commit 5838d85

Please sign in to comment.