Skip to content

Commit

Permalink
Add UTF-16 input parsing (#3538)
Browse files (browse the repository at this point in the history)
* Remove unused lexer code

* Remove UTF-8 byte handling in lexer

* Implement generic encoding input

* Allow non-UTF-8 regex parsing

* Apply review

* Apply review
  • Loading branch information
raskad authored Jan 5, 2024
1 parent b43ec0d commit 84a5e45
Show file tree
Hide file tree
Showing 79 changed files with 887 additions and 929 deletions.
4 changes: 2 additions & 2 deletions core/engine/src/builtins/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ impl Eval {

// 2. If Type(x) is not String, return x.
// TODO: rework parser to take an iterator of `u32` unicode codepoints
let Some(x) = x.as_string().map(JsString::to_std_string_escaped) else {
let Some(x) = x.as_string() else {
return Ok(x.clone());
};

Expand All @@ -118,7 +118,7 @@ impl Eval {
// b. If script is a List of errors, throw a SyntaxError exception.
// c. If script Contains ScriptBody is false, return undefined.
// d. Let body be the ScriptBody of script.
let mut parser = Parser::new(Source::from_bytes(&x));
let mut parser = Parser::new(Source::from_utf16(x));
parser.set_identifier(context.next_parser_identifier());
if strict {
parser.set_strict();
Expand Down
5 changes: 3 additions & 2 deletions core/engine/src/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod hooks;
pub(crate) mod icu;
pub mod intrinsics;

use boa_parser::source::ReadChar;
pub use hooks::{DefaultHooks, HostHooks};

#[cfg(feature = "intl")]
Expand All @@ -14,7 +15,7 @@ use intrinsics::Intrinsics;

#[cfg(not(feature = "intl"))]
pub use std::marker::PhantomData;
use std::{cell::Cell, io::Read, path::Path, rc::Rc};
use std::{cell::Cell, path::Path, rc::Rc};

use crate::{
builtins,
Expand Down Expand Up @@ -185,7 +186,7 @@ impl Context {
/// Note that this won't run any scheduled promise jobs; you need to call [`Context::run_jobs`]
/// on the context or [`JobQueue::run_jobs`] on the provided queue to run them.
#[allow(clippy::unit_arg, dropping_copy_types)]
pub fn eval<R: Read>(&mut self, src: Source<'_, R>) -> JsResult<JsValue> {
pub fn eval<R: ReadChar>(&mut self, src: Source<'_, R>) -> JsResult<JsValue> {
let main_timer = Profiler::global().start_event("Script evaluation", "Main");

let result = Script::parse(src, None, self)?.evaluate(self);
Expand Down
4 changes: 2 additions & 2 deletions core/engine/src/module/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ mod loader;
mod namespace;
mod source;
mod synthetic;
use boa_parser::source::ReadChar;
pub use loader::*;
pub use namespace::ModuleNamespace;
use source::SourceTextModule;
Expand All @@ -33,7 +34,6 @@ pub use synthetic::{SyntheticModule, SyntheticModuleInitializer};
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::hash::Hash;
use std::io::Read;
use std::rc::Rc;

use rustc_hash::FxHashSet;
Expand Down Expand Up @@ -141,7 +141,7 @@ impl Module {
/// Parses the provided `src` as an ECMAScript module, returning an error if parsing fails.
///
/// [spec]: https://tc39.es/ecma262/#sec-parsemodule
pub fn parse<R: Read>(
pub fn parse<R: ReadChar>(
src: Source<'_, R>,
realm: Option<Realm>,
context: &mut Context,
Expand Down
6 changes: 2 additions & 4 deletions core/engine/src/script.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
//! [spec]: https://tc39.es/ecma262/#sec-scripts
//! [script]: https://tc39.es/ecma262/#sec-script-records
use std::io::Read;

use boa_gc::{Finalize, Gc, GcRefCell, Trace};
use boa_parser::{Parser, Source};
use boa_parser::{source::ReadChar, Parser, Source};
use boa_profiler::Profiler;
use rustc_hash::FxHashMap;

Expand Down Expand Up @@ -76,7 +74,7 @@ impl Script {
/// Parses the provided `src` as an ECMAScript script, returning an error if parsing fails.
///
/// [spec]: https://tc39.es/ecma262/#sec-parse-script
pub fn parse<R: Read>(
pub fn parse<R: ReadChar>(
src: Source<'_, R>,
realm: Option<Realm>,
context: &mut Context,
Expand Down
6 changes: 3 additions & 3 deletions core/engine/src/tests/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ fn assignment_to_non_assignable_ctd() {
TestAction::assert_native_error(
src,
JsNativeErrorKind::Syntax,
"Invalid left-hand side in assignment at line 1, col 13",
"Invalid left-hand side in assignment at line 1, col 12",
)
}),
);
Expand Down Expand Up @@ -362,7 +362,7 @@ fn multicharacter_assignment_to_non_assignable_ctd() {
TestAction::assert_native_error(
src,
JsNativeErrorKind::Syntax,
"Invalid left-hand side in assignment at line 1, col 13",
"Invalid left-hand side in assignment at line 1, col 12",
)
}),
);
Expand Down Expand Up @@ -397,7 +397,7 @@ fn multicharacter_bitwise_assignment_to_non_assignable_ctd() {
TestAction::assert_native_error(
src,
JsNativeErrorKind::Syntax,
"Invalid left-hand side in assignment at line 1, col 13",
"Invalid left-hand side in assignment at line 1, col 12",
)
}),
);
Expand Down
10 changes: 5 additions & 5 deletions core/parser/src/lexer/comment.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
//! Boa's lexing for ECMAScript comments.
use crate::lexer::{Cursor, Error, Token, TokenKind, Tokenizer};
use crate::source::ReadChar;
use boa_ast::{Position, Span};
use boa_interner::Interner;
use boa_profiler::Profiler;
use std::io::Read;

/// Lexes a single line comment.
///
Expand All @@ -26,7 +26,7 @@ impl<R> Tokenizer<R> for SingleLineComment {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("SingleLineComment", "Lexing");

Expand Down Expand Up @@ -66,15 +66,15 @@ impl<R> Tokenizer<R> for MultiLineComment {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("MultiLineComment", "Lexing");

let mut new_line = false;
while let Some(ch) = cursor.next_char()? {
let tried_ch = char::try_from(ch);
match tried_ch {
Ok(c) if c == '*' && cursor.next_is(b'/')? => {
Ok(c) if c == '*' && cursor.next_if(0x2F /* / */)? => {
return Ok(Token::new(
if new_line {
TokenKind::LineTerminator
Expand Down Expand Up @@ -115,7 +115,7 @@ impl<R> Tokenizer<R> for HashbangComment {
_interner: &mut Interner,
) -> Result<Token, Error>
where
R: Read,
R: ReadChar,
{
let _timer = Profiler::global().start_event("Hashbang", "Lexing");

Expand Down
Loading

0 comments on commit 84a5e45

Please sign in to comment.