Skip to content

Commit

Permalink
Fix line terminators in template strings (#3641)
Browse files Browse the repository at this point in the history
  • Loading branch information
raskad authored Feb 1, 2024
1 parent 0a18dae commit 6ddc2b4
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 73 deletions.
11 changes: 0 additions & 11 deletions core/parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,6 @@ impl<R: ReadChar> Cursor<R> {
}
}

/// Creates a new Lexer cursor with an initial position.
pub(super) fn with_position(inner: R, pos: Position) -> Self {
Self {
iter: inner,
pos,
strict: false,
module: false,
peeked: [None; 4],
}
}

/// Peeks the next n bytes, the maximum number of peeked bytes is 4 (n <= 4).
pub(super) fn peek_n(&mut self, n: u8) -> Result<&[Option<u32>; 4], Error> {
let _timer = Profiler::global().start_event("cursor::peek_n()", "Lexing");
Expand Down
165 changes: 126 additions & 39 deletions core/parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
//! Boa's lexing for ECMAScript template literals.
use crate::source::ReadChar;
use crate::{
lexer::{
string::{StringLiteral, UTF16CodeUnitsBuffer},
Cursor, Error, Token, TokenKind, Tokenizer,
},
source::UTF8Input,
lexer::{string::UTF16CodeUnitsBuffer, Cursor, Error, Token, TokenKind, Tokenizer},
source::ReadChar,
};
use boa_ast::{Position, Span};
use boa_interner::{Interner, Sym};
Expand All @@ -16,17 +12,30 @@ use std::io::{self, ErrorKind};
#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TemplateString {
/// The template string of template literal with argument `raw` true.
/// The raw template string.
raw: Sym,
/// The start position of the template string. Used to make lexer error if `to_owned_cooked`
/// failed.
start_pos: Position,

/// The cooked template string.
cooked: Option<Sym>,
}

impl TemplateString {
/// Creates a new `TemplateString` with the given raw template and start position.
pub const fn new(raw: Sym, start_pos: Position) -> Self {
Self { raw, start_pos }
pub fn new(raw: Sym, interner: &mut Interner) -> Self {
Self {
raw: Self::as_raw(raw, interner),
cooked: Self::as_cooked(raw, interner),
}
}

/// Returns the raw template string.
pub fn raw(self) -> Sym {
self.raw
}

/// Returns the cooked template string if it exists.
pub fn cooked(self) -> Option<Sym> {
self.cooked
}

/// Returns the raw template string as an interned symbol.
Expand All @@ -35,8 +44,34 @@ impl TemplateString {
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub const fn as_raw(self) -> Sym {
self.raw
fn as_raw(raw: Sym, interner: &mut Interner) -> Sym {
let string = interner.resolve_expect(raw).utf16();
let mut iter = string.iter().peekable();
let mut buf: Vec<u16> = Vec::new();
loop {
match iter.next() {
Some(0x5C /* \ */) => {
buf.push_code_point(0x5C);
match iter.next() {
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
}
Some(ch) => {
buf.push_code_point(u32::from(*ch));
}
None => break,
}
}
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
}
Some(ch) => {
buf.push_code_point(u32::from(*ch));
}
None => break,
}
}
interner.get_or_intern(buf.as_slice())
}

/// Creates a new cooked template string. Returns a lexer error if it fails to cook the
Expand All @@ -46,39 +81,91 @@ impl TemplateString {
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub fn to_owned_cooked(self, interner: &mut Interner) -> Result<Sym, Error> {
let string = interner.resolve_expect(self.raw).to_string();
let mut cursor = Cursor::with_position(UTF8Input::new(string.as_bytes()), self.start_pos);
fn as_cooked(raw: Sym, interner: &mut Interner) -> Option<Sym> {
let string = interner.resolve_expect(raw).utf16();
let mut iter = string.iter().peekable();
let mut buf: Vec<u16> = Vec::new();

loop {
let ch_start_pos = cursor.pos();
let ch = cursor.next_char()?;

match ch {
Some(0x005C /* \ */) => {
let escape_value = StringLiteral::take_escape_sequence_or_line_continuation(
&mut cursor,
ch_start_pos,
true,
true,
)?;

if let (Some(escape_value), _) = escape_value {
buf.push_code_point(escape_value);
}
match iter.next() {
Some(0x5C /* \ */) => {
let escape_value = match iter.next() {
Some(0x62 /* b */) => 0x08 /* <BS> */,
Some(0x74 /* t */) => 0x09 /* <HT> */,
Some(0x6E /* n */) => 0x0A /* <LF> */,
Some(0x76 /* v */) => 0x0B /* <VT> */,
Some(0x66 /* f */) => 0x0C /* <FF> */,
Some(0x72 /* r */) => 0x0D /* <CR> */,
Some(0x22 /* " */) => 0x22 /* " */,
Some(0x27 /* ' */) => 0x27 /* ' */,
Some(0x5C /* \ */) => 0x5C /* \ */,
Some(0x30 /* 0 */) if iter
.peek()
.filter(|ch| (0x30..=0x39 /* 0..=9 */).contains(**ch))
.is_none() => 0x00 /* NULL */,
// Hex Escape
Some(0x078 /* x */) => {
let mut s = String::with_capacity(2);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
u16::from_str_radix(&s, 16).ok()?.into()
}
// Unicode Escape
Some(0x75 /* u */) => {
let next = *iter.next()?;
if next == 0x7B /* { */ {
let mut buffer = String::with_capacity(6);
loop {
let next = *iter.next()?;
if next == 0x7D /* } */ {
break;
}
buffer.push(char::from_u32(u32::from(next))?);
}
let cp = u32::from_str_radix(&buffer, 16).ok()?;
if cp > 0x10_FFFF {
return None;
}
cp
} else {
let mut s = String::with_capacity(4);
s.push(char::from_u32(u32::from(next))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
u16::from_str_radix(&s, 16).ok()?.into()
}
}
// NonOctalDecimalEscapeSequence
Some(0x38 /* 8 */ | 0x39 /* 9 */) => {
return None;
}
// LegacyOctalEscapeSequence
Some(ch) if (0x30..=0x37 /* '0'..='7' */).contains(ch) => {
return None;
}
// Line Terminator
Some(0x0A /* <LF> */ | 0x0D /* <CR> */ | 0x2028 /* <LS> */ | 0x2029 /* <PS> */) => {
continue;
}
Some(ch) => {
u32::from(*ch)
}
None => return None,
};
buf.push_code_point(escape_value);
}
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
}
Some(ch) => {
// The caller guarantees that sequences '`' and '${' never appear
// LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and
// returns <LF>, which matches the TV of <CR> <LF>
buf.push_code_point(ch);
buf.push_code_point(u32::from(*ch));
}
None => break,
}
}

Ok(interner.get_or_intern(&buf[..]))
Some(interner.get_or_intern(buf.as_slice()))
}
}

Expand Down Expand Up @@ -120,7 +207,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
// `
0x0060 => {
let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos);
let template_string = TemplateString::new(raw_sym, interner);

return Ok(Token::new(
TokenKind::template_no_substitution(template_string),
Expand All @@ -130,7 +217,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
// $
0x0024 if cursor.next_if(0x7B /* { */)? => {
let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos);
let template_string = TemplateString::new(raw_sym, interner);

return Ok(Token::new(
TokenKind::template_middle(template_string),
Expand Down
2 changes: 1 addition & 1 deletion core/parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ fn check_template_literal_simple() {

assert_eq!(
lexer.next(interner).unwrap().unwrap().kind(),
&TokenKind::template_no_substitution(TemplateString::new(sym, Position::new(1, 1)))
&TokenKind::template_no_substitution(TemplateString::new(sym, interner))
);
}

Expand Down
2 changes: 1 addition & 1 deletion core/parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ impl TokenKind {
Self::Punctuator(punc) => punc.to_string(),
Self::StringLiteral((lit, _)) => interner.resolve_expect(lit).to_string(),
Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => {
interner.resolve_expect(ts.as_raw()).to_string()
interner.resolve_expect(ts.raw()).to_string()
}
Self::RegularExpressionLiteral(body, flags) => {
format!(
Expand Down
8 changes: 4 additions & 4 deletions core/parser/src/parser/expression/left_hand_side/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ where
loop {
match token.kind() {
TokenKind::TemplateMiddle(template_string) => {
raws.push(template_string.as_raw());
cookeds.push(template_string.to_owned_cooked(interner).ok());
raws.push(template_string.raw());
cookeds.push(template_string.cooked());
exprs.push(
Expression::new(None, true, self.allow_yield, self.allow_await)
.parse(cursor, interner)?,
Expand All @@ -77,8 +77,8 @@ where
)?;
}
TokenKind::TemplateNoSubstitution(template_string) => {
raws.push(template_string.as_raw());
cookeds.push(template_string.to_owned_cooked(interner).ok());
raws.push(template_string.raw());
cookeds.push(template_string.cooked());
return Ok(TaggedTemplate::new(
self.tag,
raws.into_boxed_slice(),
Expand Down
23 changes: 14 additions & 9 deletions core/parser/src/parser/expression/primary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,13 @@ where
Ok(node)
}
TokenKind::TemplateNoSubstitution(template_string) => {
let node = Literal::from(
template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?,
)
.into();
let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
tok.span().start(),
));
};
let node = Literal::from(cooked).into();
cursor.advance(interner);
Ok(node)
}
Expand Down Expand Up @@ -261,13 +262,17 @@ where
}
}
TokenKind::TemplateMiddle(template_string) => {
let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
tok.span().start(),
));
};
let parser = TemplateLiteral::new(
self.allow_yield,
self.allow_await,
tok.span().start(),
template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?,
cooked,
);
cursor.advance(interner);
parser.parse(cursor, interner).map(Into::into)
Expand Down
20 changes: 12 additions & 8 deletions core/parser/src/parser/expression/primary/template/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ where
loop {
match cursor.lex_template(self.start, interner)?.kind() {
TokenKind::TemplateMiddle(template_string) => {
let cooked = template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?;

let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
self.start,
));
};
elements.push(TemplateElement::String(cooked));
elements.push(TemplateElement::Expr(
Expression::new(None, true, self.allow_yield, self.allow_await)
Expand All @@ -93,10 +95,12 @@ where
)?;
}
TokenKind::TemplateNoSubstitution(template_string) => {
let cooked = template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?;

let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
self.start,
));
};
elements.push(TemplateElement::String(cooked));
return Ok(literal::TemplateLiteral::new(elements.into()));
}
Expand Down

0 comments on commit 6ddc2b4

Please sign in to comment.