Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix line terminators in template strings #3641

Merged
merged 1 commit into from
Feb 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions core/parser/src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,6 @@ impl<R: ReadChar> Cursor<R> {
}
}

/// Creates a new Lexer cursor with an initial position.
///
/// The cursor reads from `inner` and starts at `pos`. The `strict` and `module` flags are
/// both initialized to `false`, and all four peek slots start out as `None`.
pub(super) fn with_position(inner: R, pos: Position) -> Self {
Self {
iter: inner,
pos,
strict: false,
module: false,
peeked: [None; 4],
}
}

/// Peeks the next n bytes, the maximum number of peeked bytes is 4 (n <= 4).
pub(super) fn peek_n(&mut self, n: u8) -> Result<&[Option<u32>; 4], Error> {
let _timer = Profiler::global().start_event("cursor::peek_n()", "Lexing");
Expand Down
165 changes: 126 additions & 39 deletions core/parser/src/lexer/template.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
//! Boa's lexing for ECMAScript template literals.

use crate::source::ReadChar;
use crate::{
lexer::{
string::{StringLiteral, UTF16CodeUnitsBuffer},
Cursor, Error, Token, TokenKind, Tokenizer,
},
source::UTF8Input,
lexer::{string::UTF16CodeUnitsBuffer, Cursor, Error, Token, TokenKind, Tokenizer},
source::ReadChar,
};
use boa_ast::{Position, Span};
use boa_interner::{Interner, Sym};
Expand All @@ -16,17 +12,30 @@ use std::io::{self, ErrorKind};
#[cfg_attr(feature = "deser", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TemplateString {
/// The template string of template literal with argument `raw` true.
/// The raw template string.
raw: Sym,
/// The start position of the template string. Used to make lexer error if `to_owned_cooked`
/// failed.
start_pos: Position,

/// The cooked template string.
cooked: Option<Sym>,
}

impl TemplateString {
/// Creates a new `TemplateString` from the given raw template symbol.
pub const fn new(raw: Sym, start_pos: Position) -> Self {
Self { raw, start_pos }
pub fn new(raw: Sym, interner: &mut Interner) -> Self {
Self {
raw: Self::as_raw(raw, interner),
cooked: Self::as_cooked(raw, interner),
}
}

/// Returns the raw template string.
///
/// This is the symbol stored in the `raw` field when the `TemplateString` was created.
pub fn raw(self) -> Sym {
self.raw
}

/// Returns the cooked template string if it exists.
///
/// The value is `None` when the template string could not be cooked, for example because
/// it contains an invalid escape sequence.
pub fn cooked(self) -> Option<Sym> {
self.cooked
}

/// Returns the raw form of the template string as an interned symbol.
Expand All @@ -35,8 +44,34 @@ impl TemplateString {
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub const fn as_raw(self) -> Sym {
self.raw
fn as_raw(raw: Sym, interner: &mut Interner) -> Sym {
    // Every code unit is copied through unchanged except <CR>, which is rewritten to <LF>.
    // A unit that directly follows a backslash receives exactly the same treatment, so a
    // single pass over the code units is enough and no escape tracking is required.
    let units = interner.resolve_expect(raw).utf16();
    let mut normalized: Vec<u16> = Vec::new();
    for &unit in units.iter() {
        let code_point = match unit {
            0x0D /* <CR> */ => 0x0A, /* <LF> */
            other => u32::from(other),
        };
        normalized.push_code_point(code_point);
    }
    interner.get_or_intern(normalized.as_slice())
}

/// Creates a new cooked template string. Returns a lexer error if it fails to cook the
Expand All @@ -46,39 +81,91 @@ impl TemplateString {
/// - [ECMAScript reference][spec]
///
/// [spec]: https://tc39.es/ecma262/#sec-static-semantics-templatestrings
pub fn to_owned_cooked(self, interner: &mut Interner) -> Result<Sym, Error> {
let string = interner.resolve_expect(self.raw).to_string();
let mut cursor = Cursor::with_position(UTF8Input::new(string.as_bytes()), self.start_pos);
fn as_cooked(raw: Sym, interner: &mut Interner) -> Option<Sym> {
let string = interner.resolve_expect(raw).utf16();
let mut iter = string.iter().peekable();
let mut buf: Vec<u16> = Vec::new();

loop {
let ch_start_pos = cursor.pos();
let ch = cursor.next_char()?;

match ch {
Some(0x005C /* \ */) => {
let escape_value = StringLiteral::take_escape_sequence_or_line_continuation(
&mut cursor,
ch_start_pos,
true,
true,
)?;

if let (Some(escape_value), _) = escape_value {
buf.push_code_point(escape_value);
}
match iter.next() {
Some(0x5C /* \ */) => {
let escape_value = match iter.next() {
Some(0x62 /* b */) => 0x08 /* <BS> */,
Some(0x74 /* t */) => 0x09 /* <HT> */,
Some(0x6E /* n */) => 0x0A /* <LF> */,
Some(0x76 /* v */) => 0x0B /* <VT> */,
Some(0x66 /* f */) => 0x0C /* <FF> */,
Some(0x72 /* r */) => 0x0D /* <CR> */,
Some(0x22 /* " */) => 0x22 /* " */,
Some(0x27 /* ' */) => 0x27 /* ' */,
Some(0x5C /* \ */) => 0x5C /* \ */,
Some(0x30 /* 0 */) if iter
.peek()
.filter(|ch| (0x30..=0x39 /* 0..=9 */).contains(**ch))
.is_none() => 0x00 /* NULL */,
// Hex Escape
Some(0x078 /* x */) => {
let mut s = String::with_capacity(2);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
u16::from_str_radix(&s, 16).ok()?.into()
}
// Unicode Escape
Some(0x75 /* u */) => {
let next = *iter.next()?;
if next == 0x7B /* { */ {
let mut buffer = String::with_capacity(6);
loop {
let next = *iter.next()?;
if next == 0x7D /* } */ {
break;
}
buffer.push(char::from_u32(u32::from(next))?);
}
let cp = u32::from_str_radix(&buffer, 16).ok()?;
if cp > 0x10_FFFF {
return None;
}
cp
} else {
let mut s = String::with_capacity(4);
s.push(char::from_u32(u32::from(next))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
s.push(char::from_u32(u32::from(*iter.next()?))?);
u16::from_str_radix(&s, 16).ok()?.into()
}
}
// NonOctalDecimalEscapeSequence
Some(0x38 /* 8 */ | 0x39 /* 9 */) => {
return None;
}
// LegacyOctalEscapeSequence
Some(ch) if (0x30..=0x37 /* '0'..='7' */).contains(ch) => {
return None;
}
// Line Terminator
Some(0x0A /* <LF> */ | 0x0D /* <CR> */ | 0x2028 /* <LS> */ | 0x2029 /* <PS> */) => {
continue;
}
Some(ch) => {
u32::from(*ch)
}
None => return None,
};
buf.push_code_point(escape_value);
}
Some(0x0D /* <CR> */) => {
buf.push_code_point(0x0A);
}
Some(ch) => {
// The caller guarantees that sequences '`' and '${' never appear
// LineTerminatorSequence <CR> <LF> is consumed by `cursor.next_char()` and
// returns <LF>, which matches the TV of <CR> <LF>
buf.push_code_point(ch);
buf.push_code_point(u32::from(*ch));
}
None => break,
}
}

Ok(interner.get_or_intern(&buf[..]))
Some(interner.get_or_intern(buf.as_slice()))
}
}

Expand Down Expand Up @@ -120,7 +207,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
// `
0x0060 => {
let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos);
let template_string = TemplateString::new(raw_sym, interner);

return Ok(Token::new(
TokenKind::template_no_substitution(template_string),
Expand All @@ -130,7 +217,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
// $
0x0024 if cursor.next_if(0x7B /* { */)? => {
let raw_sym = interner.get_or_intern(&buf[..]);
let template_string = TemplateString::new(raw_sym, start_pos);
let template_string = TemplateString::new(raw_sym, interner);

return Ok(Token::new(
TokenKind::template_middle(template_string),
Expand Down
2 changes: 1 addition & 1 deletion core/parser/src/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ fn check_template_literal_simple() {

assert_eq!(
lexer.next(interner).unwrap().unwrap().kind(),
&TokenKind::template_no_substitution(TemplateString::new(sym, Position::new(1, 1)))
&TokenKind::template_no_substitution(TemplateString::new(sym, interner))
);
}

Expand Down
2 changes: 1 addition & 1 deletion core/parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ impl TokenKind {
Self::Punctuator(punc) => punc.to_string(),
Self::StringLiteral((lit, _)) => interner.resolve_expect(lit).to_string(),
Self::TemplateNoSubstitution(ts) | Self::TemplateMiddle(ts) => {
interner.resolve_expect(ts.as_raw()).to_string()
interner.resolve_expect(ts.raw()).to_string()
}
Self::RegularExpressionLiteral(body, flags) => {
format!(
Expand Down
8 changes: 4 additions & 4 deletions core/parser/src/parser/expression/left_hand_side/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ where
loop {
match token.kind() {
TokenKind::TemplateMiddle(template_string) => {
raws.push(template_string.as_raw());
cookeds.push(template_string.to_owned_cooked(interner).ok());
raws.push(template_string.raw());
cookeds.push(template_string.cooked());
exprs.push(
Expression::new(None, true, self.allow_yield, self.allow_await)
.parse(cursor, interner)?,
Expand All @@ -77,8 +77,8 @@ where
)?;
}
TokenKind::TemplateNoSubstitution(template_string) => {
raws.push(template_string.as_raw());
cookeds.push(template_string.to_owned_cooked(interner).ok());
raws.push(template_string.raw());
cookeds.push(template_string.cooked());
return Ok(TaggedTemplate::new(
self.tag,
raws.into_boxed_slice(),
Expand Down
23 changes: 14 additions & 9 deletions core/parser/src/parser/expression/primary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,12 +215,13 @@ where
Ok(node)
}
TokenKind::TemplateNoSubstitution(template_string) => {
let node = Literal::from(
template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?,
)
.into();
let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
tok.span().start(),
));
};
let node = Literal::from(cooked).into();
cursor.advance(interner);
Ok(node)
}
Expand Down Expand Up @@ -261,13 +262,17 @@ where
}
}
TokenKind::TemplateMiddle(template_string) => {
let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
tok.span().start(),
));
};
let parser = TemplateLiteral::new(
self.allow_yield,
self.allow_await,
tok.span().start(),
template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?,
cooked,
);
cursor.advance(interner);
parser.parse(cursor, interner).map(Into::into)
Expand Down
20 changes: 12 additions & 8 deletions core/parser/src/parser/expression/primary/template/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,12 @@ where
loop {
match cursor.lex_template(self.start, interner)?.kind() {
TokenKind::TemplateMiddle(template_string) => {
let cooked = template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?;

let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
self.start,
));
};
elements.push(TemplateElement::String(cooked));
elements.push(TemplateElement::Expr(
Expression::new(None, true, self.allow_yield, self.allow_await)
Expand All @@ -93,10 +95,12 @@ where
)?;
}
TokenKind::TemplateNoSubstitution(template_string) => {
let cooked = template_string
.to_owned_cooked(interner)
.map_err(Error::lex)?;

let Some(cooked) = template_string.cooked() else {
return Err(Error::general(
"invalid escape in template literal",
self.start,
));
};
elements.push(TemplateElement::String(cooked));
return Ok(literal::TemplateLiteral::new(elements.into()));
}
Expand Down
Loading