From bded0ae580918b901eda7c65186486fc244f0a50 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Sun, 17 Jan 2021 17:29:47 -0800
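Subject: [PATCH 01/23] Refactor StringLiteral

Move the free function `unescape_string` into `impl StringLiteral` and
split the `\x`, `\u` and legacy octal escape handling out of its body
into the helpers `hex_escape_sequence`, `unicode_escape_sequence` and
`legacy_octal_escape_sequence`. Each helper returns the decoded code
point and optionally appends its UTF-16 code units to a caller-supplied
buffer. Callers in template.rs and tests.rs now use
`StringLiteral::unescape_string`.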

---
 boa/src/syntax/lexer/string.rs   | 346 ++++++++++++++++++-------------
 boa/src/syntax/lexer/template.rs |   6 +-
 boa/src/syntax/lexer/tests.rs    |   6 +-
 3 files changed, 203 insertions(+), 155 deletions(-)
diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 6f20599811e..52d1b8c59cf 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -58,171 +58,219 @@ impl<R> Tokenizer<R> for StringLiteral {
         let _timer = BoaProfiler::global().start_event("StringLiteral", "Lexing");
 
         let (lit, span) =
-            unescape_string(cursor, start_pos, self.terminator, cursor.strict_mode())?;
+            Self::unescape_string(cursor, start_pos, self.terminator, cursor.strict_mode())?;
 
         Ok(Token::new(TokenKind::string_literal(lit), span))
     }
 }
 
-pub(super) fn unescape_string<R>(
-    cursor: &mut Cursor<R>,
-    start_pos: Position,
-    terminator: StringTerminator,
-    strict_mode: bool,
-) -> Result<(String, Span), Error>
-where
-    R: Read,
-{
-    let mut buf = Vec::new();
-    loop {
-        let next_chr = cursor.next_char()?.map(char::try_from).transpose().unwrap();
-
-        match next_chr {
-            Some('\'') if terminator == StringTerminator::SingleQuote => {
-                break;
-            }
-            Some('"') if terminator == StringTerminator::DoubleQuote => {
-                break;
-            }
-            Some('\\') => {
-                let _timer =
-                    BoaProfiler::global().start_event("StringLiteral - escape sequence", "Lexing");
+impl StringLiteral {
+    pub(super) fn unescape_string<R>(
+        cursor: &mut Cursor<R>,
+        start_pos: Position,
+        terminator: StringTerminator,
+        strict_mode: bool,
+    ) -> Result<(String, Span), Error>
+    where
+        R: Read,
+    {
+        let mut buf = Vec::new();
+        loop {
+            let next_chr = cursor.next_char()?.map(char::try_from).transpose().unwrap();
 
-                let escape = cursor.peek()?.ok_or_else(|| {
-                    Error::from(io::Error::new(
-                        ErrorKind::UnexpectedEof,
-                        "unterminated escape sequence in literal",
-                    ))
-                })?;
-
-                if escape <= 0x7f {
-                    let _ = cursor.next_byte()?;
-                    match escape {
-                        b'\n' => (),
-                        b'n' => buf.push('\n' as u16),
-                        b'r' => buf.push('\r' as u16),
-                        b't' => buf.push('\t' as u16),
-                        b'b' => buf.push('\x08' as u16),
-                        b'f' => buf.push('\x0c' as u16),
-                        b'0' => buf.push('\0' as u16),
-                        b'x' => {
-                            let mut code_point_utf8_bytes = [0u8; 2];
-                            cursor.fill_bytes(&mut code_point_utf8_bytes)?;
-                            let code_point_str = str::from_utf8(&code_point_utf8_bytes)
-                                .expect("malformed Hexadecimal character escape sequence");
-                            let code_point =
-                                u16::from_str_radix(&code_point_str, 16).map_err(|_| {
-                                    Error::syntax(
-                                        "invalid Hexadecimal escape sequence",
-                                        cursor.pos(),
-                                    )
-                                })?;
-
-                            buf.push(code_point);
-                        }
-                        b'u' => {
-                            // Support \u{X..X} (Unicode Codepoint)
-                            if cursor.next_is(b'{')? {
-                                // TODO: use bytes for a bit better performance (using stack)
-                                let mut code_point_buf = Vec::with_capacity(6);
-                                cursor.take_until(b'}', &mut code_point_buf)?;
-
-                                let code_point_str =
-                                    unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
-                                // We know this is a single unicode codepoint, convert to u32
-                                let code_point =
-                                    u32::from_str_radix(&code_point_str, 16).map_err(|_| {
-                                        Error::syntax(
-                                            "malformed Unicode character escape sequence",
-                                            cursor.pos(),
-                                        )
-                                    })?;
-
-                                // UTF16Encoding of a numeric code point value
-                                if code_point > 0x10_FFFF {
-                                    return Err(Error::syntax("Unicode codepoint must not be greater than 0x10FFFF in escape sequence", cursor.pos()));
-                                } else if code_point <= 65535 {
-                                    buf.push(code_point as u16);
-                                } else {
-                                    let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16;
-                                    let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16;
-                                    buf.push(cu1);
-                                    buf.push(cu2);
-                                }
-                            } else {
-                                // Collect each character after \u e.g \uD83D will give "D83D"
-                                let mut code_point_utf8_bytes = [0u8; 4];
-                                cursor.fill_bytes(&mut code_point_utf8_bytes)?;
-
-                                // Convert to u16
-                                let code_point_str = str::from_utf8(&code_point_utf8_bytes)
-                                    .expect("malformed Unicode character escape sequence");
-                                let code_point =
-                                    u16::from_str_radix(code_point_str, 16).map_err(|_| {
-                                        Error::syntax(
-                                            "invalid Unicode escape sequence",
-                                            cursor.pos(),
-                                        )
-                                    })?;
-
-                                buf.push(code_point);
+            match next_chr {
+                Some('\'') if terminator == StringTerminator::SingleQuote => {
+                    break;
+                }
+                Some('"') if terminator == StringTerminator::DoubleQuote => {
+                    break;
+                }
+                Some('\\') => {
+                    let _timer = BoaProfiler::global()
+                        .start_event("StringLiteral - escape sequence", "Lexing");
+
+                    let escape = cursor.peek()?.ok_or_else(|| {
+                        Error::from(io::Error::new(
+                            ErrorKind::UnexpectedEof,
+                            "unterminated escape sequence in literal",
+                        ))
+                    })?;
+
+                    if escape <= 0x7f {
+                        let _ = cursor.next_byte()?;
+                        match escape {
+                            b'\n' => (),
+                            b'n' => buf.push('\n' as u16),
+                            b'r' => buf.push('\r' as u16),
+                            b't' => buf.push('\t' as u16),
+                            b'b' => buf.push('\x08' as u16),
+                            b'f' => buf.push('\x0c' as u16),
+                            b'0' => buf.push('\0' as u16),
+                            b'x' => {
+                                Self::hex_escape_sequence(cursor, Some(&mut buf))?;
                             }
-                        }
-                        n if char::is_digit(char::from(n), 8) => {
-                            if strict_mode {
-                                return Err(Error::syntax(
-                                    "octal escape sequences are deprecated",
-                                    cursor.pos(),
-                                ));
+                            b'u' => {
+                                Self::unicode_escape_sequence(cursor, Some(&mut buf))?;
                             }
-                            let mut o = char::from(n).to_digit(8).unwrap();
-
-                            match cursor.peek()? {
-                                Some(c) if char::is_digit(char::from(c), 8) => {
-                                    let _ = cursor.next_byte()?;
-                                    o = o * 8 + char::from(n).to_digit(8).unwrap();
-                                    if n <= b'3' {
-                                        match cursor.peek()? {
-                                            Some(c) if char::is_digit(char::from(c), 8) => {
-                                                let _ = cursor.next_byte();
-                                                o = o * 8 + char::from(n).to_digit(8).unwrap();
-                                            }
-                                            _ => (),
-                                        }
-                                    }
-                                }
-                                _ => (),
+                            n if char::is_digit(char::from(n), 8) => {
+                                Self::legacy_octal_escape_sequence(
+                                    cursor,
+                                    Some(&mut buf),
+                                    strict_mode,
+                                    n,
+                                )?;
                             }
-                            buf.push(o as u16);
-                        }
-                        _ => buf.push(escape as u16),
-                    };
+                            _ => buf.push(escape as u16),
+                        };
+                    }
                 }
-            }
-            Some(next_ch) => {
-                if next_ch.len_utf16() == 1 {
-                    buf.push(next_ch as u16);
-                } else {
-                    let mut code_point_bytes_buf = [0u16; 2];
-                    let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf);
+                Some(next_ch) => {
+                    if next_ch.len_utf16() == 1 {
+                        buf.push(next_ch as u16);
+                    } else {
+                        let mut code_point_bytes_buf = [0u16; 2];
+                        let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf);
 
-                    buf.extend(code_point_bytes.iter());
+                        buf.extend(code_point_bytes.iter());
+                    }
+                }
+                None if terminator != StringTerminator::End => {
+                    return Err(Error::from(io::Error::new(
+                        ErrorKind::UnexpectedEof,
+                        "unterminated string literal",
+                    )));
+                }
+                None => {
+                    break;
                 }
             }
-            None if terminator != StringTerminator::End => {
-                return Err(Error::from(io::Error::new(
-                    ErrorKind::UnexpectedEof,
-                    "unterminated string literal",
-                )));
+        }
+
+        Ok((
+            String::from_utf16_lossy(buf.as_slice()),
+            Span::new(start_pos, cursor.pos()),
+        ))
+    }
+
+    #[inline]
+    pub(super) fn unicode_escape_sequence<R>(
+        cursor: &mut Cursor<R>,
+        code_units_buf: Option<&mut Vec<u16>>,
+    ) -> Result<u32, Error>
+    where
+        R: Read,
+    {
+        // Support \u{X..X} (Unicode CodePoint)
+        if cursor.next_is(b'{')? {
+            // TODO: use bytes for a bit better performance (using stack)
+            let mut code_point_buf = Vec::with_capacity(6);
+            cursor.take_until(b'}', &mut code_point_buf)?;
+
+            let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
+            // We know this is a single unicode codepoint, convert to u32
+            let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| {
+                Error::syntax("malformed Unicode character escape sequence", cursor.pos())
+            })?;
+
+            // UTF16Encoding of a numeric code point value
+            if code_point > 0x10_FFFF {
+                return Err(Error::syntax(
+                    "Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
+                    cursor.pos(),
+                ));
+            } else if let Some(code_units_buf) = code_units_buf {
+                if code_point <= 65535 {
+                    code_units_buf.push(code_point as u16);
+                } else {
+                    let cu1 = ((code_point - 65536) / 1024 + 0xD800) as u16;
+                    let cu2 = ((code_point - 65536) % 1024 + 0xDC00) as u16;
+                    code_units_buf.push(cu1);
+                    code_units_buf.push(cu2);
+                }
             }
-            None => {
-                break;
+
+            Ok(code_point)
+        } else {
+            // Hex4Digits
+            // Collect each character after \u e.g \uD83D will give "D83D"
+            let mut code_point_utf8_bytes = [0u8; 4];
+            cursor.fill_bytes(&mut code_point_utf8_bytes)?;
+
+            // Convert to u16
+            let code_point_str = str::from_utf8(&code_point_utf8_bytes)
+                .expect("malformed Unicode character escape sequence");
+            let code_point = u16::from_str_radix(code_point_str, 16)
+                .map_err(|_| Error::syntax("invalid Unicode escape sequence", cursor.pos()))?;
+
+            if let Some(code_units_buf) = code_units_buf {
+                code_units_buf.push(code_point);
             }
+
+            Ok(code_point as u32)
+        }
+    }
+
+    #[inline]
+    fn hex_escape_sequence<R>(
+        cursor: &mut Cursor<R>,
+        code_units_buf: Option<&mut Vec<u16>>,
+    ) -> Result<u32, Error>
+    where
+        R: Read,
+    {
+        let mut code_point_utf8_bytes = [0u8; 2];
+        cursor.fill_bytes(&mut code_point_utf8_bytes)?;
+        let code_point_str = str::from_utf8(&code_point_utf8_bytes)
+            .expect("malformed Hexadecimal character escape sequence");
+        let code_point = u16::from_str_radix(&code_point_str, 16)
+            .map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", cursor.pos()))?;
+
+        if let Some(code_units_buf) = code_units_buf {
+            code_units_buf.push(code_point);
         }
+
+        Ok(code_point as u32)
     }
 
-    Ok((
-        String::from_utf16_lossy(buf.as_slice()),
-        Span::new(start_pos, cursor.pos()),
-    ))
+    #[inline]
+    fn legacy_octal_escape_sequence<R>(
+        cursor: &mut Cursor<R>,
+        code_units_buf: Option<&mut Vec<u16>>,
+        strict_mode: bool,
+        init: u8,
+    ) -> Result<u32, Error>
+    where
+        R: Read,
+    {
+        if strict_mode {
+            return Err(Error::syntax(
+                "octal escape sequences are deprecated",
+                cursor.pos(),
+            ));
+        }
+        let mut code_point = char::from(init).to_digit(8).unwrap();
+
+        match cursor.peek()? {
+            Some(c) if char::is_digit(char::from(c), 8) => {
+                let _ = cursor.next_byte()?;
+                code_point = code_point * 8 + char::from(init).to_digit(8).unwrap();
+                if init <= b'3' {
+                    match cursor.peek()? {
+                        Some(c) if char::is_digit(char::from(c), 8) => {
+                            let _ = cursor.next_byte();
+                            code_point = code_point * 8 + char::from(init).to_digit(8).unwrap();
+                        }
+                        _ => (),
+                    }
+                }
+            }
+            _ => (),
+        }
+
+        if let Some(code_units_buf) = code_units_buf {
+            code_units_buf.push(code_point as u16);
+        }
+
+        Ok(code_point)
+    }
 }
diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs
index a34ba025238..23171e333a8 100644
--- a/boa/src/syntax/lexer/template.rs
+++ b/boa/src/syntax/lexer/template.rs
@@ -3,7 +3,7 @@
 use super::{Cursor, Error, Tokenizer};
 use crate::{
     profiler::BoaProfiler,
-    syntax::lexer::string::{unescape_string, StringTerminator},
+    syntax::lexer::string::{StringLiteral, StringTerminator},
     syntax::{
         ast::{Position, Span},
         lexer::{Token, TokenKind},
@@ -44,7 +44,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
             match next_chr {
                 '`' => {
                     let raw = String::from_utf16_lossy(buf.as_slice());
-                    let (cooked, _) = unescape_string(
+                    let (cooked, _) = StringLiteral::unescape_string(
                         &mut Cursor::with_position(raw.as_bytes(), start_pos),
                         start_pos,
                         StringTerminator::End,
@@ -58,7 +58,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
                 '$' if cursor.peek()? == Some(b'{') => {
                     let _ = cursor.next_byte()?;
                     let raw = String::from_utf16_lossy(buf.as_slice());
-                    let (cooked, _) = unescape_string(
+                    let (cooked, _) = StringLiteral::unescape_string(
                         &mut Cursor::with_position(raw.as_bytes(), start_pos),
                         start_pos,
                         StringTerminator::End,
diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs
index f54b8f4b338..4f3e400b401 100644
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@@ -6,7 +6,7 @@ use super::token::Numeric;
 use super::*;
 use super::{Error, Position};
 use crate::syntax::ast::Keyword;
-use crate::syntax::lexer::string::{unescape_string, StringTerminator};
+use crate::syntax::lexer::string::{StringLiteral, StringTerminator};
 use std::str;
 
 fn span(start: (u32, u32), end: (u32, u32)) -> Span {
@@ -864,7 +864,7 @@ fn unicode_escape_with_braces_() {
 
     let mut cursor = Cursor::new(s.as_bytes());
 
-    if let Ok((s, _)) = unescape_string(
+    if let Ok((s, _)) = StringLiteral::unescape_string(
         &mut cursor,
         Position::new(1, 1),
         StringTerminator::End,
@@ -880,7 +880,7 @@ fn unicode_escape_with_braces_() {
 fn unescape_string_with_single_escape() {
     let s = r#"\Б"#.to_string();
     let mut cursor = Cursor::new(s.as_bytes());
-    let (s, _) = unescape_string(
+    let (s, _) = StringLiteral::unescape_string(
         &mut cursor,
         Position::new(1, 1),
         StringTerminator::End,

From 54c6ffec7291a0c77519dbecba4f3c4602ba2676 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Sun, 17 Jan 2021 18:50:24 -0800
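Subject: [PATCH 02/23] Fix octal escape in string literal

The second and third digits of a legacy octal escape were accumulated
from the first digit instead of the digit that had just been peeked, so
for example `\43` was decoded as 0o44 rather than 0o43. Accumulate the
peeked byte instead, and propagate the error from `next_byte()` when
consuming the third digit rather than discarding it.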

---
 boa/src/syntax/lexer/string.rs | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 52d1b8c59cf..7fc0e21351a 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -112,12 +112,12 @@ impl StringLiteral {
                             b'u' => {
                                 Self::unicode_escape_sequence(cursor, Some(&mut buf))?;
                             }
-                            n if char::is_digit(char::from(n), 8) => {
+                            byte if (b'0'..b'8').contains(&byte) => {
                                 Self::legacy_octal_escape_sequence(
                                     cursor,
                                     Some(&mut buf),
                                     strict_mode,
-                                    n,
+                                    byte,
                                 )?;
                             }
                             _ => buf.push(escape as u16),
@@ -237,7 +237,7 @@ impl StringLiteral {
         cursor: &mut Cursor<R>,
         code_units_buf: Option<&mut Vec<u16>>,
         strict_mode: bool,
-        init: u8,
+        init_byte: u8,
     ) -> Result<u32, Error>
     where
         R: Read,
@@ -248,23 +248,26 @@ impl StringLiteral {
                 cursor.pos(),
             ));
         }
-        let mut code_point = char::from(init).to_digit(8).unwrap();
+        // Grammar: OctalDigit
+        let mut code_point = (init_byte - b'0') as u32;
 
-        match cursor.peek()? {
-            Some(c) if char::is_digit(char::from(c), 8) => {
+        // Grammar: ZeroToThree OctalDigit
+        // Grammar: FourToSeven OctalDigit
+        if let Some(byte) = cursor.peek()? {
+            if (b'0'..b'8').contains(&byte) {
                 let _ = cursor.next_byte()?;
-                code_point = code_point * 8 + char::from(init).to_digit(8).unwrap();
-                if init <= b'3' {
-                    match cursor.peek()? {
-                        Some(c) if char::is_digit(char::from(c), 8) => {
-                            let _ = cursor.next_byte();
-                            code_point = code_point * 8 + char::from(init).to_digit(8).unwrap();
+                code_point = (code_point * 8) + (byte - b'0') as u32;
+
+                if (b'0'..b'4').contains(&init_byte) {
+                    // Grammar: ZeroToThree OctalDigit OctalDigit
+                    if let Some(byte) = cursor.peek()? {
+                        if (b'0'..b'8').contains(&byte) {
+                            let _ = cursor.next_byte()?;
+                            code_point = (code_point * 8) + (byte - b'0') as u32;
                         }
-                        _ => (),
                     }
                 }
             }
-            _ => (),
         }
 
         if let Some(code_units_buf) = code_units_buf {

From 4c9a78fa55848bb80dbd67a00e9e3e104ba0b185 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Sun, 17 Jan 2021 19:29:33 -0800
Subject: [PATCH 03/23] Add tests

---
 boa/src/syntax/lexer/tests.rs | 44 +++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs
index 4f3e400b401..dca682a011f 100644
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@@ -890,6 +890,50 @@ fn unescape_string_with_single_escape() {
     assert_eq!(s, "Б");
 }
 
+#[test]
+fn legacy_octal_escape() {
+    let test_cases = [
+        (r#"\3"#, "\u{3}"),
+        (r#"\03"#, "\u{3}"),
+        (r#"\003"#, "\u{3}"),
+        (r#"\0003"#, "\u{0}3"),
+        (r#"\43"#, "#"),
+        (r#"\043"#, "#"),
+        (r#"\101"#, "A"),
+    ];
+
+    for (s, expected) in test_cases.iter() {
+        let mut cursor = Cursor::new(s.as_bytes());
+        let (s, _) = StringLiteral::unescape_string(
+            &mut cursor,
+            Position::new(1, 1),
+            StringTerminator::End,
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(s, *expected);
+    }
+}
+
+#[test]
+fn zero_escape() {
+    let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")];
+
+    for (s, expected) in test_cases.iter() {
+        let mut cursor = Cursor::new(s.as_bytes());
+        let (s, _) = StringLiteral::unescape_string(
+            &mut cursor,
+            Position::new(1, 1),
+            StringTerminator::End,
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(s, *expected);
+    }
+}
+
 mod carriage_return {
     use super::*;
 

From 06513e953108a7a0977dbd449cad1d5a56b5f700 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Sun, 17 Jan 2021 19:29:45 -0800
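Subject: [PATCH 04/23] Fix zero escape

Only treat `\0` as the NUL escape when the next byte is not a decimal
digit. Otherwise the match falls through to the legacy octal escape
branch, so that for example `\03` is decoded as U+0003 instead of NUL
followed by the character `3`.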

---
 boa/src/syntax/lexer/string.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 7fc0e21351a..489c364768c 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -105,7 +105,13 @@ impl StringLiteral {
                             b't' => buf.push('\t' as u16),
                             b'b' => buf.push('\x08' as u16),
                             b'f' => buf.push('\x0c' as u16),
-                            b'0' => buf.push('\0' as u16),
+                            b'0' if cursor
+                                .peek()?
+                                .filter(|next_byte| (*next_byte as char).is_digit(10))
+                                .is_none() =>
+                            {
+                                buf.push('\0' as u16)
+                            }
                             b'x' => {
                                 Self::hex_escape_sequence(cursor, Some(&mut buf))?;
                             }

From 0cae4439ecfb411bb16e140d013b603c1c5a7d6b Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 00:50:46 -0800
Subject: [PATCH 05/23] Fix zero escape lookahead

---
 boa/src/syntax/lexer/string.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 489c364768c..da8f73636c1 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -107,7 +107,8 @@ impl StringLiteral {
                             b'f' => buf.push('\x0c' as u16),
                             b'0' if cursor
                                 .peek()?
-                                .filter(|next_byte| (*next_byte as char).is_digit(10))
+                                .and_then(|next_byte| char::try_from(next_byte).ok())
+                                .filter(|next_ch| next_ch.is_digit(10))
                                 .is_none() =>
                             {
                                 buf.push('\0' as u16)
@@ -197,7 +198,7 @@ impl StringLiteral {
 
             Ok(code_point)
         } else {
-            // Hex4Digits
+            // Grammar: Hex4Digits
             // Collect each character after \u e.g \uD83D will give "D83D"
             let mut code_point_utf8_bytes = [0u8; 4];
             cursor.fill_bytes(&mut code_point_utf8_bytes)?;

From e783fe464c9a2b563530dad802c21c2cea99824d Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 10:40:09 -0800
Subject: [PATCH 06/23] Rename variables

---
 boa/src/syntax/lexer/string.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index da8f73636c1..2fccd56eebc 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -76,9 +76,9 @@ impl StringLiteral {
     {
         let mut buf = Vec::new();
         loop {
-            let next_chr = cursor.next_char()?.map(char::try_from).transpose().unwrap();
+            let next_ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();
 
-            match next_chr {
+            match next_ch {
                 Some('\'') if terminator == StringTerminator::SingleQuote => {
                     break;
                 }
@@ -135,10 +135,10 @@ impl StringLiteral {
                     if next_ch.len_utf16() == 1 {
                         buf.push(next_ch as u16);
                     } else {
-                        let mut code_point_bytes_buf = [0u16; 2];
-                        let code_point_bytes = next_ch.encode_utf16(&mut code_point_bytes_buf);
+                        let mut code_units_buf = [0u16; 2];
+                        let code_units_buf = next_ch.encode_utf16(&mut code_units_buf);
 
-                        buf.extend(code_point_bytes.iter());
+                        buf.extend(code_units_buf.iter());
                     }
                 }
                 None if terminator != StringTerminator::End => {

From 1f6b7b2b28ddcf60117e8ff3b8d575a34df21cb4 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 10:43:43 -0800
Subject: [PATCH 07/23] Rename helper functions

---
 boa/src/syntax/lexer/string.rs   | 16 ++++++++--------
 boa/src/syntax/lexer/template.rs |  4 ++--
 boa/src/syntax/lexer/tests.rs    |  8 ++++----
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 2fccd56eebc..eed996f7068 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -58,14 +58,14 @@ impl<R> Tokenizer<R> for StringLiteral {
         let _timer = BoaProfiler::global().start_event("StringLiteral", "Lexing");
 
         let (lit, span) =
-            Self::unescape_string(cursor, start_pos, self.terminator, cursor.strict_mode())?;
+            Self::take_string_characters(cursor, start_pos, self.terminator, cursor.strict_mode())?;
 
         Ok(Token::new(TokenKind::string_literal(lit), span))
     }
 }
 
 impl StringLiteral {
-    pub(super) fn unescape_string<R>(
+    pub(super) fn take_string_characters<R>(
         cursor: &mut Cursor<R>,
         start_pos: Position,
         terminator: StringTerminator,
@@ -114,13 +114,13 @@ impl StringLiteral {
                                 buf.push('\0' as u16)
                             }
                             b'x' => {
-                                Self::hex_escape_sequence(cursor, Some(&mut buf))?;
+                                Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;
                             }
                             b'u' => {
-                                Self::unicode_escape_sequence(cursor, Some(&mut buf))?;
+                                Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?;
                             }
                             byte if (b'0'..b'8').contains(&byte) => {
-                                Self::legacy_octal_escape_sequence(
+                                Self::take_legacy_octal_escape_sequence(
                                     cursor,
                                     Some(&mut buf),
                                     strict_mode,
@@ -160,7 +160,7 @@ impl StringLiteral {
     }
 
     #[inline]
-    pub(super) fn unicode_escape_sequence<R>(
+    pub(super) fn take_unicode_escape_sequence<R>(
         cursor: &mut Cursor<R>,
         code_units_buf: Option<&mut Vec<u16>>,
     ) -> Result<u32, Error>
@@ -218,7 +218,7 @@ impl StringLiteral {
     }
 
     #[inline]
-    fn hex_escape_sequence<R>(
+    fn take_hex_escape_sequence<R>(
         cursor: &mut Cursor<R>,
         code_units_buf: Option<&mut Vec<u16>>,
     ) -> Result<u32, Error>
@@ -240,7 +240,7 @@ impl StringLiteral {
     }
 
     #[inline]
-    fn legacy_octal_escape_sequence<R>(
+    fn take_legacy_octal_escape_sequence<R>(
         cursor: &mut Cursor<R>,
         code_units_buf: Option<&mut Vec<u16>>,
         strict_mode: bool,
diff --git a/boa/src/syntax/lexer/template.rs b/boa/src/syntax/lexer/template.rs
index 23171e333a8..ecec7a7387f 100644
--- a/boa/src/syntax/lexer/template.rs
+++ b/boa/src/syntax/lexer/template.rs
@@ -44,7 +44,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
             match next_chr {
                 '`' => {
                     let raw = String::from_utf16_lossy(buf.as_slice());
-                    let (cooked, _) = StringLiteral::unescape_string(
+                    let (cooked, _) = StringLiteral::take_string_characters(
                         &mut Cursor::with_position(raw.as_bytes(), start_pos),
                         start_pos,
                         StringTerminator::End,
@@ -58,7 +58,7 @@ impl<R> Tokenizer<R> for TemplateLiteral {
                 '$' if cursor.peek()? == Some(b'{') => {
                     let _ = cursor.next_byte()?;
                     let raw = String::from_utf16_lossy(buf.as_slice());
-                    let (cooked, _) = StringLiteral::unescape_string(
+                    let (cooked, _) = StringLiteral::take_string_characters(
                         &mut Cursor::with_position(raw.as_bytes(), start_pos),
                         start_pos,
                         StringTerminator::End,
diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs
index dca682a011f..61e8962f190 100644
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@@ -864,7 +864,7 @@ fn unicode_escape_with_braces_() {
 
     let mut cursor = Cursor::new(s.as_bytes());
 
-    if let Ok((s, _)) = StringLiteral::unescape_string(
+    if let Ok((s, _)) = StringLiteral::take_string_characters(
         &mut cursor,
         Position::new(1, 1),
         StringTerminator::End,
@@ -880,7 +880,7 @@ fn unicode_escape_with_braces_() {
 fn unescape_string_with_single_escape() {
     let s = r#"\Б"#.to_string();
     let mut cursor = Cursor::new(s.as_bytes());
-    let (s, _) = StringLiteral::unescape_string(
+    let (s, _) = StringLiteral::take_string_characters(
         &mut cursor,
         Position::new(1, 1),
         StringTerminator::End,
@@ -904,7 +904,7 @@ fn legacy_octal_escape() {
 
     for (s, expected) in test_cases.iter() {
         let mut cursor = Cursor::new(s.as_bytes());
-        let (s, _) = StringLiteral::unescape_string(
+        let (s, _) = StringLiteral::take_string_characters(
             &mut cursor,
             Position::new(1, 1),
             StringTerminator::End,
@@ -922,7 +922,7 @@ fn zero_escape() {
 
     for (s, expected) in test_cases.iter() {
         let mut cursor = Cursor::new(s.as_bytes());
-        let (s, _) = StringLiteral::unescape_string(
+        let (s, _) = StringLiteral::take_string_characters(
             &mut cursor,
             Position::new(1, 1),
             StringTerminator::End,

From 067f2a1c27bd3c11e41608530bb416cc5e876a01 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 10:48:28 -0800
Subject: [PATCH 08/23] Refactor match arms

---
 boa/src/syntax/lexer/string.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index eed996f7068..008f7857a56 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -85,6 +85,9 @@ impl StringLiteral {
                 Some('"') if terminator == StringTerminator::DoubleQuote => {
                     break;
                 }
+                None if terminator == StringTerminator::End => {
+                    break;
+                }
                 Some('\\') => {
                     let _timer = BoaProfiler::global()
                         .start_event("StringLiteral - escape sequence", "Lexing");
@@ -141,15 +144,12 @@ impl StringLiteral {
                         buf.extend(code_units_buf.iter());
                     }
                 }
-                None if terminator != StringTerminator::End => {
+                None => {
                     return Err(Error::from(io::Error::new(
                         ErrorKind::UnexpectedEof,
                         "unterminated string literal",
                     )));
                 }
-                None => {
-                    break;
-                }
             }
         }
 

From 6cca4d9b274d1b8394e0dcc31dfd932ff51aeb57 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 19:02:53 -0800
Subject: [PATCH 09/23] Fix escape line terminator sequence

---
 boa/src/syntax/lexer/string.rs | 98 ++++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 40 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 008f7857a56..f97837748f5 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -65,6 +65,20 @@ impl<R> Tokenizer<R> for StringLiteral {
 }
 
 impl StringLiteral {
+    /// Checks if a character is LineTerminator as per ECMAScript standards.
+    ///
+    /// More information:
+    ///  - [ECMAScript reference][spec]
+    ///
+    /// [spec]: https://tc39.es/ecma262/#prod-LineTerminator
+    #[inline]
+    pub(super) fn is_line_terminator(ch: char) -> bool {
+        matches!(
+            ch,
+            '\u{000A}' /* <LF> */ | '\u{000D}' /* <CR> */ | '\u{2028}' /* <LS> */ | '\u{2029}' /* <PS> */
+        )
+    }
+
     pub(super) fn take_string_characters<R>(
         cursor: &mut Cursor<R>,
         start_pos: Position,
@@ -92,47 +106,51 @@ impl StringLiteral {
                     let _timer = BoaProfiler::global()
                         .start_event("StringLiteral - escape sequence", "Lexing");
 
-                    let escape = cursor.peek()?.ok_or_else(|| {
-                        Error::from(io::Error::new(
-                            ErrorKind::UnexpectedEof,
-                            "unterminated escape sequence in literal",
-                        ))
-                    })?;
+                    let escape_ch = cursor
+                        .next_char()?
+                        .and_then(|byte| char::try_from(byte).ok())
+                        .ok_or_else(|| {
+                            Error::from(io::Error::new(
+                                ErrorKind::UnexpectedEof,
+                                "unterminated escape sequence in literal",
+                            ))
+                        })?;
 
-                    if escape <= 0x7f {
-                        let _ = cursor.next_byte()?;
-                        match escape {
-                            b'\n' => (),
-                            b'n' => buf.push('\n' as u16),
-                            b'r' => buf.push('\r' as u16),
-                            b't' => buf.push('\t' as u16),
-                            b'b' => buf.push('\x08' as u16),
-                            b'f' => buf.push('\x0c' as u16),
-                            b'0' if cursor
-                                .peek()?
-                                .and_then(|next_byte| char::try_from(next_byte).ok())
-                                .filter(|next_ch| next_ch.is_digit(10))
-                                .is_none() =>
-                            {
-                                buf.push('\0' as u16)
-                            }
-                            b'x' => {
-                                Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;
-                            }
-                            b'u' => {
-                                Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?;
-                            }
-                            byte if (b'0'..b'8').contains(&byte) => {
-                                Self::take_legacy_octal_escape_sequence(
-                                    cursor,
-                                    Some(&mut buf),
-                                    strict_mode,
-                                    byte,
-                                )?;
-                            }
-                            _ => buf.push(escape as u16),
-                        };
-                    }
+                    match escape_ch {
+                        'b' => buf.push('\x08' as u16),
+                        'f' => buf.push('\x0c' as u16),
+                        'n' => buf.push('\n' as u16),
+                        'r' => buf.push('\r' as u16),
+                        't' => buf.push('\t' as u16),
+                        '0' if cursor
+                            .peek()?
+                            .and_then(|next_byte| char::try_from(next_byte).ok())
+                            .filter(|next_ch| next_ch.is_digit(10))
+                            .is_none() =>
+                        {
+                            buf.push('\0' as u16)
+                        }
+                        'x' => {
+                            Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;
+                        }
+                        'u' => {
+                            Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?;
+                        }
+                        _ if escape_ch.is_digit(10) => {
+                            Self::take_legacy_octal_escape_sequence(
+                                cursor,
+                                Some(&mut buf),
+                                strict_mode,
+                                escape_ch as u8,
+                            )?;
+                        }
+                        _ if Self::is_line_terminator(escape_ch) => {
+                            // Check match LineContinuation
+                            // Grammar: \ LineTerminatorSequence
+                            // do nothing, continue lexing
+                        }
+                        _ => buf.push(escape_ch as u16),
+                    };
                 }
                 Some(next_ch) => {
                     if next_ch.len_utf16() == 1 {

From 83e86475250ab6498c815d266d4179380b9279e8 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 19:11:50 -0800
Subject: [PATCH 10/23] Fix single character escape

---
 boa/src/syntax/lexer/string.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index f97837748f5..6849791f5a6 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -117,11 +117,15 @@ impl StringLiteral {
                         })?;
 
                     match escape_ch {
-                        'b' => buf.push('\x08' as u16),
-                        'f' => buf.push('\x0c' as u16),
-                        'n' => buf.push('\n' as u16),
-                        'r' => buf.push('\r' as u16),
-                        't' => buf.push('\t' as u16),
+                        'b' => buf.push('\u{0008}' as u16 /* <BS> */),
+                        't' => buf.push('\u{0009}' as u16 /* <HT> */),
+                        'n' => buf.push('\u{000A}' as u16 /* <LF> */),
+                        'v' => buf.push('\u{000B}' as u16 /* <VT> */),
+                        'f' => buf.push('\u{000C}' as u16 /* <FF> */),
+                        'r' => buf.push('\u{000D}' as u16 /* <CR> */),
+                        '"' => buf.push('\u{0022}' as u16 /* " */),
+                        '\'' => buf.push('\u{0027}' as u16 /* ' */),
+                        '\\' => buf.push('\u{005C}' as u16 /* \ */),
                         '0' if cursor
                             .peek()?
                             .and_then(|next_byte| char::try_from(next_byte).ok())
@@ -145,9 +149,9 @@ impl StringLiteral {
                             )?;
                         }
                         _ if Self::is_line_terminator(escape_ch) => {
-                            // Check match LineContinuation
+                            // Match LineContinuation
                             // Grammar: \ LineTerminatorSequence
-                            // do nothing, continue lexing
+                            // LineContinuation is the empty String. Do nothing and continue lexing.
                         }
                         _ => buf.push(escape_ch as u16),
                     };

From 5fd86a1a67301b253b7d7137ad81772ea7336c51 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 20:04:22 -0800
Subject: [PATCH 11/23] Fix escape followed by unicode char

---
 boa/src/syntax/lexer/string.rs | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 6849791f5a6..fd203763649 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -132,7 +132,7 @@ impl StringLiteral {
                             .filter(|next_ch| next_ch.is_digit(10))
                             .is_none() =>
                         {
-                            buf.push('\0' as u16)
+                            buf.push('\u{0000}' as u16 /* NULL */)
                         }
                         'x' => {
                             Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;
@@ -153,17 +153,20 @@ impl StringLiteral {
                             // Grammar: \ LineTerminatorSequence
                             // LineContinuation is the empty String. Do nothing and continue lexing.
                         }
-                        _ => buf.push(escape_ch as u16),
+                        _ => {
+                            if escape_ch.len_utf16() == 1 {
+                                buf.push(escape_ch as u16);
+                            } else {
+                                buf.extend(escape_ch.encode_utf16(&mut [0u16; 2]).iter());
+                            }
+                        }
                     };
                 }
                 Some(next_ch) => {
                     if next_ch.len_utf16() == 1 {
                         buf.push(next_ch as u16);
                     } else {
-                        let mut code_units_buf = [0u16; 2];
-                        let code_units_buf = next_ch.encode_utf16(&mut code_units_buf);
-
-                        buf.extend(code_units_buf.iter());
+                        buf.extend(next_ch.encode_utf16(&mut [0u16; 2]).iter());
                     }
                 }
                 None => {

From b176702fae9c91ed3acc2e7dc910bf7f74fee2d0 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 20:21:37 -0800
Subject: [PATCH 12/23] Add NonOctalDecimalEscapeSequence

---
 boa/src/syntax/lexer/string.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index fd203763649..2e02b374427 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -140,7 +140,18 @@ impl StringLiteral {
                         'u' => {
                             Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?;
                         }
-                        _ if escape_ch.is_digit(10) => {
+                        '8' | '9' => {
+                            // Grammar: NonOctalDecimalEscapeSequence
+                            if strict_mode {
+                                return Err(Error::syntax(
+                                    "\\8 and \\9 are not allowed in strict mode.",
+                                    cursor.pos(),
+                                ));
+                            } else {
+                                buf.push(escape_ch as u16);
+                            }
+                        }
+                        _ if escape_ch.is_digit(8) => {
                             Self::take_legacy_octal_escape_sequence(
                                 cursor,
                                 Some(&mut buf),

From 858a74d9927dadbd171fe89608d3cf7739af9158 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 20:22:05 -0800
Subject: [PATCH 13/23] Fix comment

---
 boa/src/syntax/lexer/string.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 2e02b374427..88894b9290c 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -160,7 +160,7 @@ impl StringLiteral {
                             )?;
                         }
                         _ if Self::is_line_terminator(escape_ch) => {
-                            // Match LineContinuation
+                            // Grammar: LineContinuation
                             // Grammar: \ LineTerminatorSequence
                             // LineContinuation is the empty String. Do nothing and continue lexing.
                         }

From e4bf635b94e37c1f0e0dd0439df082246cbf18fa Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 20:24:59 -0800
Subject: [PATCH 14/23] Refactor

---
 boa/src/syntax/lexer/string.rs | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 88894b9290c..b8b27fd3412 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -117,22 +117,22 @@ impl StringLiteral {
                         })?;
 
                     match escape_ch {
-                        'b' => buf.push('\u{0008}' as u16 /* <BS> */),
-                        't' => buf.push('\u{0009}' as u16 /* <HT> */),
-                        'n' => buf.push('\u{000A}' as u16 /* <LF> */),
-                        'v' => buf.push('\u{000B}' as u16 /* <VT> */),
-                        'f' => buf.push('\u{000C}' as u16 /* <FF> */),
-                        'r' => buf.push('\u{000D}' as u16 /* <CR> */),
-                        '"' => buf.push('\u{0022}' as u16 /* " */),
-                        '\'' => buf.push('\u{0027}' as u16 /* ' */),
-                        '\\' => buf.push('\u{005C}' as u16 /* \ */),
+                        'b' => buf.push(0x0008 /* <BS> */),
+                        't' => buf.push(0x0009 /* <HT> */),
+                        'n' => buf.push(0x000A /* <LF> */),
+                        'v' => buf.push(0x000B /* <VT> */),
+                        'f' => buf.push(0x000C /* <FF> */),
+                        'r' => buf.push(0x000D /* <CR> */),
+                        '"' => buf.push(0x0022 /* " */),
+                        '\'' => buf.push(0x0027 /* ' */),
+                        '\\' => buf.push(0x005C /* \ */),
                         '0' if cursor
                             .peek()?
                             .and_then(|next_byte| char::try_from(next_byte).ok())
                             .filter(|next_ch| next_ch.is_digit(10))
                             .is_none() =>
                         {
-                            buf.push('\u{0000}' as u16 /* NULL */)
+                            buf.push(0x0000 /* NULL */)
                         }
                         'x' => {
                             Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;

From 835a5bb88cd818fe2c259ef5242af2e78bdbca71 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 20:26:22 -0800
Subject: [PATCH 15/23] Modify error message

---
 boa/src/syntax/lexer/string.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index b8b27fd3412..6cf1e991b88 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -144,7 +144,7 @@ impl StringLiteral {
                             // Grammar: NonOctalDecimalEscapeSequence
                             if strict_mode {
                                 return Err(Error::syntax(
-                                    "\\8 and \\9 are not allowed in strict mode.",
+                                    "\\8 and \\9 are not allowed in strict mode",
                                     cursor.pos(),
                                 ));
                             } else {
@@ -287,7 +287,7 @@ impl StringLiteral {
     {
         if strict_mode {
             return Err(Error::syntax(
-                "octal escape sequences are deprecated",
+                "octal escape sequences are not allowed in strict mode",
                 cursor.pos(),
             ));
         }

From d67cec2dbff4cf9f6282ea5d5a6bdfde2ea49e60 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 20:56:19 -0800
Subject: [PATCH 16/23] Add tests

---
 boa/src/syntax/lexer/tests.rs | 55 +++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs
index 61e8962f190..496ca44174a 100644
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@@ -914,6 +914,17 @@ fn legacy_octal_escape() {
 
         assert_eq!(s, *expected);
     }
+
+    for (s, _) in test_cases.iter() {
+        let mut cursor = Cursor::new(s.as_bytes());
+        StringLiteral::take_string_characters(
+            &mut cursor,
+            Position::new(1, 1),
+            StringTerminator::End,
+            true,
+        )
+        .expect_err("Octal-escape in strict mode not rejected as expected");
+    }
 }
 
 #[test]
@@ -934,6 +945,50 @@ fn zero_escape() {
     }
 }
 
+#[test]
+fn non_octal_decimal_escape() {
+    let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")];
+
+    for (s, expected) in test_cases.iter() {
+        let mut cursor = Cursor::new(s.as_bytes());
+        let (s, _) = StringLiteral::take_string_characters(
+            &mut cursor,
+            Position::new(1, 1),
+            StringTerminator::End,
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(s, *expected);
+    }
+
+    for (s, _) in test_cases.iter() {
+        let mut cursor = Cursor::new(s.as_bytes());
+        StringLiteral::take_string_characters(
+            &mut cursor,
+            Position::new(1, 1),
+            StringTerminator::End,
+            true,
+        )
+        .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected");
+    }
+}
+
+#[test]
+fn line_continuation() {
+    let s = "hello \\\nworld";
+    let mut cursor = Cursor::new(s.as_bytes());
+    let (s, _) = StringLiteral::take_string_characters(
+        &mut cursor,
+        Position::new(1, 1),
+        StringTerminator::End,
+        false,
+    )
+    .unwrap();
+
+    assert_eq!(s, "hello world");
+}
+
 mod carriage_return {
     use super::*;
 

From f59a002816aa88711ee9ad383e44e23dd4efd2f6 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 21:02:07 -0800
Subject: [PATCH 17/23] Rename tests

---
 boa/src/syntax/lexer/tests.rs | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs
index 496ca44174a..e9d01d46915 100644
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@@ -795,7 +795,7 @@ fn illegal_following_numeric_literal() {
 }
 
 #[test]
-fn codepoint_with_no_braces() {
+fn string_codepoint_with_no_braces() {
     let mut lexer = Lexer::new(&br#""test\uD38Dtest""#[..]);
     assert!(lexer.next().is_ok());
 }
@@ -814,7 +814,7 @@ fn illegal_code_point_following_numeric_literal() {
 }
 
 #[test]
-fn non_english_str() {
+fn string_unicode() {
     let str = r#"'中文';"#;
 
     let mut lexer = Lexer::new(str.as_bytes());
@@ -828,7 +828,7 @@ fn non_english_str() {
 }
 
 #[test]
-fn unicode_escape_with_braces() {
+fn string_unicode_escape_with_braces() {
     let mut lexer = Lexer::new(&br#"'{\u{20ac}\u{a0}\u{a0}}'"#[..]);
 
     let expected = [TokenKind::StringLiteral("{\u{20ac}\u{a0}\u{a0}}".into())];
@@ -859,7 +859,7 @@ fn unicode_escape_with_braces() {
 }
 
 #[test]
-fn unicode_escape_with_braces_() {
+fn take_string_characters_unicode_escape_with_braces_2() {
     let s = r#"\u{20ac}\u{a0}\u{a0}"#.to_string();
 
     let mut cursor = Cursor::new(s.as_bytes());
@@ -877,7 +877,7 @@ fn unicode_escape_with_braces_() {
 }
 
 #[test]
-fn unescape_string_with_single_escape() {
+fn take_string_characters_with_single_escape() {
     let s = r#"\Б"#.to_string();
     let mut cursor = Cursor::new(s.as_bytes());
     let (s, _) = StringLiteral::take_string_characters(
@@ -891,7 +891,7 @@ fn unescape_string_with_single_escape() {
 }
 
 #[test]
-fn legacy_octal_escape() {
+fn take_string_characters_legacy_octal_escape() {
     let test_cases = [
         (r#"\3"#, "\u{3}"),
         (r#"\03"#, "\u{3}"),
@@ -928,7 +928,7 @@ fn legacy_octal_escape() {
 }
 
 #[test]
-fn zero_escape() {
+fn take_string_characters_zero_escape() {
     let test_cases = [(r#"\0"#, "\u{0}"), (r#"\0A"#, "\u{0}A")];
 
     for (s, expected) in test_cases.iter() {
@@ -946,7 +946,7 @@ fn zero_escape() {
 }
 
 #[test]
-fn non_octal_decimal_escape() {
+fn take_string_characters_non_octal_decimal_escape() {
     let test_cases = [(r#"\8"#, "8"), (r#"\9"#, "9")];
 
     for (s, expected) in test_cases.iter() {
@@ -975,7 +975,7 @@ fn non_octal_decimal_escape() {
 }
 
 #[test]
-fn line_continuation() {
+fn take_string_characters_line_continuation() {
     let s = "hello \\\nworld";
     let mut cursor = Cursor::new(s.as_bytes());
     let (s, _) = StringLiteral::take_string_characters(

From 983d786d6b67cfe65aa16c2f3cabca2d14c42f34 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Mon, 18 Jan 2021 21:30:16 -0800
Subject: [PATCH 18/23] Add test for error

---
 boa/src/syntax/lexer/string.rs | 33 +++++++++++++++++++--------------
 boa/src/syntax/lexer/tests.rs  | 20 ++++++++++++++++----
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 6cf1e991b88..6449d554c90 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -90,9 +90,10 @@ impl StringLiteral {
     {
         let mut buf = Vec::new();
         loop {
-            let next_ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();
+            let ch_start_pos = cursor.pos();
+            let ch = cursor.next_char()?.map(char::try_from).transpose().unwrap();
 
-            match next_ch {
+            match ch {
                 Some('\'') if terminator == StringTerminator::SingleQuote => {
                     break;
                 }
@@ -135,17 +136,17 @@ impl StringLiteral {
                             buf.push(0x0000 /* NULL */)
                         }
                         'x' => {
-                            Self::take_hex_escape_sequence(cursor, Some(&mut buf))?;
+                            Self::take_hex_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
                         }
                         'u' => {
-                            Self::take_unicode_escape_sequence(cursor, Some(&mut buf))?;
+                            Self::take_unicode_escape_sequence(cursor, ch_start_pos, Some(&mut buf))?;
                         }
                         '8' | '9' => {
                             // Grammar: NonOctalDecimalEscapeSequence
                             if strict_mode {
                                 return Err(Error::syntax(
                                     "\\8 and \\9 are not allowed in strict mode",
-                                    cursor.pos(),
+                                    ch_start_pos,
                                 ));
                             } else {
                                 buf.push(escape_ch as u16);
@@ -154,6 +155,7 @@ impl StringLiteral {
                         _ if escape_ch.is_digit(8) => {
                             Self::take_legacy_octal_escape_sequence(
                                 cursor,
+                                ch_start_pos,
                                 Some(&mut buf),
                                 strict_mode,
                                 escape_ch as u8,
@@ -173,11 +175,11 @@ impl StringLiteral {
                         }
                     };
                 }
-                Some(next_ch) => {
-                    if next_ch.len_utf16() == 1 {
-                        buf.push(next_ch as u16);
+                Some(ch) => {
+                    if ch.len_utf16() == 1 {
+                        buf.push(ch as u16);
                     } else {
-                        buf.extend(next_ch.encode_utf16(&mut [0u16; 2]).iter());
+                        buf.extend(ch.encode_utf16(&mut [0u16; 2]).iter());
                     }
                 }
                 None => {
@@ -198,6 +200,7 @@ impl StringLiteral {
     #[inline]
     pub(super) fn take_unicode_escape_sequence<R>(
         cursor: &mut Cursor<R>,
+        start_pos: Position,
         code_units_buf: Option<&mut Vec<u16>>,
     ) -> Result<u32, Error>
     where
@@ -212,14 +215,14 @@ impl StringLiteral {
             let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
             // We know this is a single unicode codepoint, convert to u32
             let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| {
-                Error::syntax("malformed Unicode character escape sequence", cursor.pos())
+                Error::syntax("malformed Unicode character escape sequence", start_pos)
             })?;
 
             // UTF16Encoding of a numeric code point value
             if code_point > 0x10_FFFF {
                 return Err(Error::syntax(
                     "Unicode codepoint must not be greater than 0x10FFFF in escape sequence",
-                    cursor.pos(),
+                    start_pos,
                 ));
             } else if let Some(code_units_buf) = code_units_buf {
                 if code_point <= 65535 {
@@ -243,7 +246,7 @@ impl StringLiteral {
             let code_point_str = str::from_utf8(&code_point_utf8_bytes)
                 .expect("malformed Unicode character escape sequence");
             let code_point = u16::from_str_radix(code_point_str, 16)
-                .map_err(|_| Error::syntax("invalid Unicode escape sequence", cursor.pos()))?;
+                .map_err(|_| Error::syntax("invalid Unicode escape sequence", start_pos))?;
 
             if let Some(code_units_buf) = code_units_buf {
                 code_units_buf.push(code_point);
@@ -256,6 +259,7 @@ impl StringLiteral {
     #[inline]
     fn take_hex_escape_sequence<R>(
         cursor: &mut Cursor<R>,
+        start_pos: Position,
         code_units_buf: Option<&mut Vec<u16>>,
     ) -> Result<u32, Error>
     where
@@ -266,7 +270,7 @@ impl StringLiteral {
         let code_point_str = str::from_utf8(&code_point_utf8_bytes)
             .expect("malformed Hexadecimal character escape sequence");
         let code_point = u16::from_str_radix(&code_point_str, 16)
-            .map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", cursor.pos()))?;
+            .map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
 
         if let Some(code_units_buf) = code_units_buf {
             code_units_buf.push(code_point);
@@ -278,6 +282,7 @@ impl StringLiteral {
     #[inline]
     fn take_legacy_octal_escape_sequence<R>(
         cursor: &mut Cursor<R>,
+        start_pos: Position,
         code_units_buf: Option<&mut Vec<u16>>,
         strict_mode: bool,
         init_byte: u8,
@@ -288,7 +293,7 @@ impl StringLiteral {
         if strict_mode {
             return Err(Error::syntax(
                 "octal escape sequences are not allowed in strict mode",
-                cursor.pos(),
+                start_pos,
             ));
         }
         // Grammar: OctalDigit
diff --git a/boa/src/syntax/lexer/tests.rs b/boa/src/syntax/lexer/tests.rs
index e9d01d46915..7ef4a34bc04 100644
--- a/boa/src/syntax/lexer/tests.rs
+++ b/boa/src/syntax/lexer/tests.rs
@@ -917,13 +917,19 @@ fn take_string_characters_legacy_octal_escape() {
 
     for (s, _) in test_cases.iter() {
         let mut cursor = Cursor::new(s.as_bytes());
-        StringLiteral::take_string_characters(
+
+        if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
             &mut cursor,
             Position::new(1, 1),
             StringTerminator::End,
             true,
         )
-        .expect_err("Octal-escape in strict mode not rejected as expected");
+        .expect_err("Octal-escape in strict mode not rejected as expected")
+        {
+            assert_eq!(pos, Position::new(1, 1));
+        } else {
+            panic!("invalid error type");
+        }
     }
 }
 
@@ -964,13 +970,19 @@ fn take_string_characters_non_octal_decimal_escape() {
 
     for (s, _) in test_cases.iter() {
         let mut cursor = Cursor::new(s.as_bytes());
-        StringLiteral::take_string_characters(
+
+        if let Error::Syntax(_, pos) = StringLiteral::take_string_characters(
             &mut cursor,
             Position::new(1, 1),
             StringTerminator::End,
             true,
         )
-        .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected");
+        .expect_err("Non-octal-decimal-escape in strict mode not rejected as expected")
+        {
+            assert_eq!(pos, Position::new(1, 1));
+        } else {
+            panic!("invalid error type");
+        }
     }
 }
 

From 17803d0e8f418ef63086f287173edd3562d70bb2 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Tue, 19 Jan 2021 12:12:49 -0800
Subject: [PATCH 19/23] Add comments for unsafe bytes to str

---
 boa/src/syntax/lexer/string.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 6449d554c90..99c1964617c 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -212,8 +212,9 @@ impl StringLiteral {
             let mut code_point_buf = Vec::with_capacity(6);
             cursor.take_until(b'}', &mut code_point_buf)?;
 
+            // Safty: invalid UTF-8 bytes will be handled by returning Err in the following `u32::from_str_radix`
             let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
-            // We know this is a single unicode codepoint, convert to u32
+            // The `code_point_str` should represent a single unicode codepoint, convert to u32
             let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| {
                 Error::syntax("malformed Unicode character escape sequence", start_pos)
             })?;

From 99a6096fb11293f0275261249c1dd52fcceafb00 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Tue, 19 Jan 2021 12:52:38 -0800
Subject: [PATCH 20/23] Update boa/src/syntax/lexer/string.rs

Co-authored-by: tofpie <75836434+tofpie@users.noreply.github.com>
---
 boa/src/syntax/lexer/string.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 99c1964617c..9fc4d888042 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -212,7 +212,7 @@ impl StringLiteral {
             let mut code_point_buf = Vec::with_capacity(6);
             cursor.take_until(b'}', &mut code_point_buf)?;
 
-            // Safty: invalid UTF-8 bytes will be handled by returning Err in the following `u32::from_str_radix`
+            // Safety: invalid UTF-8 bytes will be handled by returning Err in the following `u32::from_str_radix`
             let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
             // The `code_point_str` should represent a single unicode codepoint, convert to u32
             let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| {

From b9d7b02d902d91e7aaca13767be35b105db55f12 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Tue, 19 Jan 2021 13:50:02 -0800
Subject: [PATCH 21/23] Minor refactor

---
 boa/src/syntax/lexer/string.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index 9fc4d888042..a6c9e198e48 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -129,8 +129,7 @@ impl StringLiteral {
                         '\\' => buf.push(0x005C /* \ */),
                         '0' if cursor
                             .peek()?
-                            .and_then(|next_byte| char::try_from(next_byte).ok())
-                            .filter(|next_ch| next_ch.is_digit(10))
+                            .filter(|next_byte| (b'0'..=b'9').contains(next_byte))
                             .is_none() =>
                         {
                             buf.push(0x0000 /* NULL */)
@@ -303,14 +302,14 @@ impl StringLiteral {
         // Grammar: ZeroToThree OctalDigit
         // Grammar: FourToSeven OctalDigit
         if let Some(byte) = cursor.peek()? {
-            if (b'0'..b'8').contains(&byte) {
+            if (b'0'..=b'7').contains(&byte) {
                 let _ = cursor.next_byte()?;
                 code_point = (code_point * 8) + (byte - b'0') as u32;
 
-                if (b'0'..b'4').contains(&init_byte) {
+                if (b'0'..=b'3').contains(&init_byte) {
                     // Grammar: ZeroToThree OctalDigit OctalDigit
                     if let Some(byte) = cursor.peek()? {
-                        if (b'0'..b'8').contains(&byte) {
+                        if (b'0'..=b'7').contains(&byte) {
                             let _ = cursor.next_byte()?;
                             code_point = (code_point * 8) + (byte - b'0') as u32;
                         }

From dcf668e001fd46e254a6dd113c8bdc5966dc319c Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Tue, 19 Jan 2021 15:32:29 -0800
Subject: [PATCH 22/23] Remove unsafe bytes to str

---
 boa/src/syntax/lexer/string.rs | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index a6c9e198e48..f0a55e8cf4d 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -211,12 +211,15 @@ impl StringLiteral {
             let mut code_point_buf = Vec::with_capacity(6);
             cursor.take_until(b'}', &mut code_point_buf)?;
 
-            // Safety: invalid UTF-8 bytes will be handled by returning Err in the following `u32::from_str_radix`
-            let code_point_str = unsafe { str::from_utf8_unchecked(code_point_buf.as_slice()) };
-            // The `code_point_str` should represent a single unicode codepoint, convert to u32
-            let code_point = u32::from_str_radix(&code_point_str, 16).map_err(|_| {
-                Error::syntax("malformed Unicode character escape sequence", start_pos)
-            })?;
+            let code_point = str::from_utf8(code_point_buf.as_slice())
+                .ok()
+                .and_then(|code_point_str| {
+                    // The `code_point_str` should represent a single unicode codepoint, convert to u32
+                    u32::from_str_radix(&code_point_str, 16).ok()
+                })
+                .ok_or_else(|| {
+                    Error::syntax("malformed Unicode character escape sequence", start_pos)
+                })?;
 
             // UTF16Encoding of a numeric code point value
             if code_point > 0x10_FFFF {

From 6fa0642376a3c18c36f5732442a68328a03fb2e1 Mon Sep 17 00:00:00 2001
From: Jevan Chan <jevan.cnchan@gmail.com>
Date: Tue, 19 Jan 2021 15:39:09 -0800
Subject: [PATCH 23/23] Fix panic when reading invalid utf-8 chars

---
 boa/src/syntax/lexer/string.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/boa/src/syntax/lexer/string.rs b/boa/src/syntax/lexer/string.rs
index f0a55e8cf4d..b4542a70d41 100644
--- a/boa/src/syntax/lexer/string.rs
+++ b/boa/src/syntax/lexer/string.rs
@@ -246,10 +246,10 @@ impl StringLiteral {
             cursor.fill_bytes(&mut code_point_utf8_bytes)?;
 
             // Convert to u16
-            let code_point_str = str::from_utf8(&code_point_utf8_bytes)
-                .expect("malformed Unicode character escape sequence");
-            let code_point = u16::from_str_radix(code_point_str, 16)
-                .map_err(|_| Error::syntax("invalid Unicode escape sequence", start_pos))?;
+            let code_point = str::from_utf8(&code_point_utf8_bytes)
+                .ok()
+                .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok())
+                .ok_or_else(|| Error::syntax("invalid Unicode escape sequence", start_pos))?;
 
             if let Some(code_units_buf) = code_units_buf {
                 code_units_buf.push(code_point);
@@ -270,10 +270,10 @@ impl StringLiteral {
     {
         let mut code_point_utf8_bytes = [0u8; 2];
         cursor.fill_bytes(&mut code_point_utf8_bytes)?;
-        let code_point_str = str::from_utf8(&code_point_utf8_bytes)
-            .expect("malformed Hexadecimal character escape sequence");
-        let code_point = u16::from_str_radix(&code_point_str, 16)
-            .map_err(|_| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
+        let code_point = str::from_utf8(&code_point_utf8_bytes)
+            .ok()
+            .and_then(|code_point_str| u16::from_str_radix(&code_point_str, 16).ok())
+            .ok_or_else(|| Error::syntax("invalid Hexadecimal escape sequence", start_pos))?;
 
         if let Some(code_units_buf) = code_units_buf {
             code_units_buf.push(code_point);