Fix hashbang comments by using proper goal symbols

boa-dev · Jun 14, 2024 · 4ede603 · 4ede603
1 parent 32e7562
commit 4ede603
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 22 deletions.
diff --git a/core/parser/src/lexer/mod.rs b/core/parser/src/lexer/mod.rs
@@ -161,7 +161,7 @@ impl<R> Lexer<R> {
                                 ))
                             }
                         }
-                        InputElement::RegExp => {
+                        InputElement::RegExp | InputElement::HashbangOrRegExp => {
                             // Can be a regular expression.
                             RegexLiteral.lex(&mut self.cursor, start, interner)
                         }
@@ -214,28 +214,34 @@ impl<R> Lexer<R> {
     {
         let _timer = Profiler::global().start_event("next()", "Lexing");
 
-        let (start, next_ch) = loop {
-            let start = self.cursor.pos();
-            if let Some(next_ch) = self.cursor.next_char()? {
-                // Ignore whitespace
-                if !is_whitespace(next_ch) {
-                    break (start, next_ch);
-                }
-            } else {
-                return Ok(None);
-            }
+        let mut start = self.cursor.pos();
+        let Some(mut next_ch) = self.cursor.next_char()? else {
+            return Ok(None);
         };
 
-        //handle hashbang here so the below match block still throws error on
-        //# if position isn't (1, 1)
-        if start.column_number() == 1
-            && start.line_number() == 1
-            && next_ch == 0x23
-            && self.cursor.peek_char()? == Some(0x21)
-        {
-            let _token = HashbangComment.lex(&mut self.cursor, start, interner);
-            return self.next(interner);
-        };
+        // A hashbang comment (`#!`, i.e. 0x23 0x21) can only appear at the very start of the source, so the
+        // `HashbangOrRegExp` goal applies to the first call only: reset it to `RegExp` first, then check for `#!`.
+        if self.get_goal() == InputElement::HashbangOrRegExp {
+            self.set_goal(InputElement::RegExp);
+            if next_ch == 0x23 && self.cursor.peek_char()? == Some(0x21) {
+                let _token = HashbangComment.lex(&mut self.cursor, start, interner);
+                return self.next(interner);
+            };
+        }
+
+        // Ignore whitespace
+        if is_whitespace(next_ch) {
+            loop {
+                start = self.cursor.pos();
+                let Some(next) = self.cursor.next_char()? else {
+                    return Ok(None);
+                };
+                if !is_whitespace(next) {
+                    next_ch = next;
+                    break;
+                }
+            }
+        }
 
         if let Ok(c) = char::try_from(next_ch) {
             let token = match c {
@@ -392,6 +398,7 @@ pub(crate) enum InputElement {
     Div,
     RegExp,
     TemplateTail,
+    HashbangOrRegExp,
 }
 
 impl Default for InputElement {

diff --git a/core/parser/src/parser/mod.rs b/core/parser/src/parser/mod.rs
@@ -11,7 +11,7 @@ mod tests;
 
 use crate::{
     error::ParseResult,
-    lexer::Error as LexError,
+    lexer::{Error as LexError, InputElement},
     parser::{
         cursor::Cursor,
         function::{FormalParameters, FunctionStatementList},
@@ -140,6 +140,7 @@ impl<'a, R: ReadChar> Parser<'a, R> {
     ///
     /// [spec]: https://tc39.es/ecma262/#prod-Script
     pub fn parse_script(&mut self, interner: &mut Interner) -> ParseResult<boa_ast::Script> {
+        self.cursor.set_goal(InputElement::HashbangOrRegExp);
         ScriptParser::new(false).parse(&mut self.cursor, interner)
     }
 
@@ -155,6 +156,7 @@ impl<'a, R: ReadChar> Parser<'a, R> {
     where
         R: ReadChar,
     {
+        self.cursor.set_goal(InputElement::HashbangOrRegExp);
         ModuleParser.parse(&mut self.cursor, interner)
     }
 
@@ -172,6 +174,7 @@ impl<'a, R: ReadChar> Parser<'a, R> {
         direct: bool,
         interner: &mut Interner,
     ) -> ParseResult<boa_ast::Script> {
+        self.cursor.set_goal(InputElement::HashbangOrRegExp);
         ScriptParser::new(direct).parse(&mut self.cursor, interner)
     }