From bbda9b04be6c4f1ca3510f32d1abd8c2373aea54 Mon Sep 17 00:00:00 2001
From: Tom French <15848336+TomAFrench@users.noreply.github.com>
Date: Thu, 24 Aug 2023 15:49:27 +0100
Subject: [PATCH] fix: Correct off-by-one errors in lexer spans (#2393)

Co-authored-by: Blaine Bublitz <blaine.bublitz@gmail.com>
---
 crates/noirc_errors/src/position.rs      | 14 +----
 crates/noirc_errors/src/reporter.rs      |  2 +-
 crates/noirc_evaluator/src/errors.rs     |  4 +-
 crates/noirc_frontend/src/lexer/lexer.rs | 77 ++++++++++++++++--------
 4 files changed, 59 insertions(+), 38 deletions(-)
diff --git a/crates/noirc_errors/src/position.rs b/crates/noirc_errors/src/position.rs
index 09c59da1980..4cbf934138c 100644
--- a/crates/noirc_errors/src/position.rs
+++ b/crates/noirc_errors/src/position.rs
@@ -33,7 +33,7 @@ impl<T: Hash> Hash for Spanned<T> {
 
 impl<T> Spanned<T> {
     pub fn from_position(start: Position, end: Position, contents: T) -> Spanned<T> {
-        Spanned { span: Span(ByteSpan::new(start, end)), contents }
+        Spanned { span: Span::inclusive(start, end), contents }
     }
 
     pub const fn from(t_span: Span, contents: T) -> Spanned<T> {
@@ -57,16 +57,8 @@ impl<T> std::borrow::Borrow<T> for Spanned<T> {
 pub struct Span(ByteSpan);
 
 impl Span {
-    pub fn new(range: Range<u32>) -> Span {
-        Span(ByteSpan::from(range))
-    }
-
-    pub fn exclusive(start: u32, end: u32) -> Span {
-        Span::new(start..end)
-    }
-
     pub fn inclusive(start: u32, end: u32) -> Span {
-        Span::exclusive(start, end + 1)
+        Span(ByteSpan::from(start..end + 1))
     }
 
     pub fn single_char(start: u32) -> Span {
@@ -103,7 +95,7 @@ impl chumsky::Span for Span {
     type Offset = u32;
 
     fn new(_context: Self::Context, range: Range<Self::Offset>) -> Self {
-        Span::new(range)
+        Span(ByteSpan::from(range))
     }
 
     fn context(&self) -> Self::Context {}
diff --git a/crates/noirc_errors/src/reporter.rs b/crates/noirc_errors/src/reporter.rs
index d55189e80ac..65cd0a93279 100644
--- a/crates/noirc_errors/src/reporter.rs
+++ b/crates/noirc_errors/src/reporter.rs
@@ -165,7 +165,7 @@ fn convert_diagnostic(
             .iter()
             .map(|sl| {
                 let start_span = sl.span.start() as usize;
-                let end_span = sl.span.end() as usize + 1;
+                let end_span = sl.span.end() as usize;
                 Label::secondary(file_id.as_usize(), start_span..end_span).with_message(&sl.message)
             })
             .collect()
diff --git a/crates/noirc_evaluator/src/errors.rs b/crates/noirc_evaluator/src/errors.rs
index eab9bff043a..4be94148b71 100644
--- a/crates/noirc_evaluator/src/errors.rs
+++ b/crates/noirc_evaluator/src/errors.rs
@@ -96,10 +96,10 @@ impl RuntimeError {
         match self {
             RuntimeError::InternalError(_) => {
                 Diagnostic::simple_error(
-                    "Internal Consistency Evaluators Errors: \n 
+                    "Internal Consistency Evaluators Errors: \n
                     This is likely a bug. Consider Opening an issue at https://github.com/noir-lang/noir/issues".to_owned(),
                     "".to_string(),
-                    noirc_errors::Span::new(0..0)
+                    noirc_errors::Span::inclusive(0, 0)
                 )
             }
             _ => {
diff --git a/crates/noirc_frontend/src/lexer/lexer.rs b/crates/noirc_frontend/src/lexer/lexer.rs
index fc72f43674d..3265249b13e 100644
--- a/crates/noirc_frontend/src/lexer/lexer.rs
+++ b/crates/noirc_frontend/src/lexer/lexer.rs
@@ -67,7 +67,7 @@ impl<'a> Lexer<'a> {
         if self.peek_char_is('&') {
             // When we issue this error the first '&' will already be consumed
             // and the next token issued will be the next '&'.
-            let span = Span::new(self.position..self.position + 1);
+            let span = Span::inclusive(self.position, self.position + 1);
             Err(LexerErrorKind::LogicalAnd { span })
         } else {
             self.single_char_token(Token::Ampersand)
@@ -102,7 +102,7 @@ impl<'a> Lexer<'a> {
             Some('}') => self.single_char_token(Token::RightBrace),
             Some('[') => self.single_char_token(Token::LeftBracket),
             Some(']') => self.single_char_token(Token::RightBracket),
-            Some('"') => Ok(self.eat_string_literal(false)),
+            Some('"') => self.eat_string_literal(),
             Some('f') => self.eat_format_string_or_alpha_numeric(),
             Some('#') => self.eat_attribute(),
             Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
@@ -195,9 +195,7 @@ impl<'a> Lexer<'a> {
         &mut self,
         initial_char: Option<char>,
         predicate: F,
-    ) -> (String, Position, Position) {
-        let start = self.position;
-
+    ) -> String {
         // This function is only called when we want to continue consuming a character of the same type.
         // For example, we see a digit and we want to consume the whole integer
         // Therefore, the current character which triggered this function will need to be appended
@@ -212,7 +210,7 @@ impl<'a> Lexer<'a> {
             // If not, return word. The next character will be analyzed on the next iteration of next_token,
             // Which will increment the cursor
             if !predicate(peek_char) {
-                return (word, start, self.position);
+                return word;
             }
             word.push(peek_char);
 
@@ -220,7 +218,7 @@ impl<'a> Lexer<'a> {
             self.next_char();
         }
 
-        (word, start, self.position)
+        word
     }
 
     fn eat_alpha_numeric(&mut self, initial_char: char) -> SpannedTokenResult {
@@ -236,6 +234,8 @@ impl<'a> Lexer<'a> {
     }
 
     fn eat_attribute(&mut self) -> SpannedTokenResult {
+        let start = self.position;
+
         if !self.peek_char_is('[') {
             return Err(LexerErrorKind::UnexpectedCharacter {
                 span: Span::single_char(self.position),
@@ -245,7 +245,7 @@ impl<'a> Lexer<'a> {
         }
         self.next_char();
 
-        let (word, start, end) = self.eat_while(None, |ch| ch != ']');
+        let word = self.eat_while(None, |ch| ch != ']');
 
         if !self.peek_char_is(']') {
             return Err(LexerErrorKind::UnexpectedCharacter {
@@ -256,20 +256,24 @@ impl<'a> Lexer<'a> {
         }
         self.next_char();
 
-        let attribute = Attribute::lookup_attribute(&word, Span::exclusive(start, end))?;
+        let end = self.position;
 
-        // Move start position backwards to cover the left bracket
-        // Move end position forwards to cover the right bracket
-        Ok(attribute.into_span(start - 1, end + 1))
+        let attribute = Attribute::lookup_attribute(&word, Span::inclusive(start, end))?;
+
+        Ok(attribute.into_span(start, end))
     }
 
     //XXX(low): Can increase performance if we use iterator semantic and utilize some of the methods on String. See below
     // https://doc.rust-lang.org/stable/std/primitive.str.html#method.rsplit
     fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
-        let (word, start, end) = self.eat_while(Some(initial_char), |ch| {
+        let start = self.position;
+
+        let word = self.eat_while(Some(initial_char), |ch| {
             ch.is_ascii_alphabetic() || ch.is_numeric() || ch == '_'
         });
 
+        let end = self.position;
+
         // Check if word either an identifier or a keyword
         if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
             return Ok(keyword_token.into_span(start, end));
@@ -277,7 +281,7 @@ impl<'a> Lexer<'a> {
 
         // Check if word an int type
         // if no error occurred, then it is either a valid integer type or it is not an int type
-        let parsed_token = IntType::lookup_int_type(&word, Span::exclusive(start, end))?;
+        let parsed_token = IntType::lookup_int_type(&word, Span::inclusive(start, end))?;
 
         // Check if it is an int type
         if let Some(int_type_token) = parsed_token {
@@ -290,14 +294,18 @@ impl<'a> Lexer<'a> {
     }
 
     fn eat_digit(&mut self, initial_char: char) -> SpannedTokenResult {
-        let (integer_str, start, end) = self.eat_while(Some(initial_char), |ch| {
+        let start = self.position;
+
+        let integer_str = self.eat_while(Some(initial_char), |ch| {
             ch.is_ascii_digit() | ch.is_ascii_hexdigit() | (ch == 'x')
         });
 
+        let end = self.position;
+
         let integer = match FieldElement::try_from_str(&integer_str) {
             None => {
                 return Err(LexerErrorKind::InvalidIntegerLiteral {
-                    span: Span::exclusive(start, end),
+                    span: Span::inclusive(start, end),
                     found: integer_str,
                 })
             }
@@ -308,18 +316,38 @@ impl<'a> Lexer<'a> {
         Ok(integer_token.into_span(start, end))
     }
 
-    fn eat_string_literal(&mut self, is_format_string: bool) -> SpannedToken {
-        let (str_literal, start_span, end_span) = self.eat_while(None, |ch| ch != '"');
-        let str_literal_token =
-            if is_format_string { Token::FmtStr(str_literal) } else { Token::Str(str_literal) };
+    fn eat_string_literal(&mut self) -> SpannedTokenResult {
+        let start = self.position;
+
+        let str_literal = self.eat_while(None, |ch| ch != '"');
+
+        let str_literal_token = Token::Str(str_literal);
+
+        self.next_char(); // Advance past the closing quote
+
+        let end = self.position;
+        Ok(str_literal_token.into_span(start, end))
+    }
+
+    // This differs from `eat_string_literal` in that we want the leading `f` to be captured in the Span
+    fn eat_fmt_string(&mut self) -> SpannedTokenResult {
+        let start = self.position;
+
+        self.next_char();
+
+        let str_literal = self.eat_while(None, |ch| ch != '"');
+
+        let str_literal_token = Token::FmtStr(str_literal);
+
         self.next_char(); // Advance past the closing quote
-        str_literal_token.into_span(start_span, end_span)
+
+        let end = self.position;
+        Ok(str_literal_token.into_span(start, end))
     }
 
     fn eat_format_string_or_alpha_numeric(&mut self) -> SpannedTokenResult {
         if self.peek_char_is('"') {
-            self.next_char();
-            Ok(self.eat_string_literal(true))
+            self.eat_fmt_string()
         } else {
             self.eat_alpha_numeric('f')
         }
@@ -331,7 +359,7 @@ impl<'a> Lexer<'a> {
     }
 
     fn parse_block_comment(&mut self) -> SpannedTokenResult {
-        let span = Span::new(self.position..self.position + 1);
+        let start = self.position;
         let mut depth = 1usize;
 
         while let Some(ch) = self.next_char() {
@@ -358,6 +386,7 @@ impl<'a> Lexer<'a> {
         if depth == 0 {
             self.next_token()
         } else {
+            let span = Span::inclusive(start, self.position);
             Err(LexerErrorKind::UnterminatedBlockComment { span })
         }
     }