Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add numeric separator lexing #995

Merged
merged 2 commits into from
Dec 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion boa/src/syntax/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,13 @@ impl<R> Lexer<R> {
Punctuator::Colon.into(),
Span::new(start, self.cursor.pos()),
)),
'.' => SpreadLiteral::new().lex(&mut self.cursor, start),
'.' => {
if self.cursor.peek()?.map(|c| c >= b'0' && c <= b'9') == Some(true) {
NumberLiteral::new(next_ch as u8).lex(&mut self.cursor, start)
} else {
SpreadLiteral::new().lex(&mut self.cursor, start)
}
}
'(' => Ok(Token::new(
Punctuator::OpenParen.into(),
Span::new(start, self.cursor.pos()),
Expand Down
93 changes: 77 additions & 16 deletions boa/src/syntax/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,52 @@ where
}

// Consume the decimal digits.
cursor.take_while_ascii_pred(buf, &|ch| ch.is_digit(kind.base()))?;
take_integer(buf, cursor, kind, true)?;

Ok(())
}

/// Consumes a run of digits and `_` numeric separators from `cursor`,
/// appending the digits (but never the separators) to `buf`.
///
/// A character counts as a digit when `char::is_digit(kind.base())` accepts it.
/// Consumption stops at the first upcoming character that is neither such a
/// digit nor `_`.
///
/// `separator_allowed` selects how `_` is treated: when `true`, single
/// underscores between digits are skipped; when `false`, any underscore is a
/// syntax error.
///
/// A *leading* underscore is not rejected here (the "previous was underscore"
/// flag starts out `false`).
///
/// # Errors
///
/// Returns a syntax error when:
/// - an underscore is met while `separator_allowed` is `false`;
/// - two underscores appear in a row;
/// - the run ends with an underscore.
///
/// Errors produced by the cursor calls are propagated through `?`.
fn take_integer<R>(
buf: &mut Vec<u8>,
cursor: &mut Cursor<R>,
kind: &NumericKind,
separator_allowed: bool,
) -> Result<(), Error>
where
R: Read,
{
// True when the most recently consumed byte was a `_`.
let mut prev_is_underscore = false;
// Position recorded just before the most recently consumed byte, so that
// errors can point at the separator itself rather than past it.
let mut pos = cursor.pos();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the pos variable, cant we call cursor.pos() where we need the position?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to have the correct position when the separator is at the end of the number. If we take cursor.pos(), then the position is after the separator, on not on the separator.

// Keep going while the upcoming character is a digit of this base or a `_`.
while cursor.next_is_ascii_pred(&|c| c.is_digit(kind.base()) || c == '_')? {
// Remember where the byte we are about to consume starts.
pos = cursor.pos();
match cursor.next_byte()? {
// A digit: keep it and clear the separator flag.
Some(c) if char::from(c).is_digit(kind.base()) => {
prev_is_underscore = false;
buf.push(c);
}
// A permitted separator: dropped from `buf`, but two in a row is an error.
Some(b'_') if separator_allowed => {
if prev_is_underscore {
// NOTE(review): this error uses `cursor.pos()` while the other two use
// `pos` — presumably this points just past the second underscore;
// confirm that the difference is intended.
return Err(Error::syntax(
"only one underscore is allowed as numeric separator",
cursor.pos(),
));
}
prev_is_underscore = true;
}
// A separator where none is permitted: report it at its own position.
Some(b'_') if !separator_allowed => {
return Err(Error::syntax("separator is not allowed", pos));
}
// Anything else (including `None`) is ignored; given the loop condition
// this arm is presumably never taken — TODO confirm.
_ => (),
}
}
// The run must not end on a separator; `pos` still points at that underscore.
if prev_is_underscore {
return Err(Error::syntax(
"underscores are not allowed at the end of numeric literals",
pos,
));
}
Ok(())
}
/// Utility function for checking the NumericLiteral is not followed by an `IdentifierStart` or `DecimalDigit` character.
///
/// More information:
Expand Down Expand Up @@ -149,6 +190,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
let mut kind = NumericKind::Integer(10);

let c = cursor.peek();
let mut legacy_octal = false;

if self.init == b'0' {
if let Some(ch) = c? {
Expand Down Expand Up @@ -180,7 +222,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Checks if the next char after '0o' is a digit of that base. if not return an error.
if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(8))? {
return Err(Error::syntax(
"expected hexadecimal digit after number base prefix",
"expected octal digit after number base prefix",
cursor.pos(),
));
}
Expand All @@ -196,7 +238,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
// Checks if the next char after '0b' is a digit of that base. if not return an error.
if !cursor.next_is_ascii_pred(&|ch| ch.is_digit(2))? {
return Err(Error::syntax(
"expected hexadecimal digit after number base prefix",
"expected binary digit after number base prefix",
cursor.pos(),
));
}
Expand All @@ -211,6 +253,7 @@ impl<R> Tokenizer<R> for NumberLiteral {
));
}
byte => {
legacy_octal = true;
let ch = char::from(byte);
if ch.is_digit(8) {
// LegacyOctalIntegerLiteral
Expand All @@ -237,8 +280,6 @@ impl<R> Tokenizer<R> for NumberLiteral {
"leading 0's are not allowed in strict mode",
start_pos,
));
} else {
buf.push(cursor.next_byte()?.expect("Number digit vanished"));
}
} // Else indicates that the symbol is a non-number.
}
Expand All @@ -253,34 +294,54 @@ impl<R> Tokenizer<R> for NumberLiteral {
}
}

// Consume digits until a non-digit character is encountered or all the characters are consumed.
cursor.take_while_ascii_pred(&mut buf, &|c: char| c.is_digit(kind.base()))?;
let next = if self.init == b'.' {
Some(b'.')
} else {
// Consume digits and separators until a non-digit non-separator
// character is encountered or all the characters are consumed.
take_integer(&mut buf, cursor, &kind, !legacy_octal)?;
cursor.peek()?
};

// The non-digit character could be:
// 'n' To indicate a BigIntLiteralSuffix.
// '.' To indicate a decimal seperator.
// '.' To indicate a decimal separator.
// 'e' | 'E' To indicate an ExponentPart.
match cursor.peek()? {
match next {
Some(b'n') => {
// DecimalBigIntegerLiteral
// Lexing finished.

// Consume the n
if legacy_octal {
return Err(Error::syntax(
"'n' suffix not allowed in octal representation",
cursor.pos(),
));
}
cursor.next_byte()?.expect("n character vanished");

kind = kind.to_bigint();
}
Some(b'.') => {
if kind.base() == 10 {
// Only base 10 numbers can have a decimal seperator.
// Only base 10 numbers can have a decimal separator.
// Number literal lexing finished if a . is found for a number in a different base.

cursor.next_byte()?.expect(". token vanished");
buf.push(b'.'); // Consume the .
if self.init != b'.' {
cursor.next_byte()?.expect("'.' token vanished");
buf.push(b'.'); // Consume the .
}
kind = NumericKind::Rational;

// Consume digits until a non-digit character is encountered or all the characters are consumed.
cursor.take_while_ascii_pred(&mut buf, &|c: char| c.is_digit(kind.base()))?;
if cursor.peek()? == Some(b'_') {
return Err(Error::syntax(
"numeric separator not allowed after '.'",
cursor.pos(),
));
}

// Consume digits and separators until a non-digit non-separator
// character is encountered or all the characters are consumed.
take_integer(&mut buf, cursor, &kind, true)?;

// The non-digit character at this point must be an 'e' or 'E' to indicate an Exponent Part.
// Another '.' or 'n' is not allowed.
Expand Down
41 changes: 39 additions & 2 deletions boa/src/syntax/lexer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,44 @@ fn numbers() {
expect_tokens(&mut lexer, &expected);
}

/// Checks that numeric literals containing well-placed `_` separators lex to
/// the same values as their separator-free forms.
#[test]
fn numbers_with_separators() {
    let source =
        "1_0 2_0 0x3_4 056 7.8_9 4_2. 5_0e2 5_0e+2 5_0e-4 0b1_0 1_0.0_0e2 1.0E-0_1 -3_2";
    let mut lexer = Lexer::new(source.as_bytes());

    // One entry per literal in `source`, in order; the final `-3_2` lexes as
    // a `-` punctuator followed by the number 32.
    let expected = [
        TokenKind::numeric_literal(10),    // 1_0
        TokenKind::numeric_literal(20),    // 2_0
        TokenKind::numeric_literal(52),    // 0x3_4
        TokenKind::numeric_literal(46),    // 056
        TokenKind::numeric_literal(7.89),  // 7.8_9
        TokenKind::numeric_literal(42),    // 4_2.
        TokenKind::numeric_literal(5000),  // 5_0e2
        TokenKind::numeric_literal(5000),  // 5_0e+2
        TokenKind::numeric_literal(0.005), // 5_0e-4
        TokenKind::numeric_literal(2),     // 0b1_0
        TokenKind::numeric_literal(1000),  // 1_0.0_0e2
        TokenKind::numeric_literal(0.1),   // 1.0E-0_1
        TokenKind::Punctuator(Punctuator::Sub),
        TokenKind::numeric_literal(32), // 3_2
    ];

    expect_tokens(&mut lexer, &expected);
}

/// Checks that literals with misplaced `_` separators are rejected: directly
/// after a base prefix, at the end, right after `.`, at the start of an
/// exponent, or doubled.
#[test]
fn numbers_with_bad_separators() {
    let malformed = [
        "0b_10", "0x_10", "10_", "1._10", "1e+_10", "1E_10", "10__00",
    ];

    // Each input is lexed on its own; the first token request must fail.
    for &source in &malformed {
        let mut lexer = Lexer::new(source.as_bytes());
        assert!(lexer.next().is_err());
    }
}

#[test]
fn big_exp_numbers() {
let mut lexer = Lexer::new(&b"1.0e25 1.0e36 9.0e50"[..]);
Expand Down Expand Up @@ -418,8 +456,7 @@ fn implicit_octal_edge_case() {

let expected = [
TokenKind::numeric_literal(36),
TokenKind::Punctuator(Punctuator::Dot),
TokenKind::numeric_literal(5),
TokenKind::numeric_literal(0.5),
TokenKind::numeric_literal(94.5),
];

Expand Down