From 0d75917f0f4407c50bfa2b8a3a510a7f89ee847e Mon Sep 17 00:00:00 2001 From: Jonathan Johnson Date: Tue, 18 Apr 2023 19:08:21 -0700 Subject: [PATCH] Added inf/NaN support Also updated the todos in the reference. --- Reference.md | 22 +++++++++++----------- src/tokenizer.rs | 24 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/Reference.md b/Reference.md index 2ac3985..0f3951c 100644 --- a/Reference.md +++ b/Reference.md @@ -75,7 +75,7 @@ Binary values are parsed after encountering `0b` while parsing an ## Float - [x] Tokenizer support - - [ ] `inf`/`NaN` support + - [x] `inf`/`NaN` support - [x] Parser support - [ ] Deserializer Support - [ ] Documentation @@ -85,40 +85,40 @@ Binary values are parsed after encountering `0b` while parsing an - [x] Tokenizer support - [x] Parser support -- [ ] Deserializer Support +- [x] Deserializer Support - [ ] Documentation ## Character -- [ ] Tokenizer support +- [x] Tokenizer support - [x] Parser support -- [ ] Deserializer Support +- [x] Deserializer Support - [ ] Documentation ## Byte -- [ ] Tokenizer support +- [x] Tokenizer support - [x] Parser support -- [ ] Deserializer Support +- [x] Deserializer Support - [ ] Documentation ## String -- [ ] Tokenizer support - - [ ] Support same whitespace rules on raw line ending escaping. +- [x] Tokenizer support + - [x] Support same whitespace rules on raw line ending escaping. - [ ] Error-by-default on multiple line ending removal with raw line ending escaping, just like rustc, but allow a parsing option that prevents the errors. 
- [x] Parser support -- [ ] Deserializer Support +- [x] Deserializer Support - [ ] Documentation ## Byte String -- [ ] Tokenizer support +- [x] Tokenizer support - [ ] `b64` prefixed base64-encoded byte strings - [x] Parser support -- [ ] Deserializer Support +- [x] Deserializer Support - [ ] Documentation ## Map diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 39bfa0b..5ee5448 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -583,6 +583,19 @@ impl<'a, const INCLUDE_ALL: bool> Tokenizer<'a, INCLUDE_ALL> { fn tokenize_number(&mut self, start_char: u8) -> Result<Token<'a>, Error> { let negative = start_char == b'-'; let signed = negative || start_char == b'+'; + // Check for inf/NaN + if signed && matches!(self.chars.peek(), Some('i' | 'N')) { + let mut token = self.tokenize_identifier(None)?; + match &mut token.kind { + TokenKind::Float(float) => { + if negative { + *float = -*float + } + return Ok(token); + } + _ => return Err(Error::new(token.location, ErrorKind::ExpectedDigit)), + } + } if signed { let next_char = self.next_or_eof()?; @@ -987,6 +1000,8 @@ impl<'a, const INCLUDE_ALL: bool> Tokenizer<'a, INCLUDE_ALL> { match source { "true" if !is_raw => TokenKind::Bool(true), "false" if !is_raw => TokenKind::Bool(false), + "inf" if !is_raw => TokenKind::Float(f64::INFINITY), + "NaN" if !is_raw => TokenKind::Float(f64::NAN), _ => TokenKind::Identifier(source), }, )) @@ -1940,6 +1955,15 @@ mod tests { test_tokens("+1.0e10", &[Token::new(0..7, TokenKind::Float(1.0e10))]); test_tokens("-1e10", &[Token::new(0..5, TokenKind::Float(-1e10))]); test_tokens("+1e10", &[Token::new(0..5, TokenKind::Float(1e10))]); + test_tokens("inf", &[Token::new(0..3, TokenKind::Float(f64::INFINITY))]); + test_tokens("NaN", &[Token::new(0..3, TokenKind::Float(f64::NAN))]); + test_tokens( + "-inf", + &[Token::new(0..4, TokenKind::Float(-f64::INFINITY))], + ); + test_tokens("-NaN", &[Token::new(0..4, TokenKind::Float(-f64::NAN))]); + test_tokens("+inf", &[Token::new(0..4, 
TokenKind::Float(f64::INFINITY))]); + test_tokens("+NaN", &[Token::new(0..4, TokenKind::Float(f64::NAN))]); } #[test]