Skip to content

Commit

Permalink
Added implicit map parsing mode
Browse files Browse the repository at this point in the history
This commit adds the concept of an implicit map -- detecting where the
root starts with `Identifier Colon` and switching into map parsing.

I commented on #6 about the remaining "edge case" I hadn't thought of --
the handling of commas vs. newlines in this mode. Currently, commas are
still required and whitespace is insignificant, but this will likely be
updated to allow omitting the comma between key-value pairs when they are
separated by an end-of-line.
  • Loading branch information
ecton committed Apr 18, 2023
1 parent c3de35a commit 0463012
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 40 deletions.
30 changes: 25 additions & 5 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ use core::iter::Peekable;
use serde::de::{EnumAccess, MapAccess, SeqAccess, VariantAccess};
use serde::Deserializer as _;

use crate::parser::{Error, Event, Nested, Parser, Primitive};
use crate::parser::{Config, Error, Event, Nested, Parser, Primitive};

/// Serde deserializer that reads its input as a stream of [`Parser`] events.
pub struct Deserializer<'de> {
// Wrapped in `Peekable` so the next parser event can be inspected before
// it is consumed.
parser: Peekable<Parser<'de>>,
}

impl<'de> Deserializer<'de> {
pub fn new(source: &'de str) -> Self {
pub fn new(source: &'de str, config: Config) -> Self {
Self {
parser: Parser::new(source).peekable(),
parser: Parser::new(source, config).peekable(),
}
}
}
Expand Down Expand Up @@ -319,8 +319,8 @@ impl<'de> serde::de::Deserializer<'de> for &mut Deserializer<'de> {
todo!("expected a map")
}
}
Some(_other) => {
todo!("expected struct")
Some(other) => {
todo!("expected struct, got {other:?}")
}
None => todo!("unexpected eof"),
}
Expand Down Expand Up @@ -486,6 +486,8 @@ impl serde::de::Error for Error {
mod tests {
use serde::{Deserialize, Serialize};

use crate::parser::Config;

#[test]
fn basic_named() {
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
Expand All @@ -497,6 +499,24 @@ mod tests {
assert_eq!(parsed, BasicNamed { a: 1, b: -1 });
}

#[test]
fn implicit_map() {
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
struct BasicNamed {
a: u32,
b: i32,
}
let parsed = BasicNamed::deserialize(&mut crate::de::Deserializer::new(
r#"a: 1,
b: -1"#,
Config {
allow_implicit_map: true,
},
))
.unwrap();
assert_eq!(parsed, BasicNamed { a: 1, b: -1 });
}

#[test]
fn optional() {
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq)]
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub mod value;

#[cfg(feature = "serde")]
pub fn from_str<'de, D: serde::Deserialize<'de>>(source: &'de str) -> Result<D, parser::Error> {
let mut parser = de::Deserializer::new(source);
let mut parser = de::Deserializer::new(source, parser::Config::default());
// TODO verify eof
D::deserialize(&mut parser)
}
148 changes: 116 additions & 32 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,23 @@ use core::ops::Range;

use crate::tokenizer::{self, Balanced, Integer, Token, TokenKind, Tokenizer};

#[derive(Debug)]
pub struct Parser<'s> {
tokens: Tokenizer<'s, false>,
peeked: Option<Result<Token<'s>, tokenizer::Error>>,
nested: Vec<NestedState>,
finished: bool,
root_state: State<'s>,
config: Config,
}

impl<'s> Parser<'s> {
pub fn new(source: &'s str) -> Self {
pub fn new(source: &'s str, config: Config) -> Self {
Self {
tokens: Tokenizer::minified(source),
peeked: None,
nested: Vec::new(),
finished: false,
root_state: State::AtStart,
config,
}
}

Expand All @@ -45,6 +48,13 @@ impl<'s> Parser<'s> {
}
}

/// Leaves the innermost nested container.
///
/// If the nesting stack is already empty there is no explicit container to
/// leave, so the container being closed is expected to be the root-level
/// implicit map (checked in debug builds); the root state is then marked as
/// finished.
fn pop_nested_state(&mut self) {
    let popped = self.nested.pop();
    if popped.is_some() {
        return;
    }

    // Nothing was on the stack: we are closing the root itself.
    debug_assert!(matches!(self.root_state, State::ImplicitMap(_)));
    self.root_state = State::Finished;
}

fn parse_token(
&mut self,
token: Token<'s>,
Expand Down Expand Up @@ -117,7 +127,7 @@ impl<'s> Parser<'s> {
})
}
TokenKind::Close(closed) if Some(closed) == allowed_close => {
self.nested.pop();
self.pop_nested_state();
Ok(Event::EndNested)
}
TokenKind::Colon | TokenKind::Comma | TokenKind::Close(_) => {
Expand All @@ -140,7 +150,7 @@ impl<'s> Parser<'s> {
let token = self.next_or_eof()?;
match token.kind {
TokenKind::Close(closed) if closed == end => {
self.nested.pop();
self.pop_nested_state();
Ok(Event::EndNested)
}
TokenKind::Comma => {
Expand All @@ -154,44 +164,55 @@ impl<'s> Parser<'s> {
}
}

/// Returns a mutable reference to the state of the map currently being parsed.
///
/// The innermost nested container is used when one exists; otherwise the
/// root-level implicit map state is used.
///
/// # Panics
///
/// Panics if the selected container (or the root state) is not a map.
fn map_state_mut(&mut self) -> &mut MapState {
    match self.nested.last_mut() {
        Some(NestedState::Map(current)) => current,
        Some(_) => unreachable!("not a map state"),
        // No explicit nesting: the only map we can be inside is the implicit root map.
        None => match &mut self.root_state {
            State::ImplicitMap(current) => current,
            _ => unreachable!("not a map state"),
        },
    }
}

fn parse_map(&mut self, state: MapState) -> Result<Event<'s>, Error> {
match state {
MapState::ExpectingKey => {
*self.nested.last_mut().expect("required for this fn") =
NestedState::Map(MapState::ExpectingColon);
*self.map_state_mut() = MapState::ExpectingColon;

let token = self.next_or_eof()?;
self.parse_token(token, Some(Balanced::Brace))
}
MapState::ExpectingColon => {
let token = self.next_or_eof()?;
if matches!(token.kind, TokenKind::Colon) {
*self.nested.last_mut().expect("required for this fn") =
NestedState::Map(MapState::ExpectingValue);
*self.map_state_mut() = MapState::ExpectingValue;
self.parse_map(MapState::ExpectingValue)
} else {
todo!("expected colon, got {token:?}")
}
}
MapState::ExpectingValue => {
*self.nested.last_mut().expect("required for this fn") =
NestedState::Map(MapState::ExpectingComma);
*self.map_state_mut() = MapState::ExpectingComma;

let token = self.next_or_eof()?;
self.parse_token(token, Some(Balanced::Brace))
}
MapState::ExpectingComma => {
let token = self.next_or_eof()?;
match token.kind {
TokenKind::Close(closed) if closed == Balanced::Brace => {
self.nested.pop();
let token = self.next_token().transpose()?.map(|token| token.kind);
match token {
Some(TokenKind::Close(closed)) if closed == Balanced::Brace => {
self.pop_nested_state();
Ok(Event::EndNested)
}
TokenKind::Comma => {
*self.nested.last_mut().expect("required for this fn") =
NestedState::Map(MapState::ExpectingKey);
Some(TokenKind::Comma) => {
*self.map_state_mut() = MapState::ExpectingKey;
self.parse_map(MapState::ExpectingKey)
}
None if self.nested.is_empty() => {
// Implicit map
self.pop_nested_state();
Ok(Event::EndNested)
}
_ => todo!("expected comma or end"),
}
}
Expand All @@ -204,24 +225,85 @@ impl<'s> Iterator for Parser<'s> {

fn next(&mut self) -> Option<Self::Item> {
Some(match self.nested.last() {
None => match self.next_token()? {
Ok(token) => {
if self.finished {
todo!("error: trailing junk")
None => match &self.root_state {
State::AtStart => {
let token = match self.next_token()? {
Ok(token) => token,
Err(err) => return Some(Err(err.into())),
};
if self.config.allow_implicit_map
&& matches!(token.kind, TokenKind::Identifier(_))
{
let TokenKind::Identifier(identifier) = token.kind
else { unreachable!("just matched")};
match self.peek() {
Some(token) if matches!(token.kind, TokenKind::Colon) => {
// Switch to parsing an implicit map
self.root_state = State::StartingImplicitMap(identifier);
Ok(Event::BeginNested {
name: None,
kind: Nested::Map,
})
}
Some(token)
if matches!(
token.kind,
TokenKind::Open(Balanced::Brace | Balanced::Paren,)
) =>
{
let Some(Ok(Token{ kind: TokenKind::Open(kind), ..})) = self.next_token()
else { unreachable!("just peeked") };
self.root_state = State::Finished;
Ok(Event::BeginNested {
name: Some(identifier),
kind: match kind {
Balanced::Paren => Nested::Tuple,
Balanced::Brace => Nested::Map,
Balanced::Bracket => {
unreachable!("not matched in peek")
}
},
})
}
_ => {
self.root_state = State::Finished;
Ok(Event::Primitive(Primitive::Identifier(identifier)))
}
}
} else {
self.root_state = State::Finished;
self.parse_token(token, None)
}

self.finished = true;
self.parse_token(token, None)
}
Err(err) => Err(err.into()),
State::StartingImplicitMap(_) => {
let State::StartingImplicitMap(first_key) = std::mem::replace(&mut self.root_state, State::ImplicitMap(MapState::ExpectingColon))
else { unreachable!("just matched") };
Ok(Event::Primitive(Primitive::Identifier(first_key)))
}
State::ImplicitMap(state) => self.parse_map(*state),
State::Finished => todo!("error: trailing junk"),
},

Some(NestedState::Tuple(list)) => self.parse_sequence(*list, Balanced::Paren),
Some(NestedState::List(list)) => self.parse_sequence(*list, Balanced::Bracket),
Some(NestedState::Map(map)) => self.parse_map(*map),
})
}
}

/// Options that control how a [`Parser`] interprets its input.
///
/// The [`Default`] configuration leaves every option disabled.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub struct Config {
    /// When `true`, a document whose root begins with an identifier followed
    /// by a colon is parsed as a map without surrounding braces.
    pub allow_implicit_map: bool,
}

/// Tracks what the parser is doing at the root of the document, i.e. while
/// no explicit nested container is open.
#[derive(Debug, Clone, Eq, PartialEq)]
enum State<'s> {
// Initial state: the first root token has not been read yet.
AtStart,
// An implicit map was detected and its `BeginNested` event has been emitted;
// the stored identifier is the first key, which is emitted on the next step.
StartingImplicitMap(Cow<'s, str>),
// Parsing the entries of the root-level implicit map; the payload is the
// position within the current key-value pair.
ImplicitMap(MapState),
// The root value has been started or completed. Reaching the root level
// again in this state is currently treated as trailing junk.
Finished,
}

#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Error {
pub location: Range<usize>,
Expand Down Expand Up @@ -325,7 +407,7 @@ mod tests {
use super::*;
#[test]
fn number_array() {
let events = Parser::new("[1,2,3]")
let events = Parser::new("[1,2,3]", Config::default())
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
Expand All @@ -341,7 +423,7 @@ mod tests {
Event::EndNested,
]
);
let events = Parser::new("[1,2,3,]")
let events = Parser::new("[1,2,3,]", Config::default())
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
Expand All @@ -358,9 +440,10 @@ mod tests {
]
);
}

#[test]
fn number_tuple() {
let events = Parser::new("(1,2,3)")
let events = Parser::new("(1,2,3)", Config::default())
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
Expand All @@ -376,7 +459,7 @@ mod tests {
Event::EndNested,
]
);
let events = Parser::new("(1,2,3,)")
let events = Parser::new("(1,2,3,)", Config::default())
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
Expand All @@ -393,9 +476,10 @@ mod tests {
]
);
}

#[test]
fn number_map() {
let events = Parser::new("{a:1,b:2}")
let events = Parser::new("{a:1,b:2}", Config::default())
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
Expand All @@ -412,7 +496,7 @@ mod tests {
Event::EndNested,
]
);
let events = Parser::new("{a:1,b:2,}")
let events = Parser::new("{a:1,b:2,}", Config::default())
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
Expand Down
2 changes: 1 addition & 1 deletion src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ pub enum Balanced {
Bracket,
}

#[derive(Clone)]
#[derive(Debug, Clone)]
pub struct Tokenizer<'a, const INCLUDE_ALL: bool> {
chars: CharIterator<'a>,
scratch: String,
Expand Down
2 changes: 1 addition & 1 deletion src/tokenizer/char_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use core::iter::Peekable;
use core::ops::Range;
use core::str::CharIndices;

#[derive(Clone)]
#[derive(Debug, Clone)]
pub struct CharIterator<'a> {
pub source: &'a str,
chars: Peekable<CharIndices<'a>>,
Expand Down

0 comments on commit 0463012

Please sign in to comment.