From 777915667e39bf7fc923d315bb1c1499e0b83244 Mon Sep 17 00:00:00 2001 From: Nicola Papale Date: Wed, 5 Jul 2023 14:43:46 +0200 Subject: [PATCH 1/4] Refactor parsing of reflect path module --- crates/bevy_reflect/src/path/mod.rs | 203 +++----------------------- crates/bevy_reflect/src/path/parse.rs | 174 ++++++++++++++++++++++ 2 files changed, 195 insertions(+), 182 deletions(-) create mode 100644 crates/bevy_reflect/src/path/parse.rs diff --git a/crates/bevy_reflect/src/path/mod.rs b/crates/bevy_reflect/src/path/mod.rs index 6ea95da503b11..36f69d5b8a894 100644 --- a/crates/bevy_reflect/src/path/mod.rs +++ b/crates/bevy_reflect/src/path/mod.rs @@ -1,49 +1,40 @@ mod access; +mod parse; use std::fmt; -use std::num::ParseIntError; use crate::Reflect; use access::Access; +use parse::PathParser; use thiserror::Error; -type ParseResult = Result; +pub use parse::ParseError; /// An error specific to accessing a field/index on a `Reflect`. #[derive(Debug, PartialEq, Eq, Error)] #[error(transparent)] pub struct AccessError<'a>(access::Error<'a>); -/// A parse error for a path string. -#[derive(Debug, PartialEq, Eq, Error)] -pub enum ReflectPathParseError { - #[error("expected an identifier at offset {offset}")] - ExpectedIdent { offset: usize }, - - #[error("encountered an unexpected token `{token}`")] - UnexpectedToken { offset: usize, token: &'static str }, - - #[error("expected token `{token}`, but it wasn't there.")] - ExpectedToken { offset: usize, token: &'static str }, - - #[error("failed to parse a usize")] - IndexParseError(#[from] ParseIntError), -} - /// An error returned from a failed path string query. #[derive(Debug, PartialEq, Eq, Error)] pub enum ReflectPathError<'a> { - #[error("{error}")] + #[error("At {offset} in path specification: {error}")] InvalidAccess { /// Position in the path string. offset: usize, error: AccessError<'a>, }, + #[error("failed to downcast to the path result to the given type")] InvalidDowncast, - #[error(transparent)] - Parse(#[from] ReflectPathParseError), + #[error("At {offset} in '{path}': {error}")] + ParseError { + /// Position in `path`. + offset: usize, + path: &'a str, + error: ParseError<'a>, + }, } /// A trait which allows nested [`Reflect`] values to be retrieved with path strings. @@ -417,139 +408,6 @@ impl fmt::Display for ParsedPath { Ok(()) } } - -struct PathParser<'a> { - path: &'a str, - index: usize, -} - -impl<'a> PathParser<'a> { - fn new(path: &'a str) -> Self { - Self { path, index: 0 } - } - - fn next_token(&mut self) -> Option> { - if self.index >= self.path.len() { - return None; - } - - match self.path[self.index..].chars().next().unwrap() { - Token::DOT => { - self.index += 1; - return Some(Token::Dot); - } - Token::CROSSHATCH => { - self.index += 1; - return Some(Token::CrossHatch); - } - Token::OPEN_BRACKET => { - self.index += 1; - return Some(Token::OpenBracket); - } - Token::CLOSE_BRACKET => { - self.index += 1; - return Some(Token::CloseBracket); - } - _ => {} - } - - // we can assume we are parsing an ident now - for (char_index, character) in self.path[self.index..].chars().enumerate() { - match character { - Token::DOT | Token::CROSSHATCH | Token::OPEN_BRACKET | Token::CLOSE_BRACKET => { - let ident = Token::Ident(&self.path[self.index..self.index + char_index]); - self.index += char_index; - return Some(ident); - } - _ => {} - } - } - let ident = Token::Ident(&self.path[self.index..]); - self.index = self.path.len(); - Some(ident) - } - - fn token_to_access(&mut self, token: Token<'a>) -> ParseResult> { - let current_offset = self.index; - match token { - Token::Dot => { - if let Some(Token::Ident(value)) = self.next_token() { - value - .parse::() - .map(Access::TupleIndex) - .or(Ok(Access::Field(value.into()))) - } else { - Err(ReflectPathParseError::ExpectedIdent { - offset: current_offset, - }) - } - } - Token::CrossHatch => { - if let Some(Token::Ident(value)) = self.next_token() { - Ok(Access::FieldIndex(value.parse::()?)) - } else { - Err(ReflectPathParseError::ExpectedIdent { - offset: current_offset, - }) - } - } - Token::OpenBracket => { - let access = if let Some(Token::Ident(value)) = self.next_token() { - Access::ListIndex(value.parse::()?) - } else { - return Err(ReflectPathParseError::ExpectedIdent { - offset: current_offset, - }); - }; - - if !matches!(self.next_token(), Some(Token::CloseBracket)) { - return Err(ReflectPathParseError::ExpectedToken { - offset: current_offset, - token: Token::OPEN_BRACKET_STR, - }); - } - - Ok(access) - } - Token::CloseBracket => Err(ReflectPathParseError::UnexpectedToken { - offset: current_offset, - token: Token::CLOSE_BRACKET_STR, - }), - Token::Ident(value) => value - .parse::() - .map(Access::TupleIndex) - .or(Ok(Access::Field(value.into()))), - } - } -} - -impl<'a> Iterator for PathParser<'a> { - type Item = (ParseResult>, usize); - - fn next(&mut self) -> Option { - let token = self.next_token()?; - let index = self.index; - Some((self.token_to_access(token), index)) - } -} - -enum Token<'a> { - Dot, - CrossHatch, - OpenBracket, - CloseBracket, - Ident(&'a str), -} - -impl<'a> Token<'a> { - const DOT: char = '.'; - const CROSSHATCH: char = '#'; - const OPEN_BRACKET: char = '['; - const CLOSE_BRACKET: char = ']'; - const OPEN_BRACKET_STR: &'static str = "["; - const CLOSE_BRACKET_STR: &'static str = "]"; -} - #[cfg(test)] #[allow(clippy::float_cmp, clippy::approx_constant)] mod tests { @@ -616,6 +474,13 @@ mod tests { Access::Field(field.into()) } + type StaticError = ReflectPathError<'static>; + + fn invalid_access(offset: usize, actual: TypeShape, expected: TypeShape) -> StaticError { + let error = AccessError(access::Error::Type { actual, expected }); + ReflectPathError::InvalidAccess { offset, error } + } + #[test] fn parsed_path_parse() { assert_eq!( @@ -791,40 +656,14 @@ mod tests { }), } ); - - assert_eq!( - a.reflect_path("x..").err().unwrap(), - ReflectPathError::Parse(ReflectPathParseError::ExpectedIdent { offset: 2 }) - ); - assert_eq!( a.reflect_path("x[0]").err().unwrap(), - ReflectPathError::InvalidAccess { - offset: 2, - error: AccessError(access::Error::Type { - actual: TypeShape::Struct, - expected: TypeShape::List - }), - } + invalid_access(2, TypeShape::Struct, TypeShape::List) ); - assert_eq!( a.reflect_path("y.x").err().unwrap(), - ReflectPathError::InvalidAccess { - offset: 2, - error: AccessError(access::Error::Type { - actual: TypeShape::List, - expected: TypeShape::Struct - }), - } + invalid_access(2, TypeShape::List, TypeShape::Struct) ); - - assert!(matches!( - a.reflect_path("y[badindex]"), - Err(ReflectPathError::Parse( - ReflectPathParseError::IndexParseError(_) - )) - )); } #[test] diff --git a/crates/bevy_reflect/src/path/parse.rs b/crates/bevy_reflect/src/path/parse.rs new file mode 100644 index 0000000000000..b1e5be1be235b --- /dev/null +++ b/crates/bevy_reflect/src/path/parse.rs @@ -0,0 +1,174 @@ +use std::{fmt, num::ParseIntError}; + +use thiserror::Error; + +use super::{Access, ReflectPathError}; + +#[derive(Debug, PartialEq, Eq, Error)] +#[error(transparent)] +pub struct ParseError<'a>(Error<'a>); + +/// A parse error for a path string. +#[derive(Debug, PartialEq, Eq, Error)] +enum Error<'a> { + #[error("expected an identifier, but reached end of path string")] + NoIdent, + + #[error("expected an identifier, got '{0}' instead")] + ExpectedIdent(Token<'a>), + + #[error("failed to parse index as integer")] + IndexParse(#[from] ParseIntError), + + #[error("A '[' wasn't closed, reached end of path string before finding a ']'")] + Unclosed, + + #[error("A '[' wasn't closed properly, got '{0}' instead")] + BadClose(Token<'a>), + + #[error("A ']' was found before an opening '['")] + CloseBeforeOpen, +} + +pub(super) struct PathParser<'a> { + path: &'a str, + offset: usize, +} +impl<'a> PathParser<'a> { + pub(super) fn new(path: &'a str) -> Self { + PathParser { path, offset: 0 } + } + + fn next_token(&mut self) -> Option> { + // TODO(perf): using get_unchecked SAFETY: self.offset is never larger than self.path + // & self.offset always splits on UTF-8 sequence boundaries + let input = &self.path[self.offset..]; + + // Return with `None` if empty. + let first_char = input.chars().next()?; + + if let Some(token) = Token::symbol_from_char(first_char) { + self.offset += 1; // NOTE: we assume all symbols are ASCII + return Some(token); + } + // We are parsing either `0123` or `field`. + // If we do not find a subsequent token, we are at the end of the parse string. + let ident = input.split_once(Token::SYMBOLS).map_or(input, |t| t.0); + + self.offset += ident.len(); + Some(Token::Ident(Ident(ident))) + } + + fn next_ident(&mut self) -> Result, Error<'a>> { + match self.next_token() { + Some(Token::Ident(ident)) => Ok(ident), + Some(other) => Err(Error::ExpectedIdent(other)), + None => Err(Error::NoIdent), + } + } + + fn access_following(&mut self, token: Token<'a>) -> Result, Error<'a>> { + match token { + Token::Dot => Ok(self.next_ident()?.field()), + Token::CrossHatch => self.next_ident()?.field_index(), + Token::Ident(ident) => Ok(ident.field()), + Token::CloseBracket => Err(Error::CloseBeforeOpen), + Token::OpenBracket => { + let index_ident = self.next_ident()?.index()?; + match self.next_token() { + Some(Token::CloseBracket) => Ok(index_ident), + Some(other) => Err(Error::BadClose(other)), + None => Err(Error::Unclosed), + } + } + } + } +} +impl<'a> Iterator for PathParser<'a> { + type Item = (Result, ReflectPathError<'a>>, usize); + + fn next(&mut self) -> Option { + let token = self.next_token()?; + let offset = self.offset; + let err = |error| ReflectPathError::ParseError { + offset, + path: self.path, + error: ParseError(error), + }; + Some((self.access_following(token).map_err(err), offset)) + } +} + +#[derive(Debug, PartialEq, Eq)] +struct Ident<'a>(&'a str); + +impl<'a> Ident<'a> { + fn field(self) -> Access<'a> { + let field = |_| Access::Field(self.0.into()); + self.0.parse().map(Access::TupleIndex).unwrap_or_else(field) + } + fn field_index(self) -> Result, Error<'a>> { + Ok(Access::FieldIndex(self.0.parse()?)) + } + fn index(self) -> Result, Error<'a>> { + Ok(Access::ListIndex(self.0.parse()?)) + } +} + +#[derive(Debug, PartialEq, Eq)] +enum Token<'a> { + Dot, + CrossHatch, + OpenBracket, + CloseBracket, + Ident(Ident<'a>), +} +impl fmt::Display for Token<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Token::Dot => f.write_str("."), + Token::CrossHatch => f.write_str("#"), + Token::OpenBracket => f.write_str("["), + Token::CloseBracket => f.write_str("]"), + Token::Ident(ident) => f.write_str(ident.0), + } + } +} +impl<'a> Token<'a> { + const SYMBOLS: &[char] = &['.', '#', '[', ']']; + fn symbol_from_char(char: char) -> Option { + match char { + '.' => Some(Self::Dot), + '#' => Some(Self::CrossHatch), + '[' => Some(Self::OpenBracket), + ']' => Some(Self::CloseBracket), + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::path::ParsedPath; + + #[test] + fn parse_invalid() { + assert_eq!( + ParsedPath::parse_static("x.."), + Err(ReflectPathError::ParseError { + error: ParseError(Error::ExpectedIdent(Token::Dot)), + offset: 2, + path: "x..", + }), + ); + assert!(matches!( + ParsedPath::parse_static("y[badindex]"), + Err(ReflectPathError::ParseError { + error: ParseError(Error::IndexParse(_)), + offset: 2, + path: "y[badindex]", + }), + )); + } +} From 26dae40d200f9f76a2dc9b76669ce7c8a18f04b2 Mon Sep 17 00:00:00 2001 From: Nicola Papale Date: Mon, 31 Jul 2023 12:09:34 +0200 Subject: [PATCH 2/4] Remove remark on unsafe performance --- crates/bevy_reflect/src/path/parse.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/bevy_reflect/src/path/parse.rs b/crates/bevy_reflect/src/path/parse.rs index b1e5be1be235b..c515f3884bf7e 100644 --- a/crates/bevy_reflect/src/path/parse.rs +++ b/crates/bevy_reflect/src/path/parse.rs @@ -40,8 +40,6 @@ impl<'a> PathParser<'a> { } fn next_token(&mut self) -> Option> { - // TODO(perf): using get_unchecked SAFETY: self.offset is never larger than self.path - // & self.offset always splits on UTF-8 sequence boundaries let input = &self.path[self.offset..]; // Return with `None` if empty. From 4c36831d8c82353e348a8f50b5d257296b95306c Mon Sep 17 00:00:00 2001 From: Nicola Papale Date: Thu, 3 Aug 2023 16:34:28 +0200 Subject: [PATCH 3/4] Implement MrGVSV suggestions --- crates/bevy_reflect/src/path/mod.rs | 4 ++-- crates/bevy_reflect/src/path/parse.rs | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/bevy_reflect/src/path/mod.rs b/crates/bevy_reflect/src/path/mod.rs index 36f69d5b8a894..ae06fc89ad27d 100644 --- a/crates/bevy_reflect/src/path/mod.rs +++ b/crates/bevy_reflect/src/path/mod.rs @@ -18,7 +18,7 @@ pub struct AccessError<'a>(access::Error<'a>); /// An error returned from a failed path string query. #[derive(Debug, PartialEq, Eq, Error)] pub enum ReflectPathError<'a> { - #[error("At {offset} in path specification: {error}")] + #[error("at {offset} in path specification: {error}")] InvalidAccess { /// Position in the path string. offset: usize, @@ -28,7 +28,7 @@ pub enum ReflectPathError<'a> { #[error("failed to downcast to the path result to the given type")] InvalidDowncast, - #[error("At {offset} in '{path}': {error}")] + #[error("at {offset} in '{path}': {error}")] ParseError { /// Position in `path`. offset: usize, diff --git a/crates/bevy_reflect/src/path/parse.rs b/crates/bevy_reflect/src/path/parse.rs index c515f3884bf7e..10975ca547e1f 100644 --- a/crates/bevy_reflect/src/path/parse.rs +++ b/crates/bevy_reflect/src/path/parse.rs @@ -4,6 +4,7 @@ use thiserror::Error; use super::{Access, ReflectPathError}; +/// An error that occurs when parsing reflect path strings. #[derive(Debug, PartialEq, Eq, Error)] #[error(transparent)] pub struct ParseError<'a>(Error<'a>); @@ -18,15 +19,15 @@ enum Error<'a> { ExpectedIdent(Token<'a>), #[error("failed to parse index as integer")] - IndexParse(#[from] ParseIntError), + InvalidIndex(#[from] ParseIntError), - #[error("A '[' wasn't closed, reached end of path string before finding a ']'")] + #[error("a '[' wasn't closed, reached end of path string before finding a ']'")] Unclosed, - #[error("A '[' wasn't closed properly, got '{0}' instead")] + #[error("a '[' wasn't closed properly, got '{0}' instead")] BadClose(Token<'a>), - #[error("A ']' was found before an opening '['")] + #[error("a ']' was found before an opening '['")] CloseBeforeOpen, } @@ -72,7 +73,7 @@ impl<'a> PathParser<'a> { Token::Ident(ident) => Ok(ident.field()), Token::CloseBracket => Err(Error::CloseBeforeOpen), Token::OpenBracket => { - let index_ident = self.next_ident()?.index()?; + let index_ident = self.next_ident()?.list_index()?; match self.next_token() { Some(Token::CloseBracket) => Ok(index_ident), Some(other) => Err(Error::BadClose(other)), @@ -108,7 +109,7 @@ impl<'a> Ident<'a> { fn field_index(self) -> Result, Error<'a>> { Ok(Access::FieldIndex(self.0.parse()?)) } - fn index(self) -> Result, Error<'a>> { + fn list_index(self) -> Result, Error<'a>> { Ok(Access::ListIndex(self.0.parse()?)) } } @@ -163,7 +164,7 @@ mod test { assert!(matches!( ParsedPath::parse_static("y[badindex]"), Err(ReflectPathError::ParseError { - error: ParseError(Error::IndexParse(_)), + error: ParseError(Error::InvalidIndex(_)), offset: 2, path: "y[badindex]", }), From e49f27323bbde47bdab0da30bbf9d9b023ddb926 Mon Sep 17 00:00:00 2001 From: Nicola Papale Date: Sat, 5 Aug 2023 12:59:33 +0200 Subject: [PATCH 4/4] Replace CrossHatch with Pound --- crates/bevy_reflect/src/path/parse.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/bevy_reflect/src/path/parse.rs b/crates/bevy_reflect/src/path/parse.rs index 10975ca547e1f..fc990deb00a45 100644 --- a/crates/bevy_reflect/src/path/parse.rs +++ b/crates/bevy_reflect/src/path/parse.rs @@ -69,7 +69,7 @@ impl<'a> PathParser<'a> { fn access_following(&mut self, token: Token<'a>) -> Result, Error<'a>> { match token { Token::Dot => Ok(self.next_ident()?.field()), - Token::CrossHatch => self.next_ident()?.field_index(), + Token::Pound => self.next_ident()?.field_index(), Token::Ident(ident) => Ok(ident.field()), Token::CloseBracket => Err(Error::CloseBeforeOpen), Token::OpenBracket => { @@ -117,7 +117,7 @@ impl<'a> Ident<'a> { #[derive(Debug, PartialEq, Eq)] enum Token<'a> { Dot, - CrossHatch, + Pound, OpenBracket, CloseBracket, Ident(Ident<'a>), @@ -126,7 +126,7 @@ impl fmt::Display for Token<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Token::Dot => f.write_str("."), - Token::CrossHatch => f.write_str("#"), + Token::Pound => f.write_str("#"), Token::OpenBracket => f.write_str("["), Token::CloseBracket => f.write_str("]"), Token::Ident(ident) => f.write_str(ident.0), @@ -138,7 +138,7 @@ impl<'a> Token<'a> { fn symbol_from_char(char: char) -> Option { match char { '.' => Some(Self::Dot), - '#' => Some(Self::CrossHatch), + '#' => Some(Self::Pound), '[' => Some(Self::OpenBracket), ']' => Some(Self::CloseBracket), _ => None,