From 2c2d7492d2572f51b3a3365adf85649bfea25a1a Mon Sep 17 00:00:00 2001 From: Joss Date: Thu, 4 May 2023 12:39:16 +0100 Subject: [PATCH 1/7] chore(parser): optimize errors by: - switching labels to enums - Using LateAllocSet in place of BTreeSet --- crates/noirc_frontend/src/lexer/token.rs | 2 +- crates/noirc_frontend/src/parser/errors.rs | 45 ++-- .../src/parser/errors/late_alloc_set.rs | 214 ++++++++++++++++++ crates/noirc_frontend/src/parser/labels.rs | 40 ++++ crates/noirc_frontend/src/parser/mod.rs | 1 + crates/noirc_frontend/src/parser/parser.rs | 57 +++-- 6 files changed, 309 insertions(+), 50 deletions(-) create mode 100644 crates/noirc_frontend/src/parser/errors/late_alloc_set.rs create mode 100644 crates/noirc_frontend/src/parser/labels.rs diff --git a/crates/noirc_frontend/src/lexer/token.rs b/crates/noirc_frontend/src/lexer/token.rs index bfcd0f4be51..fe0e3bf1f90 100644 --- a/crates/noirc_frontend/src/lexer/token.rs +++ b/crates/noirc_frontend/src/lexer/token.rs @@ -189,7 +189,7 @@ impl fmt::Display for Token { } } -#[derive(PartialEq, Eq, Hash, Debug, Clone)] +#[derive(PartialEq, Eq, Hash, Debug, Clone, Ord, PartialOrd)] /// The different kinds of tokens that are possible in the target language pub enum TokenKind { Token(Token), diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index 7f19ef7f062..d1b86a4f040 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -1,16 +1,19 @@ -use std::collections::BTreeSet; - use crate::lexer::token::Token; use crate::BinaryOp; +use late_alloc_set::LateAllocSet; use iter_extended::vecmap; use noirc_errors::CustomDiagnostic as Diagnostic; use noirc_errors::Span; +use super::labels::ParserLabel; + +mod late_alloc_set; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserError { - expected_tokens: BTreeSet, - expected_labels: BTreeSet, + expected_tokens: LateAllocSet, + expected_labels: LateAllocSet, found: Token, reason: Option, span: Span, @@ -19,23 +22,17 @@ pub struct ParserError { impl ParserError { pub fn empty(found: Token, span: Span) -> ParserError { ParserError { - expected_tokens: BTreeSet::new(), - expected_labels: BTreeSet::new(), + expected_tokens: LateAllocSet::None, + expected_labels: LateAllocSet::None, found, reason: None, span, } } - pub fn expected(token: Token, found: Token, span: Span) -> ParserError { - let mut error = ParserError::empty(found, span); - error.expected_tokens.insert(token); - error - } - - pub fn expected_label(label: String, found: Token, span: Span) -> ParserError { + pub fn expected_label(label: ParserLabel, found: Token, span: Span) -> ParserError { let mut error = ParserError::empty(found, span); - error.expected_labels.insert(label); + error.expected_labels = error.expected_labels.insert(label); error } @@ -58,8 +55,8 @@ impl ParserError { impl std::fmt::Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut expected = vecmap(&self.expected_tokens, ToString::to_string); - expected.append(&mut vecmap(&self.expected_labels, Clone::clone)); + let mut expected = vecmap(self.expected_tokens.as_vec(), ToString::to_string); + expected.append(&mut vecmap(self.expected_labels.as_vec(), |ref_str| format!("{ref_str}"))); if expected.is_empty() { write!(f, "Unexpected {} in input", self.found) @@ -95,7 +92,7 @@ impl From for Diagnostic { impl chumsky::Error for ParserError { type Span = Span; - type Label = String; + type Label = ParserLabel; fn expected_input_found(span: Self::Span, expected: Iter, found: Option) -> Self where @@ -103,7 +100,7 @@ impl chumsky::Error for ParserError { { ParserError { expected_tokens: expected.into_iter().map(|opt| opt.unwrap_or(Token::EOF)).collect(), - expected_labels: BTreeSet::new(), + expected_labels: LateAllocSet::None, found: found.unwrap_or(Token::EOF), reason: None, span, @@ -111,9 +108,9 @@ impl chumsky::Error for ParserError { } fn with_label(mut self, label: Self::Label) -> Self { - self.expected_tokens.clear(); - self.expected_labels.clear(); - self.expected_labels.insert(label); + self.expected_tokens = LateAllocSet::None; + self.expected_labels = LateAllocSet::None; + self.expected_labels = self.expected_labels.insert(label); self } @@ -122,9 +119,9 @@ impl chumsky::Error for ParserError { // that reason and discard the other if present. // The spans of both errors must match, otherwise the error // messages and error spans may not line up. - fn merge(mut self, mut other: Self) -> Self { - self.expected_tokens.append(&mut other.expected_tokens); - self.expected_labels.append(&mut other.expected_labels); + fn merge(mut self, other: Self) -> Self { + self.expected_tokens = self.expected_tokens.append(other.expected_tokens); + self.expected_labels = self.expected_labels.append(other.expected_labels); if self.reason.is_none() { self.reason = other.reason; diff --git a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs new file mode 100644 index 00000000000..04aab575f78 --- /dev/null +++ b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs @@ -0,0 +1,214 @@ +//! `LateAllocSet` is an alternative to `BTreeSet` optimized for small sets that can be located +//! entirely in stack memory. Once the set size goes beyond 3, performance is less than that of a +//! `BTreeMap`. +//! +//! Approximately 20-50 times faster than `BTreeSet` it is beyond three elements - at which point +//! it switches to using a `BTreeSet` internally. This container makes sense for short lived sets +//! that very rarely go beyond 3 elements, and for which the elements can be represented entirely +//! in stack memory. +//! +//! This set's size is at least 3 times the size of it's element's, so it is not suitable to be +//! held as an item type in larger parent collections. +//! +//! Below - time taken to insert Nth element one millions times for differing types, sampled by +//! running `inserts_different_types` in tests below on a 2.3 GHz MacBook Pro (2019). +//! +//! | Nth insert | &str | u32 | Token | String | +//! ====================================================================== +//! | 0 -> 1 | 29.425ms | 27.088ms | 47.936ms | 150.282ms | +//! | 1 -> 2 | 33.252ms | 29.752ms | 60.845ms | 301.634ms | +//! | 2 -> 3 | 35.657ms | 31.898ms | 79.367ms | 487.948ms | +//! | 3 -> 4 | 1,324.44ms | 1,079.197ms | 1,846.823ms | 2,225.094ms | +//! | 4 -> 5 | 1,482.358ms | 1,231.839ms | 1,918.353ms | 2,541.392ms | + +use std::collections::BTreeSet; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) enum LateAllocSet { + None, + One(T), + Two(T, T), + Three(T, T, T), + Set(BTreeSet), +} + +impl LateAllocSet +where + T: std::cmp::Ord, +{ + pub(super) fn insert(self, x: T) -> Self { + match self { + LateAllocSet::None => LateAllocSet::One(x), + LateAllocSet::One(x0) => { + if x0 == x { + LateAllocSet::One(x0) + } else { + LateAllocSet::Two(x0, x) + } + } + LateAllocSet::Two(x0, x1) => { + if x0 == x || x1 == x { + LateAllocSet::Two(x0, x1) + } else { + LateAllocSet::Three(x0, x1, x) + } + } + LateAllocSet::Three(x0, x1, x2) => { + if x0 == x || x1 == x || x2 == x { + LateAllocSet::Three(x0, x1, x2) + } else { + LateAllocSet::Set(BTreeSet::from([x0, x1, x2, x])) + } + } + LateAllocSet::Set(mut xs) => { + xs.insert(x); + LateAllocSet::Set(xs) + } + } + } + + pub(super) fn as_vec(&self) -> Vec<&T> { + match self { + LateAllocSet::None => vec![], + LateAllocSet::One(x0) => vec![x0], + LateAllocSet::Two(x0, x1) => vec![x0, x1], + LateAllocSet::Three(x0, x1, x2) => vec![x0, x1, x2], + LateAllocSet::Set(xs) => xs.iter().collect::>(), + } + } + + pub(super) fn append(self, xs: LateAllocSet) -> Self { + let mut out = self; + match xs { + LateAllocSet::None => { + // No work + } + LateAllocSet::One(x0) => out = out.insert(x0), + LateAllocSet::Two(x0, x1) => { + out = out.insert(x0); + out = out.insert(x1); + } + LateAllocSet::Three(x0, x1, x2) => { + out = out.insert(x0); + out = out.insert(x1); + out = out.insert(x2); + } + LateAllocSet::Set(xs) => { + for x in xs { + out = out.insert(x); + } + } + } + out + } +} + +impl FromIterator for LateAllocSet +where + T: std::cmp::Ord, +{ + fn from_iter>(iter: I) -> Self { + let mut iter = iter.into_iter(); + let first = iter.next(); + if first.is_none() { + return LateAllocSet::None; + } + let second = iter.next(); + if second.is_none() { + return LateAllocSet::One(first.unwrap()); + } + let third = iter.next(); + if third.is_none() { + return LateAllocSet::Two(first.unwrap(), second.unwrap()); + } + let fourth = iter.next(); + if fourth.is_none() { + return LateAllocSet::Three(first.unwrap(), second.unwrap(), third.unwrap()); + } + let btree_set: BTreeSet = + [first.unwrap(), second.unwrap(), third.unwrap(), fourth.unwrap()] + .into_iter() + .chain(iter) + .collect(); + LateAllocSet::Set(btree_set) + } +} + +#[cfg(test)] +mod tests { + use std::{collections::BTreeSet, time::SystemTime}; + + use super::LateAllocSet; + use crate::token::Token; + + fn time_1m(f: F) + where + F: Fn(), + { + let start = SystemTime::now(); + for _ in 0..1000000 { + f(); + } + println!("{:?}", start.elapsed().unwrap()); + } + + fn time_1m_inserts_1_to_5(x0: F0, x1: F1, x2: F2, x3: F3, x4: F4) + where + T: std::cmp::Ord + Clone, + F0: Fn() -> T, + F1: Fn() -> T, + F2: Fn() -> T, + F3: Fn() -> T, + F4: Fn() -> T, + { + print!("0 -> 1: "); + time_1m(|| { + LateAllocSet::None.insert(x0()); + }); + + print!("1 -> 2: "); + time_1m(|| { + LateAllocSet::One(x0()).insert(x1()); + }); + print!("2 -> 3: "); + time_1m(|| { + LateAllocSet::Two(x0(), x1()).insert(x2()); + }); + print!("3 -> 4: "); + time_1m(|| { + LateAllocSet::Three(x0(), x1(), x2()).insert(x3()); + }); + print!("4 -> 5: "); + time_1m(|| { + LateAllocSet::Set(BTreeSet::from([x0(), x1(), x2(), x3()])).insert(x4()); + }); + } + + #[test] + #[ignore] + fn inserts_different_types() { + println!("\nelement type: &str"); + time_1m_inserts_1_to_5(|| "a", || "b", || "c", || "d", || "e"); + + println!("\nelement type: u32"); + time_1m_inserts_1_to_5(|| 0, || 1, || 2, || 3, || 4); + + println!("\nelement type: Token"); + time_1m_inserts_1_to_5( + || Token::Ampersand, + || Token::Arrow, + || Token::Assign, + || Token::Bang, + || Token::Caret, + ); + + println!("\nelement type: String"); + time_1m_inserts_1_to_5( + || String::from("a"), + || String::from("b"), + || String::from("c"), + || String::from("d"), + || String::from("e"), + ); + } +} diff --git a/crates/noirc_frontend/src/parser/labels.rs b/crates/noirc_frontend/src/parser/labels.rs new file mode 100644 index 00000000000..2e01db1af49 --- /dev/null +++ b/crates/noirc_frontend/src/parser/labels.rs @@ -0,0 +1,40 @@ +use std::fmt; + +use crate::token::TokenKind; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum ParserLabel { + Atom, + BinaryOperator, + Cast, + Expression, + FieldAccess, + Global, + IntegerType, + Parameter, + Pattern, + Statement, + Term, + TypeExpression, + TokenKind(TokenKind), +} + +impl fmt::Display for ParserLabel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ParserLabel::Atom => write!(f, "atom"), + ParserLabel::BinaryOperator => write!(f, "binary operator"), + ParserLabel::Cast => write!(f, "cast"), + ParserLabel::Expression => write!(f, "expression"), + ParserLabel::FieldAccess => write!(f, "field access"), + ParserLabel::Global => write!(f, "global"), + ParserLabel::IntegerType => write!(f, "integer type"), + ParserLabel::Parameter => write!(f, "parameter"), + ParserLabel::Pattern => write!(f, "pattern"), + ParserLabel::Statement => write!(f, "statement"), + ParserLabel::Term => write!(f, "term"), + ParserLabel::TypeExpression => write!(f, "type expression"), + ParserLabel::TokenKind(token_kind) => write!(f, "{:?}", token_kind), + } + } +} diff --git a/crates/noirc_frontend/src/parser/mod.rs b/crates/noirc_frontend/src/parser/mod.rs index 788c0eec895..b55c956eaf4 100644 --- a/crates/noirc_frontend/src/parser/mod.rs +++ b/crates/noirc_frontend/src/parser/mod.rs @@ -7,6 +7,7 @@ //! This file is mostly helper functions and types for the parser. For the parser itself, //! see parser.rs. The definition of the abstract syntax tree can be found in the `ast` folder. mod errors; +mod labels; #[allow(clippy::module_inception)] mod parser; diff --git a/crates/noirc_frontend/src/parser/parser.rs b/crates/noirc_frontend/src/parser/parser.rs index 575a9403ea8..b8f8c7d6dc0 100644 --- a/crates/noirc_frontend/src/parser/parser.rs +++ b/crates/noirc_frontend/src/parser/parser.rs @@ -24,9 +24,9 @@ //! be limited to cases like the above `fn` example where it is clear we shouldn't back out of the //! current parser to try alternative parsers in a `choice` expression. use super::{ - foldl_with_span, parameter_name_recovery, parameter_recovery, parenthesized, then_commit, - then_commit_ignore, top_level_statement_recovery, ExprParser, ForRange, NoirParser, - ParsedModule, ParserError, Precedence, SubModule, TopLevelStatement, + foldl_with_span, labels::ParserLabel, parameter_name_recovery, parameter_recovery, + parenthesized, then_commit, then_commit_ignore, top_level_statement_recovery, ExprParser, + ForRange, NoirParser, ParsedModule, ParserError, Precedence, SubModule, TopLevelStatement, }; use crate::ast::{Expression, ExpressionKind, LetStatement, Statement, UnresolvedType}; use crate::lexer::Lexer; @@ -113,7 +113,7 @@ fn top_level_statement( /// global_declaration: 'global' ident global_type_annotation '=' literal fn global_declaration() -> impl NoirParser { let p = ignore_then_commit( - keyword(Keyword::Global).labelled("global"), + keyword(Keyword::Global).labelled(ParserLabel::Global), ident().map(Pattern::Identifier), ); let p = then_commit(p, global_type_annotation()); @@ -273,7 +273,7 @@ fn lambda_parameters() -> impl NoirParser> { .recover_via(parameter_name_recovery()) .then(typ.or_not().map(|typ| typ.unwrap_or(UnresolvedType::Unspecified))); - parameter.separated_by(just(Token::Comma)).allow_trailing().labelled("parameter") + parameter.separated_by(just(Token::Comma)).allow_trailing().labelled(ParserLabel::Parameter) } fn function_parameters<'a>( @@ -292,7 +292,7 @@ fn function_parameters<'a>( let parameter = full_parameter.or(self_parameter); - parameter.separated_by(just(Token::Comma)).allow_trailing().labelled("parameter") + parameter.separated_by(just(Token::Comma)).allow_trailing().labelled(ParserLabel::Parameter) } /// This parser always parses no input and fails @@ -308,7 +308,7 @@ fn self_parameter() -> impl NoirParser<(Pattern, UnresolvedType, AbiVisibility)> let self_type = UnresolvedType::Named(path, vec![]); Ok((Pattern::Identifier(ident), self_type, AbiVisibility::Private)) } - _ => Err(ParserError::expected_label("parameter".to_owned(), found, span)), + _ => Err(ParserError::expected_label(ParserLabel::Parameter, found, span)), }) } @@ -406,7 +406,11 @@ fn token_kind(token_kind: TokenKind) -> impl NoirParser { if found.kind() == token_kind { Ok(found) } else { - Err(ParserError::expected_label(token_kind.to_string(), found, span)) + Err(ParserError::expected_label( + ParserLabel::TokenKind(token_kind.clone()), + found, + span, + )) } }) } @@ -446,7 +450,7 @@ fn constrain<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - ignore_then_commit(keyword(Keyword::Constrain).labelled("statement"), expr_parser) + ignore_then_commit(keyword(Keyword::Constrain).labelled(ParserLabel::Statement), expr_parser) .map(|expr| Statement::Constrain(ConstrainStatement(expr))) } @@ -455,7 +459,7 @@ where P: ExprParser + 'a, { ignore_then_commit(keyword(Keyword::Assert), parenthesized(expr_parser)) - .labelled("statement") + .labelled(ParserLabel::Statement) .map(|expr| Statement::Constrain(ConstrainStatement(expr))) } @@ -463,7 +467,7 @@ fn declaration<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - let p = ignore_then_commit(keyword(Keyword::Let).labelled("statement"), pattern()); + let p = ignore_then_commit(keyword(Keyword::Let).labelled(ParserLabel::Statement), pattern()); let p = p.then(optional_type_annotation()); let p = then_commit_ignore(p, just(Token::Assign)); let p = then_commit(p, expr_parser); @@ -497,14 +501,15 @@ fn pattern() -> impl NoirParser { choice((mut_pattern, tuple_pattern, struct_pattern, ident_pattern)) }) - .labelled("pattern") + .labelled(ParserLabel::Pattern) } fn assignment<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - let fallible = lvalue(expr_parser.clone()).then(assign_operator()).labelled("statement"); + let fallible = + lvalue(expr_parser.clone()).then(assign_operator()).labelled(ParserLabel::Statement); then_commit(fallible, expr_parser).map_with_span( |((identifier, operator), expression), span| { @@ -606,7 +611,7 @@ fn int_type() -> impl NoirParser { .then(filter_map(|span, token: Token| match token { Token::IntType(int_type) => Ok(int_type), unexpected => { - Err(ParserError::expected_label("integer type".to_string(), unexpected, span)) + Err(ParserError::expected_label(ParserLabel::IntegerType, unexpected, span)) } })) .map(UnresolvedType::from_int_token) @@ -652,7 +657,7 @@ fn array_type(type_parser: impl NoirParser) -> impl NoirParser impl NoirParser { recursive(|expr| expression_with_precedence(Precedence::lowest_type_precedence(), expr, true)) - .labelled("type expression") + .labelled(ParserLabel::TypeExpression) .try_map(UnresolvedTypeExpression::from_expr) } @@ -678,7 +683,7 @@ where fn expression() -> impl ExprParser { recursive(|expr| expression_with_precedence(Precedence::Lowest, expr, false)) - .labelled("expression") + .labelled(ParserLabel::Expression) } // An expression is a single term followed by 0 or more (OP subexpression)* @@ -695,9 +700,9 @@ where { if precedence == Precedence::Highest { if is_type_expression { - type_expression_term(expr_parser).boxed().labelled("term") + type_expression_term(expr_parser).boxed().labelled(ParserLabel::Term) } else { - term(expr_parser).boxed().labelled("term") + term(expr_parser).boxed().labelled(ParserLabel::Term) } } else { let next_precedence = @@ -711,7 +716,7 @@ where .then(then_commit(operator_with_precedence(precedence), next_expr).repeated()) .foldl(create_infix_expression) .boxed() - .labelled("expression") + .labelled(ParserLabel::Expression) } } @@ -727,7 +732,7 @@ fn operator_with_precedence(precedence: Precedence) -> impl NoirParser(expr_parser: P) -> impl NoirParser From d7a76d63010b8b8f51326047f48fff5733fe4deb Mon Sep 17 00:00:00 2001 From: Joss Date: Thu, 4 May 2023 15:01:36 +0100 Subject: [PATCH 2/7] chore(parser): wrap LateAllocSet enum in struct --- crates/noirc_frontend/src/parser/errors.rs | 18 +-- .../src/parser/errors/late_alloc_set.rs | 120 +++++++++++------- 2 files changed, 80 insertions(+), 58 deletions(-) diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index d1b86a4f040..280a074ecf2 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -22,8 +22,8 @@ pub struct ParserError { impl ParserError { pub fn empty(found: Token, span: Span) -> ParserError { ParserError { - expected_tokens: LateAllocSet::None, - expected_labels: LateAllocSet::None, + expected_tokens: LateAllocSet::new(), + expected_labels: LateAllocSet::new(), found, reason: None, span, @@ -32,7 +32,7 @@ impl ParserError { pub fn expected_label(label: ParserLabel, found: Token, span: Span) -> ParserError { let mut error = ParserError::empty(found, span); - error.expected_labels = error.expected_labels.insert(label); + error.expected_labels.insert(label); error } @@ -100,7 +100,7 @@ impl chumsky::Error for ParserError { { ParserError { expected_tokens: expected.into_iter().map(|opt| opt.unwrap_or(Token::EOF)).collect(), - expected_labels: LateAllocSet::None, + expected_labels: LateAllocSet::new(), found: found.unwrap_or(Token::EOF), reason: None, span, @@ -108,9 +108,9 @@ impl chumsky::Error for ParserError { } fn with_label(mut self, label: Self::Label) -> Self { - self.expected_tokens = LateAllocSet::None; - self.expected_labels = LateAllocSet::None; - self.expected_labels = self.expected_labels.insert(label); + self.expected_tokens.clear(); + self.expected_labels.clear(); + self.expected_labels.insert(label); self } @@ -120,8 +120,8 @@ impl chumsky::Error for ParserError { // The spans of both errors must match, otherwise the error // messages and error spans may not line up. fn merge(mut self, other: Self) -> Self { - self.expected_tokens = self.expected_tokens.append(other.expected_tokens); - self.expected_labels = self.expected_labels.append(other.expected_labels); + self.expected_tokens.append(other.expected_tokens); + self.expected_labels.append(other.expected_labels); if self.reason.is_none() { self.reason = other.reason; diff --git a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs index 04aab575f78..394b6ed8fcc 100644 --- a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs +++ b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs @@ -24,7 +24,7 @@ use std::collections::BTreeSet; #[derive(Debug, Clone, PartialEq, Eq)] -pub(super) enum LateAllocSet { +enum LateAllocSetData { None, One(T), Two(T, T), @@ -32,78 +32,90 @@ pub(super) enum LateAllocSet { Set(BTreeSet), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) struct LateAllocSet { + data: LateAllocSetData, +} + impl LateAllocSet where T: std::cmp::Ord, { - pub(super) fn insert(self, x: T) -> Self { - match self { - LateAllocSet::None => LateAllocSet::One(x), - LateAllocSet::One(x0) => { + pub(super) fn new() -> Self { + LateAllocSet { data: LateAllocSetData::None } + } + + pub(super) fn insert(&mut self, x: T) { + let old_data = std::mem::replace(&mut self.data, LateAllocSetData::None); + self.data = match old_data { + LateAllocSetData::None => LateAllocSetData::One(x), + LateAllocSetData::One(x0) => { if x0 == x { - LateAllocSet::One(x0) + LateAllocSetData::One(x0) } else { - LateAllocSet::Two(x0, x) + LateAllocSetData::Two(x0, x) } } - LateAllocSet::Two(x0, x1) => { + LateAllocSetData::Two(x0, x1) => { if x0 == x || x1 == x { - LateAllocSet::Two(x0, x1) + LateAllocSetData::Two(x0, x1) } else { - LateAllocSet::Three(x0, x1, x) + LateAllocSetData::Three(x0, x1, x) } } - LateAllocSet::Three(x0, x1, x2) => { + LateAllocSetData::Three(x0, x1, x2) => { if x0 == x || x1 == x || x2 == x { - LateAllocSet::Three(x0, x1, x2) + LateAllocSetData::Three(x0, x1, x2) } else { - LateAllocSet::Set(BTreeSet::from([x0, x1, x2, x])) + LateAllocSetData::Set(BTreeSet::from([x0, x1, x2, x])) } } - LateAllocSet::Set(mut xs) => { + LateAllocSetData::Set(mut xs) => { xs.insert(x); - LateAllocSet::Set(xs) + LateAllocSetData::Set(xs) } - } + }; } pub(super) fn as_vec(&self) -> Vec<&T> { - match self { - LateAllocSet::None => vec![], - LateAllocSet::One(x0) => vec![x0], - LateAllocSet::Two(x0, x1) => vec![x0, x1], - LateAllocSet::Three(x0, x1, x2) => vec![x0, x1, x2], - LateAllocSet::Set(xs) => xs.iter().collect::>(), + match &self.data { + LateAllocSetData::None => vec![], + LateAllocSetData::One(x0) => vec![x0], + LateAllocSetData::Two(x0, x1) => vec![x0, x1], + LateAllocSetData::Three(x0, x1, x2) => vec![x0, x1, x2], + LateAllocSetData::Set(xs) => xs.iter().collect::>(), } } - pub(super) fn append(self, xs: LateAllocSet) -> Self { - let mut out = self; - match xs { - LateAllocSet::None => { + pub(super) fn append(&mut self, other: LateAllocSet) { + match other.data { + LateAllocSetData::None => { // No work } - LateAllocSet::One(x0) => out = out.insert(x0), - LateAllocSet::Two(x0, x1) => { - out = out.insert(x0); - out = out.insert(x1); + LateAllocSetData::One(x0) => self.insert(x0), + LateAllocSetData::Two(x0, x1) => { + self.insert(x0); + self.insert(x1); } - LateAllocSet::Three(x0, x1, x2) => { - out = out.insert(x0); - out = out.insert(x1); - out = out.insert(x2); + LateAllocSetData::Three(x0, x1, x2) => { + self.insert(x0); + self.insert(x1); + self.insert(x2); } - LateAllocSet::Set(xs) => { + LateAllocSetData::Set(xs) => { for x in xs { - out = out.insert(x); + self.insert(x); } } } - out + } + + pub(super) fn clear(&mut self) { + self.data = LateAllocSetData::None; } } -impl FromIterator for LateAllocSet +impl FromIterator for LateAllocSetData where T: std::cmp::Ord, { @@ -111,34 +123,43 @@ where let mut iter = iter.into_iter(); let first = iter.next(); if first.is_none() { - return LateAllocSet::None; + return LateAllocSetData::None; } let second = iter.next(); if second.is_none() { - return LateAllocSet::One(first.unwrap()); + return LateAllocSetData::One(first.unwrap()); } let third = iter.next(); if third.is_none() { - return LateAllocSet::Two(first.unwrap(), second.unwrap()); + return LateAllocSetData::Two(first.unwrap(), second.unwrap()); } let fourth = iter.next(); if fourth.is_none() { - return LateAllocSet::Three(first.unwrap(), second.unwrap(), third.unwrap()); + return LateAllocSetData::Three(first.unwrap(), second.unwrap(), third.unwrap()); } let btree_set: BTreeSet = [first.unwrap(), second.unwrap(), third.unwrap(), fourth.unwrap()] .into_iter() .chain(iter) .collect(); - LateAllocSet::Set(btree_set) + LateAllocSetData::Set(btree_set) } } +impl FromIterator for LateAllocSet +where + T: std::cmp::Ord, +{ + fn from_iter>(iter: I) -> Self { + let data: LateAllocSetData = iter.into_iter().collect(); + LateAllocSet { data } + } +} #[cfg(test)] mod tests { use std::{collections::BTreeSet, time::SystemTime}; - use super::LateAllocSet; + use super::{LateAllocSet, LateAllocSetData}; use crate::token::Token; fn time_1m(f: F) @@ -163,24 +184,25 @@ mod tests { { print!("0 -> 1: "); time_1m(|| { - LateAllocSet::None.insert(x0()); + LateAllocSet { data: LateAllocSetData::None }.insert(x0()); }); print!("1 -> 2: "); time_1m(|| { - LateAllocSet::One(x0()).insert(x1()); + LateAllocSet { data: LateAllocSetData::One(x0()) }.insert(x1()); }); print!("2 -> 3: "); time_1m(|| { - LateAllocSet::Two(x0(), x1()).insert(x2()); + LateAllocSet { data: LateAllocSetData::Two(x0(), x1()) }.insert(x2()); }); print!("3 -> 4: "); time_1m(|| { - LateAllocSet::Three(x0(), x1(), x2()).insert(x3()); + LateAllocSet { data: LateAllocSetData::Three(x0(), x1(), x2()) }.insert(x3()); }); print!("4 -> 5: "); time_1m(|| { - LateAllocSet::Set(BTreeSet::from([x0(), x1(), x2(), x3()])).insert(x4()); + LateAllocSet { data: LateAllocSetData::Set(BTreeSet::from([x0(), x1(), x2(), x3()])) } + .insert(x4()); }); } From 989b93432eaacc40b1be8a4115b3a29cf05077aa Mon Sep 17 00:00:00 2001 From: Joss Date: Thu, 4 May 2023 15:34:44 +0100 Subject: [PATCH 3/7] chore(parser): fix comment table formatting --- crates/noirc_frontend/src/parser/errors/late_alloc_set.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs index 394b6ed8fcc..4cc82166451 100644 --- a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs +++ b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs @@ -14,7 +14,7 @@ //! running `inserts_different_types` in tests below on a 2.3 GHz MacBook Pro (2019). //! //! | Nth insert | &str | u32 | Token | String | -//! ====================================================================== +//! |------------|-------------|-------------|-------------|-------------| //! | 0 -> 1 | 29.425ms | 27.088ms | 47.936ms | 150.282ms | //! | 1 -> 2 | 33.252ms | 29.752ms | 60.845ms | 301.634ms | //! | 2 -> 3 | 35.657ms | 31.898ms | 79.367ms | 487.948ms | From 53b453ebf27a6b53645f6696b58a82c48ed3a7eb Mon Sep 17 00:00:00 2001 From: Joss Date: Tue, 9 May 2023 13:25:44 +0100 Subject: [PATCH 4/7] chore(parser): ParserError to use SmallOrdSet --- crates/noirc_frontend/src/ast/mod.rs | 3 + .../src/hir/resolution/errors.rs | 4 +- .../src/hir/resolution/resolver.rs | 2 +- crates/noirc_frontend/src/parser/errors.rs | 33 ++- .../src/parser/errors/late_alloc_set.rs | 236 ------------------ 5 files changed, 26 insertions(+), 252 deletions(-) delete mode 100644 crates/noirc_frontend/src/parser/errors/late_alloc_set.rs diff --git a/crates/noirc_frontend/src/ast/mod.rs b/crates/noirc_frontend/src/ast/mod.rs index 6bd5c148d66..3cb9d9b211c 100644 --- a/crates/noirc_frontend/src/ast/mod.rs +++ b/crates/noirc_frontend/src/ast/mod.rs @@ -148,6 +148,9 @@ pub enum Signedness { } impl UnresolvedTypeExpression { + // This large error size is justified because it improves parsing speeds by around 40% in + // release mode. See `ParserError` definition for further explanation. + #[allow(clippy::result_large_err)] pub fn from_expr( expr: Expression, span: Span, diff --git a/crates/noirc_frontend/src/hir/resolution/errors.rs b/crates/noirc_frontend/src/hir/resolution/errors.rs index c57e4c890d2..87257cbb842 100644 --- a/crates/noirc_frontend/src/hir/resolution/errors.rs +++ b/crates/noirc_frontend/src/hir/resolution/errors.rs @@ -57,7 +57,7 @@ pub enum ResolverError { #[error("Incorrect amount of arguments to generic type constructor")] IncorrectGenericCount { span: Span, struct_type: String, actual: usize, expected: usize }, #[error("{0}")] - ParserError(ParserError), + ParserError(Box), #[error("Function is not defined in a contract yet sets its contract visibility")] ContractFunctionTypeInNormalFunction { span: Span }, } @@ -252,7 +252,7 @@ impl From for Diagnostic { span, ) } - ResolverError::ParserError(error) => error.into(), + ResolverError::ParserError(error) => (*error).into(), ResolverError::ContractFunctionTypeInNormalFunction { span } => Diagnostic::simple_error( "Only functions defined within contracts can set their contract function type".into(), "Non-contract functions cannot be 'open'".into(), diff --git a/crates/noirc_frontend/src/hir/resolution/resolver.rs b/crates/noirc_frontend/src/hir/resolution/resolver.rs index f03bcefeb2d..d80bca9df17 100644 --- a/crates/noirc_frontend/src/hir/resolution/resolver.rs +++ b/crates/noirc_frontend/src/hir/resolution/resolver.rs @@ -859,7 +859,7 @@ impl<'a> Resolver<'a> { let span = length.span; let length = UnresolvedTypeExpression::from_expr(*length, span).unwrap_or_else( |error| { - self.errors.push(ResolverError::ParserError(error)); + self.errors.push(ResolverError::ParserError(Box::new(error))); UnresolvedTypeExpression::Constant(0, span) }, ); diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index 280a074ecf2..1306c3a5b20 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -1,6 +1,6 @@ use crate::lexer::token::Token; use crate::BinaryOp; -use late_alloc_set::LateAllocSet; +use small_ord_set::SmallOrdSet; use iter_extended::vecmap; use noirc_errors::CustomDiagnostic as Diagnostic; @@ -8,12 +8,19 @@ use noirc_errors::Span; use super::labels::ParserLabel; -mod late_alloc_set; - +/// Represents a parsing error, or a parsing error in the making. +/// +/// `ParserError` is used extensively by the parser, as it not only used to report badly formed +/// token streams, but also as a general intermediate that accumulates information as various +/// parsing rules are tried. This struct is constructed and destructed with a very high frequency +/// and as such, the time taken to do so significantly impacts parsing performance. For this +/// reason we use `SmallOrdSet` to avoid heap allocations for as long as possible - this greatly +/// inflates the size of the error, but this is justified by a resulting increase in parsing +/// speeds of approximately 40% in release mode. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserError { - expected_tokens: LateAllocSet, - expected_labels: LateAllocSet, + expected_tokens: SmallOrdSet<[Token; 3]>, + expected_labels: SmallOrdSet<[ParserLabel; 3]>, found: Token, reason: Option, span: Span, @@ -22,8 +29,8 @@ pub struct ParserError { impl ParserError { pub fn empty(found: Token, span: Span) -> ParserError { ParserError { - expected_tokens: LateAllocSet::new(), - expected_labels: LateAllocSet::new(), + expected_tokens: SmallOrdSet::new(), + expected_labels: SmallOrdSet::new(), found, reason: None, span, @@ -55,8 +62,8 @@ impl ParserError { impl std::fmt::Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut expected = vecmap(self.expected_tokens.as_vec(), ToString::to_string); - expected.append(&mut vecmap(self.expected_labels.as_vec(), |ref_str| format!("{ref_str}"))); + let mut expected = vecmap(self.expected_tokens.iter(), ToString::to_string); + expected.append(&mut vecmap(self.expected_labels.iter(), |label| format!("{label}"))); if expected.is_empty() { write!(f, "Unexpected {} in input", self.found) @@ -100,7 +107,7 @@ impl chumsky::Error for ParserError { { ParserError { expected_tokens: expected.into_iter().map(|opt| opt.unwrap_or(Token::EOF)).collect(), - expected_labels: LateAllocSet::new(), + expected_labels: SmallOrdSet::new(), found: found.unwrap_or(Token::EOF), reason: None, span, @@ -119,9 +126,9 @@ impl chumsky::Error for ParserError { // that reason and discard the other if present. // The spans of both errors must match, otherwise the error // messages and error spans may not line up. - fn merge(mut self, other: Self) -> Self { - self.expected_tokens.append(other.expected_tokens); - self.expected_labels.append(other.expected_labels); + fn merge(mut self, mut other: Self) -> Self { + self.expected_tokens.append(&mut other.expected_tokens); + self.expected_labels.append(&mut other.expected_labels); if self.reason.is_none() { self.reason = other.reason; diff --git a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs b/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs deleted file mode 100644 index 4cc82166451..00000000000 --- a/crates/noirc_frontend/src/parser/errors/late_alloc_set.rs +++ /dev/null @@ -1,236 +0,0 @@ -//! `LateAllocSet` is an alternative to `BTreeSet` optimized for small sets that can be located -//! entirely in stack memory. Once the set size goes beyond 3, performance is less than that of a -//! `BTreeMap`. -//! -//! Approximately 20-50 times faster than `BTreeSet` it is beyond three elements - at which point -//! it switches to using a `BTreeSet` internally. This container makes sense for short lived sets -//! that very rarely go beyond 3 elements, and for which the elements can be represented entirely -//! in stack memory. -//! -//! This set's size is at least 3 times the size of it's element's, so it is not suitable to be -//! held as an item type in larger parent collections. -//! -//! Below - time taken to insert Nth element one millions times for differing types, sampled by -//! running `inserts_different_types` in tests below on a 2.3 GHz MacBook Pro (2019). -//! -//! | Nth insert | &str | u32 | Token | String | -//! |------------|-------------|-------------|-------------|-------------| -//! | 0 -> 1 | 29.425ms | 27.088ms | 47.936ms | 150.282ms | -//! | 1 -> 2 | 33.252ms | 29.752ms | 60.845ms | 301.634ms | -//! | 2 -> 3 | 35.657ms | 31.898ms | 79.367ms | 487.948ms | -//! | 3 -> 4 | 1,324.44ms | 1,079.197ms | 1,846.823ms | 2,225.094ms | -//! | 4 -> 5 | 1,482.358ms | 1,231.839ms | 1,918.353ms | 2,541.392ms | - -use std::collections::BTreeSet; - -#[derive(Debug, Clone, PartialEq, Eq)] -enum LateAllocSetData { - None, - One(T), - Two(T, T), - Three(T, T, T), - Set(BTreeSet), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub(super) struct LateAllocSet { - data: LateAllocSetData, -} - -impl LateAllocSet -where - T: std::cmp::Ord, -{ - pub(super) fn new() -> Self { - LateAllocSet { data: LateAllocSetData::None } - } - - pub(super) fn insert(&mut self, x: T) { - let old_data = std::mem::replace(&mut self.data, LateAllocSetData::None); - self.data = match old_data { - LateAllocSetData::None => LateAllocSetData::One(x), - LateAllocSetData::One(x0) => { - if x0 == x { - LateAllocSetData::One(x0) - } else { - LateAllocSetData::Two(x0, x) - } - } - LateAllocSetData::Two(x0, x1) => { - if x0 == x || x1 == x { - LateAllocSetData::Two(x0, x1) - } else { - LateAllocSetData::Three(x0, x1, x) - } - } - LateAllocSetData::Three(x0, x1, x2) => { - if x0 == x || x1 == x || x2 == x { - LateAllocSetData::Three(x0, x1, x2) - } else { - LateAllocSetData::Set(BTreeSet::from([x0, x1, x2, x])) - } - } - LateAllocSetData::Set(mut xs) => { - xs.insert(x); - LateAllocSetData::Set(xs) - } - }; - } - - pub(super) fn as_vec(&self) -> Vec<&T> { - match &self.data { - LateAllocSetData::None => vec![], - LateAllocSetData::One(x0) => vec![x0], - LateAllocSetData::Two(x0, x1) => vec![x0, x1], - LateAllocSetData::Three(x0, x1, x2) => vec![x0, x1, x2], - LateAllocSetData::Set(xs) => xs.iter().collect::>(), - } - } - - pub(super) fn append(&mut self, other: LateAllocSet) { - match other.data { - LateAllocSetData::None => { - // No work - } - LateAllocSetData::One(x0) => self.insert(x0), - LateAllocSetData::Two(x0, x1) => { - self.insert(x0); - self.insert(x1); - } - LateAllocSetData::Three(x0, x1, x2) => { - self.insert(x0); - self.insert(x1); - self.insert(x2); - } - LateAllocSetData::Set(xs) => { - for x in xs { - self.insert(x); - } - } - } - } - - pub(super) fn clear(&mut self) { - self.data = LateAllocSetData::None; - } -} - -impl FromIterator for LateAllocSetData -where - T: std::cmp::Ord, -{ - fn from_iter>(iter: I) -> Self { - let mut iter = iter.into_iter(); - let first = iter.next(); - if first.is_none() { - return LateAllocSetData::None; - } - let second = iter.next(); - if second.is_none() { - return LateAllocSetData::One(first.unwrap()); - } - let third = iter.next(); - if third.is_none() { - return LateAllocSetData::Two(first.unwrap(), second.unwrap()); - } - let fourth = iter.next(); - if fourth.is_none() { - return LateAllocSetData::Three(first.unwrap(), second.unwrap(), third.unwrap()); - } - let btree_set: BTreeSet = - [first.unwrap(), second.unwrap(), third.unwrap(), fourth.unwrap()] - .into_iter() - .chain(iter) - .collect(); - LateAllocSetData::Set(btree_set) - } -} - -impl FromIterator for LateAllocSet -where - T: std::cmp::Ord, -{ - fn from_iter>(iter: I) -> Self { - let data: LateAllocSetData = iter.into_iter().collect(); - LateAllocSet { data } - } -} -#[cfg(test)] -mod tests { - use std::{collections::BTreeSet, time::SystemTime}; - - use super::{LateAllocSet, LateAllocSetData}; - use crate::token::Token; - - fn time_1m(f: F) - where - F: Fn(), - { - let start = SystemTime::now(); - for _ in 0..1000000 { - f(); - } - println!("{:?}", start.elapsed().unwrap()); - } - - fn time_1m_inserts_1_to_5(x0: F0, x1: F1, x2: F2, x3: F3, x4: F4) - where - T: std::cmp::Ord + Clone, - F0: Fn() -> T, - F1: Fn() -> T, - F2: Fn() -> T, - F3: Fn() -> T, - F4: Fn() -> T, - { - print!("0 -> 1: "); - time_1m(|| { - LateAllocSet { data: LateAllocSetData::None }.insert(x0()); - }); - - print!("1 -> 2: "); - time_1m(|| { - LateAllocSet { data: LateAllocSetData::One(x0()) }.insert(x1()); - }); - print!("2 -> 3: "); - time_1m(|| { - LateAllocSet { data: LateAllocSetData::Two(x0(), x1()) }.insert(x2()); - }); - print!("3 -> 4: "); - time_1m(|| { - LateAllocSet { data: LateAllocSetData::Three(x0(), x1(), x2()) }.insert(x3()); - }); - print!("4 -> 5: "); - time_1m(|| { - LateAllocSet { data: LateAllocSetData::Set(BTreeSet::from([x0(), x1(), x2(), x3()])) } - .insert(x4()); - }); - } - - #[test] - #[ignore] - fn inserts_different_types() { - println!("\nelement type: &str"); - time_1m_inserts_1_to_5(|| "a", || "b", || "c", || "d", || "e"); - - println!("\nelement type: u32"); - time_1m_inserts_1_to_5(|| 0, || 1, || 2, || 3, || 4); - - println!("\nelement type: Token"); - time_1m_inserts_1_to_5( - || Token::Ampersand, - || Token::Arrow, - || Token::Assign, - || Token::Bang, - || Token::Caret, - ); - - println!("\nelement type: String"); - time_1m_inserts_1_to_5( - || String::from("a"), - || String::from("b"), - || String::from("c"), - || String::from("d"), - || String::from("e"), - ); - } -} From 9d888675bc22007d36eaef34d5d8a3a71dc0af26 Mon Sep 17 00:00:00 2001 From: Joss Date: Tue, 9 May 2023 13:41:50 +0100 Subject: [PATCH 5/7] chore(parser): ParserLabel -> ParsingRuleLabel --- crates/noirc_frontend/src/parser/errors.rs | 8 +-- crates/noirc_frontend/src/parser/labels.rs | 32 ++++++----- crates/noirc_frontend/src/parser/parser.rs | 64 +++++++++++++--------- 3 files changed, 58 insertions(+), 46 deletions(-) diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index aa0161f60cc..513b33278ec 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -7,7 +7,7 @@ use iter_extended::vecmap; use noirc_errors::CustomDiagnostic as Diagnostic; use noirc_errors::Span; -use super::labels::ParserLabel; +use super::labels::ParsingRuleLabel; #[derive(Debug, Clone, PartialEq, Eq, Error)] pub enum ParserErrorReason { @@ -35,7 +35,7 @@ pub enum ParserErrorReason { #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserError { expected_tokens: SmallOrdSet<[Token; 3]>, - expected_labels: SmallOrdSet<[ParserLabel; 3]>, + expected_labels: SmallOrdSet<[ParsingRuleLabel; 3]>, found: Token, reason: Option, span: Span, @@ -52,7 +52,7 @@ impl ParserError { } } - pub fn expected_label(label: ParserLabel, found: Token, span: Span) -> ParserError { + pub fn expected_label(label: ParsingRuleLabel, found: Token, span: Span) -> ParserError { let mut error = ParserError::empty(found, span); error.expected_labels.insert(label); error @@ -116,7 +116,7 @@ impl From for Diagnostic { impl chumsky::Error for ParserError { type Span = Span; - type Label = ParserLabel; + type Label = ParsingRuleLabel; fn expected_input_found(span: Self::Span, expected: Iter, found: Option) -> Self where diff --git a/crates/noirc_frontend/src/parser/labels.rs b/crates/noirc_frontend/src/parser/labels.rs index 2e01db1af49..b43c10fb9e7 100644 --- a/crates/noirc_frontend/src/parser/labels.rs +++ b/crates/noirc_frontend/src/parser/labels.rs @@ -2,8 +2,10 @@ use std::fmt; use crate::token::TokenKind; +/// Used to annotate parsing rules with extra context that can be presented to the user later in +/// the case of an error. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub enum ParserLabel { +pub enum ParsingRuleLabel { Atom, BinaryOperator, Cast, @@ -19,22 +21,22 @@ pub enum ParserLabel { TokenKind(TokenKind), } -impl fmt::Display for ParserLabel { +impl fmt::Display for ParsingRuleLabel { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - ParserLabel::Atom => write!(f, "atom"), - ParserLabel::BinaryOperator => write!(f, "binary operator"), - ParserLabel::Cast => write!(f, "cast"), - ParserLabel::Expression => write!(f, "expression"), - ParserLabel::FieldAccess => write!(f, "field access"), - ParserLabel::Global => write!(f, "global"), - ParserLabel::IntegerType => write!(f, "integer type"), - ParserLabel::Parameter => write!(f, "parameter"), - ParserLabel::Pattern => write!(f, "pattern"), - ParserLabel::Statement => write!(f, "statement"), - ParserLabel::Term => write!(f, "term"), - ParserLabel::TypeExpression => write!(f, "type expression"), - ParserLabel::TokenKind(token_kind) => write!(f, "{:?}", token_kind), + ParsingRuleLabel::Atom => write!(f, "atom"), + ParsingRuleLabel::BinaryOperator => write!(f, "binary operator"), + ParsingRuleLabel::Cast => write!(f, "cast"), + ParsingRuleLabel::Expression => write!(f, "expression"), + ParsingRuleLabel::FieldAccess => write!(f, "field access"), + ParsingRuleLabel::Global => write!(f, "global"), + ParsingRuleLabel::IntegerType => write!(f, "integer type"), + ParsingRuleLabel::Parameter => write!(f, "parameter"), + ParsingRuleLabel::Pattern => write!(f, "pattern"), + ParsingRuleLabel::Statement => write!(f, "statement"), + ParsingRuleLabel::Term => write!(f, "term"), + ParsingRuleLabel::TypeExpression => write!(f, "type expression"), + ParsingRuleLabel::TokenKind(token_kind) => write!(f, "{:?}", token_kind), } } } diff --git a/crates/noirc_frontend/src/parser/parser.rs b/crates/noirc_frontend/src/parser/parser.rs index 9030b20c6b5..98b45247567 100644 --- a/crates/noirc_frontend/src/parser/parser.rs +++ b/crates/noirc_frontend/src/parser/parser.rs @@ -24,7 +24,7 @@ //! be limited to cases like the above `fn` example where it is clear we shouldn't back out of the //! current parser to try alternative parsers in a `choice` expression. use super::{ - foldl_with_span, labels::ParserLabel, parameter_name_recovery, parameter_recovery, + foldl_with_span, labels::ParsingRuleLabel, parameter_name_recovery, parameter_recovery, parenthesized, then_commit, then_commit_ignore, top_level_statement_recovery, ExprParser, ForRange, NoirParser, ParsedModule, ParserError, ParserErrorReason, Precedence, SubModule, TopLevelStatement, @@ -114,7 +114,7 @@ fn top_level_statement( /// global_declaration: 'global' ident global_type_annotation '=' literal fn global_declaration() -> impl NoirParser { let p = ignore_then_commit( - keyword(Keyword::Global).labelled(ParserLabel::Global), + keyword(Keyword::Global).labelled(ParsingRuleLabel::Global), ident().map(Pattern::Identifier), ); let p = then_commit(p, global_type_annotation()); @@ -274,7 +274,10 @@ fn lambda_parameters() -> impl NoirParser> { .recover_via(parameter_name_recovery()) .then(typ.or_not().map(|typ| typ.unwrap_or(UnresolvedType::Unspecified))); - parameter.separated_by(just(Token::Comma)).allow_trailing().labelled(ParserLabel::Parameter) + parameter + .separated_by(just(Token::Comma)) + .allow_trailing() + .labelled(ParsingRuleLabel::Parameter) } fn function_parameters<'a>( @@ -293,7 +296,10 @@ fn function_parameters<'a>( let parameter = full_parameter.or(self_parameter); - parameter.separated_by(just(Token::Comma)).allow_trailing().labelled(ParserLabel::Parameter) + parameter + .separated_by(just(Token::Comma)) + .allow_trailing() + .labelled(ParsingRuleLabel::Parameter) } /// This parser always parses no input and fails @@ -309,7 +315,7 @@ fn self_parameter() -> impl NoirParser<(Pattern, UnresolvedType, AbiVisibility)> let self_type = UnresolvedType::Named(path, vec![]); Ok((Pattern::Identifier(ident), self_type, AbiVisibility::Private)) } - _ => Err(ParserError::expected_label(ParserLabel::Parameter, found, span)), + _ => Err(ParserError::expected_label(ParsingRuleLabel::Parameter, found, span)), }) } @@ -408,7 +414,7 @@ fn token_kind(token_kind: TokenKind) -> impl NoirParser { Ok(found) } else { Err(ParserError::expected_label( - ParserLabel::TokenKind(token_kind.clone()), + ParsingRuleLabel::TokenKind(token_kind.clone()), found, span, )) @@ -451,12 +457,15 @@ fn constrain<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - ignore_then_commit(keyword(Keyword::Constrain).labelled(ParserLabel::Statement), expr_parser) - .map(|expr| Statement::Constrain(ConstrainStatement(expr))) - .validate(|expr, span, emit| { - emit(ParserError::with_reason(ParserErrorReason::ConstrainDeprecated, span)); - expr - }) + ignore_then_commit( + keyword(Keyword::Constrain).labelled(ParsingRuleLabel::Statement), + expr_parser, + ) + .map(|expr| Statement::Constrain(ConstrainStatement(expr))) + .validate(|expr, span, emit| { + emit(ParserError::with_reason(ParserErrorReason::ConstrainDeprecated, span)); + expr + }) } fn assertion<'a, P>(expr_parser: P) -> impl NoirParser + 'a @@ -464,7 +473,7 @@ where P: ExprParser + 'a, { ignore_then_commit(keyword(Keyword::Assert), parenthesized(expr_parser)) - .labelled(ParserLabel::Statement) + .labelled(ParsingRuleLabel::Statement) .map(|expr| Statement::Constrain(ConstrainStatement(expr))) } @@ -472,7 +481,8 @@ fn declaration<'a, P>(expr_parser: P) -> impl NoirParser + 'a where P: ExprParser + 'a, { - let p = ignore_then_commit(keyword(Keyword::Let).labelled(ParserLabel::Statement), pattern()); + let p = + ignore_then_commit(keyword(Keyword::Let).labelled(ParsingRuleLabel::Statement), pattern()); let p = p.then(optional_type_annotation()); let p = then_commit_ignore(p, just(Token::Assign)); let p = then_commit(p, expr_parser); @@ -506,7 +516,7 @@ fn pattern() -> impl NoirParser { choice((mut_pattern, tuple_pattern, struct_pattern, ident_pattern)) }) - .labelled(ParserLabel::Pattern) + .labelled(ParsingRuleLabel::Pattern) } fn assignment<'a, P>(expr_parser: P) -> impl NoirParser + 'a @@ -514,7 +524,7 @@ where P: ExprParser + 'a, { let fallible = - lvalue(expr_parser.clone()).then(assign_operator()).labelled(ParserLabel::Statement); + lvalue(expr_parser.clone()).then(assign_operator()).labelled(ParsingRuleLabel::Statement); then_commit(fallible, expr_parser).map_with_span( |((identifier, operator), expression), span| { @@ -629,7 +639,7 @@ fn int_type() -> impl NoirParser { .then(filter_map(|span, token: Token| match token { Token::IntType(int_type) => Ok(int_type), unexpected => { - Err(ParserError::expected_label(ParserLabel::IntegerType, unexpected, span)) + Err(ParserError::expected_label(ParsingRuleLabel::IntegerType, unexpected, span)) } })) .map(UnresolvedType::from_int_token) @@ -675,7 +685,7 @@ fn array_type(type_parser: impl NoirParser) -> impl NoirParser impl NoirParser { recursive(|expr| expression_with_precedence(Precedence::lowest_type_precedence(), expr, true)) - .labelled(ParserLabel::TypeExpression) + .labelled(ParsingRuleLabel::TypeExpression) .try_map(UnresolvedTypeExpression::from_expr) } @@ -701,7 +711,7 @@ where fn expression() -> impl ExprParser { recursive(|expr| expression_with_precedence(Precedence::Lowest, expr, false)) - .labelled(ParserLabel::Expression) + .labelled(ParsingRuleLabel::Expression) } // An expression is a single term followed by 0 or more (OP subexpression)* @@ -718,9 +728,9 @@ where { if precedence == Precedence::Highest { if is_type_expression { - type_expression_term(expr_parser).boxed().labelled(ParserLabel::Term) + type_expression_term(expr_parser).boxed().labelled(ParsingRuleLabel::Term) } else { - term(expr_parser).boxed().labelled(ParserLabel::Term) + term(expr_parser).boxed().labelled(ParsingRuleLabel::Term) } } else { let next_precedence = @@ -734,7 +744,7 @@ where .then(then_commit(operator_with_precedence(precedence), next_expr).repeated()) .foldl(create_infix_expression) .boxed() - .labelled(ParserLabel::Expression) + .labelled(ParsingRuleLabel::Expression) } } @@ -759,7 +769,7 @@ fn operator_with_precedence(precedence: Precedence) -> impl NoirParser(expr_parser: P) -> impl NoirParser From 78110be8805f9bfe106aecb89d6bc67db819db70 Mon Sep 17 00:00:00 2001 From: Joss Date: Tue, 9 May 2023 13:50:07 +0100 Subject: [PATCH 6/7] chore(parser): tidy iter usage --- crates/noirc_frontend/src/parser/errors.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index 513b33278ec..763c7cda767 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -67,8 +67,8 @@ impl ParserError { impl std::fmt::Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut expected = vecmap(self.expected_tokens.iter(), ToString::to_string); - expected.append(&mut vecmap(self.expected_labels.iter(), |label| format!("{label}"))); + let mut expected = vecmap(&self.expected_tokens, ToString::to_string); + expected.append(&mut vecmap(&self.expected_labels, |label| format!("{label}"))); if expected.is_empty() { write!(f, "Unexpected {} in input", self.found) From 32d1d566f957467a622ccd4c86c37cc4a2bd2b90 Mon Sep 17 00:00:00 2001 From: Joss Date: Tue, 9 May 2023 18:14:57 +0100 Subject: [PATCH 7/7] chore(parser): tweak SmallOrdSet sizes --- crates/noirc_frontend/src/parser/errors.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/noirc_frontend/src/parser/errors.rs b/crates/noirc_frontend/src/parser/errors.rs index 763c7cda767..d4a294482a8 100644 --- a/crates/noirc_frontend/src/parser/errors.rs +++ b/crates/noirc_frontend/src/parser/errors.rs @@ -32,10 +32,13 @@ pub enum ParserErrorReason { /// reason we use `SmallOrdSet` to avoid heap allocations for as long as possible - this greatly /// inflates the size of the error, but this is justified by a resulting increase in parsing /// speeds of approximately 40% in release mode. +/// +/// Both `expected_tokens` and `expected_labels` use `SmallOrdSet` sized 1. In the of labels this +/// is optimal. In the of tokens we stop here due to fast diminishing returns. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserError { - expected_tokens: SmallOrdSet<[Token; 3]>, - expected_labels: SmallOrdSet<[ParsingRuleLabel; 3]>, + expected_tokens: SmallOrdSet<[Token; 1]>, + expected_labels: SmallOrdSet<[ParsingRuleLabel; 1]>, found: Token, reason: Option, span: Span,