Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(parser): Parser error optimisation #1292

Merged
merged 8 commits into from
May 9, 2023
Merged
2 changes: 1 addition & 1 deletion crates/noirc_frontend/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ impl fmt::Display for Token {
}
}

#[derive(PartialEq, Eq, Hash, Debug, Clone)]
#[derive(PartialEq, Eq, Hash, Debug, Clone, Ord, PartialOrd)]
/// The different kinds of tokens that are possible in the target language
pub enum TokenKind {
Token(Token),
Expand Down
37 changes: 17 additions & 20 deletions crates/noirc_frontend/src/parser/errors.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
use std::collections::BTreeSet;

use crate::lexer::token::Token;
use crate::BinaryOp;
use late_alloc_set::LateAllocSet;

use iter_extended::vecmap;
use noirc_errors::CustomDiagnostic as Diagnostic;
use noirc_errors::Span;

use super::labels::ParserLabel;

mod late_alloc_set;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParserError {
expected_tokens: BTreeSet<Token>,
expected_labels: BTreeSet<String>,
expected_tokens: LateAllocSet<Token>,
expected_labels: LateAllocSet<ParserLabel>,
found: Token,
reason: Option<String>,
span: Span,
Expand All @@ -19,21 +22,15 @@ pub struct ParserError {
impl ParserError {
pub fn empty(found: Token, span: Span) -> ParserError {
ParserError {
expected_tokens: BTreeSet::new(),
expected_labels: BTreeSet::new(),
expected_tokens: LateAllocSet::new(),
expected_labels: LateAllocSet::new(),
found,
reason: None,
span,
}
}

pub fn expected(token: Token, found: Token, span: Span) -> ParserError {
let mut error = ParserError::empty(found, span);
error.expected_tokens.insert(token);
error
}

pub fn expected_label(label: String, found: Token, span: Span) -> ParserError {
pub fn expected_label(label: ParserLabel, found: Token, span: Span) -> ParserError {
let mut error = ParserError::empty(found, span);
error.expected_labels.insert(label);
error
Expand All @@ -58,8 +55,8 @@ impl ParserError {

impl std::fmt::Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut expected = vecmap(&self.expected_tokens, ToString::to_string);
expected.append(&mut vecmap(&self.expected_labels, Clone::clone));
let mut expected = vecmap(self.expected_tokens.as_vec(), ToString::to_string);
expected.append(&mut vecmap(self.expected_labels.as_vec(), |ref_str| format!("{ref_str}")));

if expected.is_empty() {
write!(f, "Unexpected {} in input", self.found)
Expand Down Expand Up @@ -95,15 +92,15 @@ impl From<ParserError> for Diagnostic {

impl chumsky::Error<Token> for ParserError {
type Span = Span;
type Label = String;
type Label = ParserLabel;

fn expected_input_found<Iter>(span: Self::Span, expected: Iter, found: Option<Token>) -> Self
where
Iter: IntoIterator<Item = Option<Token>>,
{
ParserError {
expected_tokens: expected.into_iter().map(|opt| opt.unwrap_or(Token::EOF)).collect(),
expected_labels: BTreeSet::new(),
expected_labels: LateAllocSet::new(),
found: found.unwrap_or(Token::EOF),
reason: None,
span,
Expand All @@ -122,9 +119,9 @@ impl chumsky::Error<Token> for ParserError {
// that reason and discard the other if present.
// The spans of both errors must match, otherwise the error
// messages and error spans may not line up.
fn merge(mut self, mut other: Self) -> Self {
self.expected_tokens.append(&mut other.expected_tokens);
self.expected_labels.append(&mut other.expected_labels);
fn merge(mut self, other: Self) -> Self {
self.expected_tokens.append(other.expected_tokens);
self.expected_labels.append(other.expected_labels);

if self.reason.is_none() {
self.reason = other.reason;
Expand Down
236 changes: 236 additions & 0 deletions crates/noirc_frontend/src/parser/errors/late_alloc_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
//! `LateAllocSet` is an alternative to `BTreeSet` optimized for small sets that can be located
//! entirely in stack memory. Once the set size goes beyond 3, performance is less than that of a
//! `BTreeSet`.
//!
//! It is approximately 20-50 times faster than `BTreeSet` until it grows beyond three elements -
//! at which point it switches to using a `BTreeSet` internally. This container makes sense for
//! short lived sets that very rarely go beyond 3 elements, and for which the elements can be
//! represented entirely in stack memory.
//!
//! This set's size is at least 3 times the size of its elements, so it is not suitable to be
//! held as an item type in larger parent collections.
//!
//! Below - time taken to insert the Nth element one million times for differing types, sampled by
//! running `inserts_different_types` in tests below on a 2.3 GHz MacBook Pro (2019).
//!
//! | Nth insert | &str | u32 | Token | String |
//! |------------|-------------|-------------|-------------|-------------|
//! | 0 -> 1 | 29.425ms | 27.088ms | 47.936ms | 150.282ms |
//! | 1 -> 2 | 33.252ms | 29.752ms | 60.845ms | 301.634ms |
//! | 2 -> 3 | 35.657ms | 31.898ms | 79.367ms | 487.948ms |
//! | 3 -> 4 | 1,324.44ms | 1,079.197ms | 1,846.823ms | 2,225.094ms |
//! | 4 -> 5 | 1,482.358ms | 1,231.839ms | 1,918.353ms | 2,541.392ms |

use std::collections::BTreeSet;

/// Backing storage for `LateAllocSet`.
///
/// Zero to three elements are held directly in the variant; larger sets are held in a
/// `BTreeSet`.
///
/// NOTE: the derived `PartialEq` compares variants structurally, so `Two(a, b)` and `Two(b, a)`
/// are not equal even though they describe the same set of elements.
#[derive(Debug, Clone, PartialEq, Eq)]
enum LateAllocSetData<T> {
/// The empty set.
None,
/// Exactly one element.
One(T),
/// Two elements, in the order they were stored.
Two(T, T),
/// Three elements, in the order they were stored.
Three(T, T, T),
/// Fallback storage used once the inline variants are exhausted.
Set(BTreeSet<T>),
}
jfecher marked this conversation as resolved.
Show resolved Hide resolved

/// A set intended for collections that rarely grow beyond three elements.
///
/// The set is a thin wrapper around its private storage enum; visibility is limited to the
/// parent module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct LateAllocSet<T> {
// Current storage: one of the inline variants, or the `BTreeSet` fallback.
data: LateAllocSetData<T>,
}

impl<T> LateAllocSet<T>
where
    T: std::cmp::Ord,
{
    /// Creates an empty set.
    pub(super) fn new() -> Self {
        Self { data: LateAllocSetData::None }
    }

    /// Adds `x` to the set. If an equal element is already present the set is left unchanged
    /// and `x` is dropped.
    ///
    /// The first three distinct elements are stored inline in insertion order; the fourth
    /// distinct element moves everything into a `BTreeSet`.
    pub(super) fn insert(&mut self, x: T) {
        // Take the storage by value so its elements can be moved into the next variant.
        let current = std::mem::replace(&mut self.data, LateAllocSetData::None);
        self.data = match current {
            LateAllocSetData::None => LateAllocSetData::One(x),

            LateAllocSetData::One(a) if a == x => LateAllocSetData::One(a),
            LateAllocSetData::One(a) => LateAllocSetData::Two(a, x),

            LateAllocSetData::Two(a, b) if a == x || b == x => LateAllocSetData::Two(a, b),
            LateAllocSetData::Two(a, b) => LateAllocSetData::Three(a, b, x),

            LateAllocSetData::Three(a, b, c) if a == x || b == x || c == x => {
                LateAllocSetData::Three(a, b, c)
            }
            LateAllocSetData::Three(a, b, c) => {
                LateAllocSetData::Set(BTreeSet::from([a, b, c, x]))
            }

            LateAllocSetData::Set(mut spilled) => {
                spilled.insert(x);
                LateAllocSetData::Set(spilled)
            }
        };
    }

    /// Returns references to every element in a freshly allocated `Vec`.
    ///
    /// Inline variants yield their elements in stored order; the `BTreeSet` variant yields
    /// them in the set's iteration order.
    pub(super) fn as_vec(&self) -> Vec<&T> {
        match &self.data {
            LateAllocSetData::None => Vec::new(),
            LateAllocSetData::One(a) => vec![a],
            LateAllocSetData::Two(a, b) => vec![a, b],
            LateAllocSetData::Three(a, b, c) => vec![a, b, c],
            LateAllocSetData::Set(spilled) => spilled.iter().collect(),
        }
    }

    /// Moves every element of `other` into `self`, one `insert` at a time.
    pub(super) fn append(&mut self, other: LateAllocSet<T>) {
        match other.data {
            LateAllocSetData::None => {}
            LateAllocSetData::One(a) => self.insert(a),
            LateAllocSetData::Two(a, b) => {
                for item in [a, b] {
                    self.insert(item);
                }
            }
            LateAllocSetData::Three(a, b, c) => {
                for item in [a, b, c] {
                    self.insert(item);
                }
            }
            LateAllocSetData::Set(spilled) => {
                spilled.into_iter().for_each(|item| self.insert(item));
            }
        }
    }

    /// Removes all elements, returning the set to its empty inline state.
    pub(super) fn clear(&mut self) {
        self.data = LateAllocSetData::None;
    }
}

/// Builds the storage from an iterator, discarding elements equal to one already collected.
///
/// Distinct elements fill the inline variants in iteration order. The fourth distinct element
/// moves everything into a `BTreeSet`, which then absorbs the rest of the iterator.
impl<T> FromIterator<T> for LateAllocSetData<T>
where
    T: std::cmp::Ord,
{
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        // Each step compares against the elements collected so far, so duplicates in the
        // input can never produce e.g. `Two(a, a)` or a `Set` with fewer than four elements.
        iter.into_iter().fold(LateAllocSetData::None, |data, item| match data {
            LateAllocSetData::None => LateAllocSetData::One(item),

            LateAllocSetData::One(x0) if x0 == item => LateAllocSetData::One(x0),
            LateAllocSetData::One(x0) => LateAllocSetData::Two(x0, item),

            LateAllocSetData::Two(x0, x1) if x0 == item || x1 == item => {
                LateAllocSetData::Two(x0, x1)
            }
            LateAllocSetData::Two(x0, x1) => LateAllocSetData::Three(x0, x1, item),

            LateAllocSetData::Three(x0, x1, x2) if x0 == item || x1 == item || x2 == item => {
                LateAllocSetData::Three(x0, x1, x2)
            }
            LateAllocSetData::Three(x0, x1, x2) => {
                LateAllocSetData::Set(BTreeSet::from([x0, x1, x2, item]))
            }

            LateAllocSetData::Set(mut xs) => {
                xs.insert(item);
                LateAllocSetData::Set(xs)
            }
        })
    }
}

impl<T> FromIterator<T> for LateAllocSet<T>
where
T: std::cmp::Ord,
{
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let data: LateAllocSetData<T> = iter.into_iter().collect();
LateAllocSet { data }
}
}
#[cfg(test)]
mod tests {
    use std::{collections::BTreeSet, time::Instant};

    use super::{LateAllocSet, LateAllocSetData};
    use crate::token::Token;

    /// Calls `f` one million times and prints the total elapsed time.
    fn time_1m<F>(f: F)
    where
        F: Fn(),
    {
        // `Instant` is monotonic, so measuring with it cannot fail. `SystemTime::elapsed`
        // returns an error (and the old `unwrap` would panic) if the wall clock is adjusted
        // backwards while the benchmark runs.
        let start = Instant::now();
        for _ in 0..1_000_000 {
            f();
        }
        println!("{:?}", start.elapsed());
    }

    /// Times the insert that takes a set from N to N + 1 elements, for N in 0..=4.
    ///
    /// Each closure produces one of five distinct elements. Elements are produced fresh for
    /// every iteration, so the cost of constructing them is included in the measurement.
    fn time_1m_inserts_1_to_5<T, F0, F1, F2, F3, F4>(x0: F0, x1: F1, x2: F2, x3: F3, x4: F4)
    where
        T: std::cmp::Ord + Clone,
        F0: Fn() -> T,
        F1: Fn() -> T,
        F2: Fn() -> T,
        F3: Fn() -> T,
        F4: Fn() -> T,
    {
        print!("0 -> 1: ");
        time_1m(|| {
            LateAllocSet { data: LateAllocSetData::None }.insert(x0());
        });

        print!("1 -> 2: ");
        time_1m(|| {
            LateAllocSet { data: LateAllocSetData::One(x0()) }.insert(x1());
        });

        print!("2 -> 3: ");
        time_1m(|| {
            LateAllocSet { data: LateAllocSetData::Two(x0(), x1()) }.insert(x2());
        });

        print!("3 -> 4: ");
        time_1m(|| {
            LateAllocSet { data: LateAllocSetData::Three(x0(), x1(), x2()) }.insert(x3());
        });

        print!("4 -> 5: ");
        time_1m(|| {
            LateAllocSet { data: LateAllocSetData::Set(BTreeSet::from([x0(), x1(), x2(), x3()])) }
                .insert(x4());
        });
    }

    /// Manual benchmark (ignored by default) that prints insert timings for several element
    /// types.
    #[test]
    #[ignore]
    fn inserts_different_types() {
        println!("\nelement type: &str");
        time_1m_inserts_1_to_5(|| "a", || "b", || "c", || "d", || "e");

        println!("\nelement type: u32");
        time_1m_inserts_1_to_5(|| 0, || 1, || 2, || 3, || 4);

        println!("\nelement type: Token");
        time_1m_inserts_1_to_5(
            || Token::Ampersand,
            || Token::Arrow,
            || Token::Assign,
            || Token::Bang,
            || Token::Caret,
        );

        println!("\nelement type: String");
        time_1m_inserts_1_to_5(
            || String::from("a"),
            || String::from("b"),
            || String::from("c"),
            || String::from("d"),
            || String::from("e"),
        );
    }
}
40 changes: 40 additions & 0 deletions crates/noirc_frontend/src/parser/labels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use std::fmt;

use crate::token::TokenKind;

#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
jfecher marked this conversation as resolved.
Show resolved Hide resolved
/// Labels naming the kind of syntax the parser expected at a given point.
///
/// The `Display` implementation renders each label as lowercase words.
pub enum ParserLabel {
/// Displayed as "atom".
Atom,
/// Displayed as "binary operator".
BinaryOperator,
/// Displayed as "cast".
Cast,
/// Displayed as "expression".
Expression,
/// Displayed as "field access".
FieldAccess,
/// Displayed as "global".
Global,
/// Displayed as "integer type".
IntegerType,
/// Displayed as "parameter".
Parameter,
/// Displayed as "pattern".
Pattern,
/// Displayed as "statement".
Statement,
/// Displayed as "term".
Term,
/// Displayed as "type expression".
TypeExpression,
/// Wraps a `TokenKind`; displayed using the wrapped value's `Debug` output.
TokenKind(TokenKind),
}

/// Renders a label as the text shown to the user.
impl fmt::Display for ParserLabel {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let text = match self {
            ParserLabel::Atom => "atom",
            ParserLabel::BinaryOperator => "binary operator",
            ParserLabel::Cast => "cast",
            ParserLabel::Expression => "expression",
            ParserLabel::FieldAccess => "field access",
            ParserLabel::Global => "global",
            ParserLabel::IntegerType => "integer type",
            ParserLabel::Parameter => "parameter",
            ParserLabel::Pattern => "pattern",
            ParserLabel::Statement => "statement",
            ParserLabel::Term => "term",
            ParserLabel::TypeExpression => "type expression",
            // Token kinds have no fixed text here; they are printed via their `Debug` form.
            ParserLabel::TokenKind(token_kind) => return write!(f, "{:?}", token_kind),
        };
        f.write_str(text)
    }
}
1 change: 1 addition & 0 deletions crates/noirc_frontend/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//! This file is mostly helper functions and types for the parser. For the parser itself,
//! see parser.rs. The definition of the abstract syntax tree can be found in the `ast` folder.
mod errors;
mod labels;
#[allow(clippy::module_inception)]
mod parser;

Expand Down
Loading