Merge pull request #1792 from rust-bakery/nom-language

Introduce the nom-language crate
rust-bakery · Dec 8, 2024 · 555eab9 · 555eab9
2 parents c5c8f49 + 6a25312
commit 555eab9
Show file tree

Hide file tree

Showing 15 changed files with 1,046 additions and 286 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -41,7 +41,7 @@ default-features = false
 [dev-dependencies]
 doc-comment = "0.3"
 proptest = "=1.0.0"
-
+nom-language = { path = "./nom-language" }
 
 [package.metadata.docs.rs]
 features = ["alloc", "std", "docsrs"]
@@ -66,6 +66,10 @@ name = "css"
 [[test]]
 name = "custom_errors"
 
+[[test]]
+name = "expression_ast"
+required-features = ["alloc"]
+
 [[test]]
 name = "float"
 
@@ -142,4 +146,4 @@ coveralls = { repository = "Geal/nom", branch = "main", service = "github" }
 maintenance = { status = "actively-developed" }
 
 [workspace]
-members = [".", "benchmarks/"]
+members = [".", "benchmarks/", "nom-language"]
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
@@ -56,3 +56,4 @@ harness = false
 
 [dev-dependencies]
 codspeed-criterion-compat = "2.4.1"
+nom-language = { path = "../nom-language" }
diff --git a/benchmarks/benches/json.rs b/benchmarks/benches/json.rs
@@ -7,13 +7,14 @@ use nom::{
   bytes::{tag, take},
   character::{anychar, char, multispace0, none_of},
   combinator::{map, map_opt, map_res, value, verify},
-  error::{Error, ErrorKind, FromExternalError, ParseError, VerboseError},
+  error::{Error, ErrorKind, FromExternalError, ParseError},
   multi::{fold, separated_list0},
   number::double,
   number::recognize_float,
   sequence::{delimited, preceded, separated_pair},
   Check, Complete, Emit, IResult, Mode, OutputM, Parser,
 };
+use nom_language::error::VerboseError;
 
 use std::{collections::HashMap, marker::PhantomData, num::ParseIntError};
 

diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md
@@ -106,6 +106,7 @@ The following parsers could be found on [docs.rs number section](https://docs.rs
 
 - [`escaped`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped.html): Matches a byte string with escaped characters
 - [`escaped_transform`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped_transform.html): Matches a byte string with escaped characters, and returns a new string with the escaped characters replaced
+- [`precedence`](https://docs.rs/nom-language/latest/nom_language/precedence/fn.precedence.html): Parses an expression with regard to operator precedence
 
 ## Binary format parsing
 

diff --git a/examples/json.rs b/examples/json.rs
@@ -5,12 +5,13 @@ use nom::{
   bytes::complete::{escaped, tag, take_while},
   character::complete::{alphanumeric1 as alphanumeric, char, one_of},
   combinator::{cut, map, opt, value},
-  error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
+  error::{context, ContextError, ErrorKind, ParseError},
   multi::separated_list0,
   number::complete::double,
   sequence::{delimited, preceded, separated_pair, terminated},
   Err, IResult, Parser,
 };
+use nom_language::error::{convert_error, VerboseError};
 use std::collections::HashMap;
 use std::str;
 

diff --git a/examples/s_expression.rs b/examples/s_expression.rs
@@ -9,11 +9,12 @@ use nom::{
   bytes::complete::tag,
   character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
   combinator::{cut, map, map_res, opt},
-  error::{context, VerboseError},
+  error::context,
   multi::many,
   sequence::{delimited, preceded, terminated},
   IResult, Parser,
 };
+use nom_language::error::VerboseError;
 
 /// We start by defining the types that define the shape of data that we want.
 /// In this case, we want something tree-like

diff --git a/nom-language/Cargo.toml b/nom-language/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "nom-language"
+version = "0.0.1"
+authors = ["[email protected]"]
+description = "Language parsing focused combinators for the nom parser library"
+edition = "2021"
+license = "MIT"
+repository = "https://github.com/rust-bakery/nom"
+
+[dependencies]
+nom = { path = "..", version = "8.0.0-alpha2" }
diff --git a/nom-language/src/error.rs b/nom-language/src/error.rs
@@ -0,0 +1,262 @@
+use std::fmt;
+
+use nom::{
+  error::{ContextError, ErrorKind, FromExternalError, ParseError},
+  ErrorConvert,
+};
+
+/// This error type accumulates errors and their position when backtracking
+/// through a parse tree. With some post processing,
+/// it can be used to display user friendly error messages
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct VerboseError<I> {
+  /// List of errors accumulated by `VerboseError`, containing the affected
+  /// part of input data, and some context
+  pub errors: Vec<(I, VerboseErrorKind)>,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+/// Error context for `VerboseError`
+pub enum VerboseErrorKind {
+  /// Static string added by the `context` function
+  Context(&'static str),
+  /// Indicates which character was expected by the `char` function
+  Char(char),
+  /// Error kind given by various nom parsers
+  Nom(ErrorKind),
+}
+
+impl<I> ParseError<I> for VerboseError<I> {
+  fn from_error_kind(input: I, kind: ErrorKind) -> Self {
+    VerboseError {
+      errors: vec![(input, VerboseErrorKind::Nom(kind))],
+    }
+  }
+
+  fn append(input: I, kind: ErrorKind, mut other: Self) -> Self {
+    other.errors.push((input, VerboseErrorKind::Nom(kind)));
+    other
+  }
+
+  fn from_char(input: I, c: char) -> Self {
+    VerboseError {
+      errors: vec![(input, VerboseErrorKind::Char(c))],
+    }
+  }
+}
+
+impl<I> ContextError<I> for VerboseError<I> {
+  fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self {
+    other.errors.push((input, VerboseErrorKind::Context(ctx)));
+    other
+  }
+}
+
+impl<I, E> FromExternalError<I, E> for VerboseError<I> {
+  /// Create a new error from an input position and an external error
+  fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
+    Self::from_error_kind(input, kind)
+  }
+}
+
+impl<I: fmt::Display> fmt::Display for VerboseError<I> {
+  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    writeln!(f, "Parse error:")?;
+    for (input, error) in &self.errors {
+      match error {
+        VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?,
+        VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?,
+        VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?,
+      }
+    }
+
+    Ok(())
+  }
+}
+
+impl<I: fmt::Debug + fmt::Display> std::error::Error for VerboseError<I> {}
+
+impl From<VerboseError<&[u8]>> for VerboseError<Vec<u8>> {
+  fn from(value: VerboseError<&[u8]>) -> Self {
+    VerboseError {
+      errors: value
+        .errors
+        .into_iter()
+        .map(|(i, e)| (i.to_owned(), e))
+        .collect(),
+    }
+  }
+}
+
+impl From<VerboseError<&str>> for VerboseError<String> {
+  fn from(value: VerboseError<&str>) -> Self {
+    VerboseError {
+      errors: value
+        .errors
+        .into_iter()
+        .map(|(i, e)| (i.to_owned(), e))
+        .collect(),
+    }
+  }
+}
+
+impl<I> ErrorConvert<VerboseError<I>> for VerboseError<(I, usize)> {
+  fn convert(self) -> VerboseError<I> {
+    VerboseError {
+      errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(),
+    }
+  }
+}
+
+impl<I> ErrorConvert<VerboseError<(I, usize)>> for VerboseError<I> {
+  fn convert(self) -> VerboseError<(I, usize)> {
+    VerboseError {
+      errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(),
+    }
+  }
+}
+
+/// Transforms a `VerboseError` into a trace with input position information
+///
+/// The errors contain references to input data that must come from `input`,
+/// because nom calculates byte offsets between them
+pub fn convert_error<I: core::ops::Deref<Target = str>>(input: I, e: VerboseError<I>) -> String {
+  use nom::Offset;
+  use std::fmt::Write;
+
+  let mut result = String::new();
+
+  for (i, (substring, kind)) in e.errors.iter().enumerate() {
+    let offset = input.offset(substring);
+
+    if input.is_empty() {
+      match kind {
+        VerboseErrorKind::Char(c) => {
+          write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c)
+        }
+        VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s),
+        VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e),
+      }
+    } else {
+      let prefix = &input.as_bytes()[..offset];
+
+      // Count the number of newlines in the first `offset` bytes of input
+      let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;
+
+      // Find the line that includes the subslice:
+      // Find the *last* newline before the substring starts
+      let line_begin = prefix
+        .iter()
+        .rev()
+        .position(|&b| b == b'\n')
+        .map(|pos| offset - pos)
+        .unwrap_or(0);
+
+      // Find the full line after that newline
+      let line = input[line_begin..]
+        .lines()
+        .next()
+        .unwrap_or(&input[line_begin..])
+        .trim_end();
+
+      // The (1-indexed) column number is the offset of our substring into that line
+      let column_number = line.offset(substring) + 1;
+
+      match kind {
+        VerboseErrorKind::Char(c) => {
+          if let Some(actual) = substring.chars().next() {
+            write!(
+              &mut result,
+              "{i}: at line {line_number}:\n\
+               {line}\n\
+               {caret:>column$}\n\
+               expected '{expected}', found {actual}\n\n",
+              i = i,
+              line_number = line_number,
+              line = line,
+              caret = '^',
+              column = column_number,
+              expected = c,
+              actual = actual,
+            )
+          } else {
+            write!(
+              &mut result,
+              "{i}: at line {line_number}:\n\
+               {line}\n\
+               {caret:>column$}\n\
+               expected '{expected}', got end of input\n\n",
+              i = i,
+              line_number = line_number,
+              line = line,
+              caret = '^',
+              column = column_number,
+              expected = c,
+            )
+          }
+        }
+        VerboseErrorKind::Context(s) => write!(
+          &mut result,
+          "{i}: at line {line_number}, in {context}:\n\
+             {line}\n\
+             {caret:>column$}\n\n",
+          i = i,
+          line_number = line_number,
+          context = s,
+          line = line,
+          caret = '^',
+          column = column_number,
+        ),
+        VerboseErrorKind::Nom(e) => write!(
+          &mut result,
+          "{i}: at line {line_number}, in {nom_err:?}:\n\
+             {line}\n\
+             {caret:>column$}\n\n",
+          i = i,
+          line_number = line_number,
+          nom_err = e,
+          line = line,
+          caret = '^',
+          column = column_number,
+        ),
+      }
+    }
+    // Because `write!` to a `String` is infallible, this `unwrap` is fine.
+    .unwrap();
+  }
+
+  result
+}
+
+#[test]
+fn convert_error_panic() {
+  use nom::character::complete::char;
+  use nom::IResult;
+
+  let input = "";
+
+  let _result: IResult<_, _, VerboseError<&str>> = char('x')(input);
+}
+
+#[test]
+fn issue_1027_convert_error_panic_nonempty() {
+  use nom::character::complete::char;
+  use nom::sequence::pair;
+  use nom::Err;
+  use nom::IResult;
+  use nom::Parser;
+
+  let input = "a";
+
+  let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input);
+  let err = match result.unwrap_err() {
+    Err::Error(e) => e,
+    _ => unreachable!(),
+  };
+
+  let msg = convert_error(input, err);
+  assert_eq!(
+    msg,
+    "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n"
+  );
+}
diff --git a/nom-language/src/lib.rs b/nom-language/src/lib.rs
@@ -0,0 +1,9 @@
+//! # Language parsing combinators for the nom parser combinators library
+//!
+//! nom is a parser combinator library with a focus on safe parsing,
+//! streaming patterns, and zero copy.
+//! While nom provides general purpose combinators, this crate is targeted
+//! at language parsing.
+
+pub mod error;
+pub mod precedence;
Original file line number	Diff line number	Diff line change
Expand Up		@@ -56,3 +56,4 @@ harness = false

		[dev-dependencies]
		codspeed-criterion-compat = "2.4.1"
		nom-language = { path = "../nom-language" }