Skip to content

Commit

Permalink
Merge pull request #1792 from rust-bakery/nom-language
Browse files Browse the repository at this point in the history
Introduce the nom-language crate
  • Loading branch information
Geal authored Dec 8, 2024
2 parents c5c8f49 + 6a25312 commit 555eab9
Show file tree
Hide file tree
Showing 15 changed files with 1,046 additions and 286 deletions.
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ default-features = false
[dev-dependencies]
doc-comment = "0.3"
proptest = "=1.0.0"

nom-language = { path = "./nom-language" }

[package.metadata.docs.rs]
features = ["alloc", "std", "docsrs"]
Expand All @@ -66,6 +66,10 @@ name = "css"
[[test]]
name = "custom_errors"

[[test]]
name = "expression_ast"
required-features = ["alloc"]

[[test]]
name = "float"

Expand Down Expand Up @@ -142,4 +146,4 @@ coveralls = { repository = "Geal/nom", branch = "main", service = "github" }
maintenance = { status = "actively-developed" }

[workspace]
members = [".", "benchmarks/"]
members = [".", "benchmarks/", "nom-language"]
1 change: 1 addition & 0 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ harness = false

[dev-dependencies]
codspeed-criterion-compat = "2.4.1"
nom-language = { path = "../nom-language" }
3 changes: 2 additions & 1 deletion benchmarks/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ use nom::{
bytes::{tag, take},
character::{anychar, char, multispace0, none_of},
combinator::{map, map_opt, map_res, value, verify},
error::{Error, ErrorKind, FromExternalError, ParseError, VerboseError},
error::{Error, ErrorKind, FromExternalError, ParseError},
multi::{fold, separated_list0},
number::double,
number::recognize_float,
sequence::{delimited, preceded, separated_pair},
Check, Complete, Emit, IResult, Mode, OutputM, Parser,
};
use nom_language::error::VerboseError;

use std::{collections::HashMap, marker::PhantomData, num::ParseIntError};

Expand Down
1 change: 1 addition & 0 deletions doc/choosing_a_combinator.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ The following parsers could be found on [docs.rs number section](https://docs.rs

- [`escaped`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped.html): Matches a byte string with escaped characters
- [`escaped_transform`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped_transform.html): Matches a byte string with escaped characters, and returns a new string with the escaped characters replaced
- [`precedence`](https://docs.rs/nom-language/latest/nom_language/precedence/fn.precedence.html): Parses an expression with regard to operator precedence

## Binary format parsing

Expand Down
3 changes: 2 additions & 1 deletion examples/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ use nom::{
bytes::complete::{escaped, tag, take_while},
character::complete::{alphanumeric1 as alphanumeric, char, one_of},
combinator::{cut, map, opt, value},
error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
error::{context, ContextError, ErrorKind, ParseError},
multi::separated_list0,
number::complete::double,
sequence::{delimited, preceded, separated_pair, terminated},
Err, IResult, Parser,
};
use nom_language::error::{convert_error, VerboseError};
use std::collections::HashMap;
use std::str;

Expand Down
3 changes: 2 additions & 1 deletion examples/s_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use nom::{
bytes::complete::tag,
character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
combinator::{cut, map, map_res, opt},
error::{context, VerboseError},
error::context,
multi::many,
sequence::{delimited, preceded, terminated},
IResult, Parser,
};
use nom_language::error::VerboseError;

/// We start by defining the types that define the shape of data that we want.
/// In this case, we want something tree-like
Expand Down
11 changes: 11 additions & 0 deletions nom-language/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "nom-language"
version = "0.0.1"
authors = ["[email protected]"]
description = "Language parsing focused combinators for the nom parser library"
edition = "2021"
license = "MIT"
repository = "https://github.com/rust-bakery/nom"

[dependencies]
nom = { path = "..", version = "8.0.0-alpha2" }
262 changes: 262 additions & 0 deletions nom-language/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
use std::fmt;

use nom::{
error::{ContextError, ErrorKind, FromExternalError, ParseError},
ErrorConvert,
};

/// Error type that keeps every error reported while backtracking through a
/// parse tree, together with the input position of each one.
///
/// After some post processing (for instance with `convert_error`) the
/// collected entries can be turned into user friendly messages.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VerboseError<I> {
  /// Errors accumulated so far; each entry pairs the affected part of the
  /// input data with a description of what went wrong there
  pub errors: Vec<(I, VerboseErrorKind)>,
}

/// Describes the cause of a single entry stored in a `VerboseError`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VerboseErrorKind {
  /// A static label attached by the `context` function
  Context(&'static str),
  /// The character that the `char` function expected to find
  Char(char),
  /// An error kind reported by one of nom's parsers
  Nom(ErrorKind),
}

/// `ParseError` implementation that records each reported error as an entry
/// of the `errors` list.
impl<I> ParseError<I> for VerboseError<I> {
  /// Starts a new error list holding a single nom error kind
  fn from_error_kind(input: I, kind: ErrorKind) -> Self {
    let entry = (input, VerboseErrorKind::Nom(kind));
    Self {
      errors: vec![entry],
    }
  }

  /// Pushes a nom error kind at the end of the list carried by `other`
  fn append(input: I, kind: ErrorKind, other: Self) -> Self {
    let mut trace = other;
    trace.errors.push((input, VerboseErrorKind::Nom(kind)));
    trace
  }

  /// Starts a new error list recording the character that was expected
  fn from_char(input: I, c: char) -> Self {
    let entry = (input, VerboseErrorKind::Char(c));
    Self {
      errors: vec![entry],
    }
  }
}

impl<I> ContextError<I> for VerboseError<I> {
  /// Records the label `ctx` at position `input`, after the errors already
  /// collected in `other`
  fn add_context(input: I, ctx: &'static str, other: Self) -> Self {
    let Self { mut errors } = other;
    errors.push((input, VerboseErrorKind::Context(ctx)));
    Self { errors }
  }
}

impl<I, E> FromExternalError<I, E> for VerboseError<I> {
/// Create a new error from an input position and an external error
fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
Self::from_error_kind(input, kind)
}
}

/// Prints a `Parse error:` header followed by one line per recorded error,
/// in the order they were accumulated.
impl<I: fmt::Display> fmt::Display for VerboseError<I> {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    writeln!(f, "Parse error:")?;

    // Stops at the first formatting failure, like `?` inside a `for` loop
    self.errors.iter().try_for_each(|(input, error)| match error {
      VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input),
      VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input),
      VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input),
    })
  }
}

impl<I: fmt::Debug + fmt::Display> std::error::Error for VerboseError<I> {}

impl From<VerboseError<&[u8]>> for VerboseError<Vec<u8>> {
fn from(value: VerboseError<&[u8]>) -> Self {
VerboseError {
errors: value
.errors
.into_iter()
.map(|(i, e)| (i.to_owned(), e))
.collect(),
}
}
}

impl From<VerboseError<&str>> for VerboseError<String> {
fn from(value: VerboseError<&str>) -> Self {
VerboseError {
errors: value
.errors
.into_iter()
.map(|(i, e)| (i.to_owned(), e))
.collect(),
}
}
}

/// Converts an error over `(I, usize)` inputs into one over plain `I` inputs
/// by dropping the `usize` half of every recorded position.
// NOTE(review): the `usize` is presumably the bit offset used by nom's
// bit-level parsers — confirm against `nom::ErrorConvert`.
impl<I> ErrorConvert<VerboseError<I>> for VerboseError<(I, usize)> {
  fn convert(self) -> VerboseError<I> {
    let errors = self
      .errors
      .into_iter()
      .map(|((input, _), kind)| (input, kind))
      .collect();
    VerboseError { errors }
  }
}

/// Converts an error over plain `I` inputs into one over `(I, usize)` inputs
/// by pairing every recorded position with `0`.
impl<I> ErrorConvert<VerboseError<(I, usize)>> for VerboseError<I> {
  fn convert(self) -> VerboseError<(I, usize)> {
    let errors = self
      .errors
      .into_iter()
      .map(|(input, kind)| ((input, 0), kind))
      .collect();
    VerboseError { errors }
  }
}

/// Locates the byte `offset` inside `input`.
///
/// Returns the 1-based line number, the text of that line with trailing
/// whitespace removed, and the 1-based column counted in characters.
fn locate_in_input(input: &str, offset: usize) -> (usize, &str, usize) {
  let prefix = &input.as_bytes()[..offset];

  // Count the number of newlines in the first `offset` bytes of input
  let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;

  // The line starts right after the *last* newline before the offset,
  // or at the very beginning of the input when there is none
  let line_begin = prefix
    .iter()
    .rposition(|&b| b == b'\n')
    .map_or(0, |pos| pos + 1);

  // Take the full line that starts there
  let line = input[line_begin..]
    .lines()
    .next()
    .unwrap_or(&input[line_begin..])
    .trim_end();

  // The column is used as a padding width, which `fmt` measures in
  // characters, so count characters rather than bytes: every byte that is
  // not a UTF-8 continuation byte (`0b10xx_xxxx`) starts one character.
  let column_number = prefix[line_begin..]
    .iter()
    .filter(|&&b| (b & 0xC0) != 0x80)
    .count()
    + 1;

  (line_number, line, column_number)
}

/// Transforms a `VerboseError` into a trace with input position information
///
/// Each recorded error produces one numbered paragraph with the 1-based line
/// number, the text of the offending line and a `^` caret under the position
/// where the error was recorded. The caret column is counted in characters,
/// so it stays aligned when the line contains multi-byte UTF-8 sequences.
///
/// The errors contain references to input data that must come from `input`,
/// because nom calculates byte offsets between them.
///
/// # Panics
///
/// Panics if a computed offset lies beyond the end of `input`, which can
/// happen when a recorded slice does not come from `input`.
pub fn convert_error<I: core::ops::Deref<Target = str>>(input: I, e: VerboseError<I>) -> String {
  use nom::Offset;
  use std::fmt::Write;

  let mut result = String::new();

  for (i, (substring, kind)) in e.errors.iter().enumerate() {
    let offset = input.offset(substring);

    let written = if input.is_empty() {
      // There is no line to display: only report what was expected
      match kind {
        VerboseErrorKind::Char(c) => {
          write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c)
        }
        VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s),
        VerboseErrorKind::Nom(nom_err) => {
          write!(&mut result, "{}: in {:?}, got empty input\n\n", i, nom_err)
        }
      }
    } else {
      let (line_number, line, column_number) = locate_in_input(&input, offset);

      match kind {
        VerboseErrorKind::Char(c) => {
          if let Some(actual) = substring.chars().next() {
            write!(
              &mut result,
              "{i}: at line {line_number}:\n\
               {line}\n\
               {caret:>column$}\n\
               expected '{expected}', found {actual}\n\n",
              i = i,
              line_number = line_number,
              line = line,
              caret = '^',
              column = column_number,
              expected = c,
              actual = actual,
            )
          } else {
            write!(
              &mut result,
              "{i}: at line {line_number}:\n\
               {line}\n\
               {caret:>column$}\n\
               expected '{expected}', got end of input\n\n",
              i = i,
              line_number = line_number,
              line = line,
              caret = '^',
              column = column_number,
              expected = c,
            )
          }
        }
        VerboseErrorKind::Context(s) => write!(
          &mut result,
          "{i}: at line {line_number}, in {context}:\n\
           {line}\n\
           {caret:>column$}\n\n",
          i = i,
          line_number = line_number,
          context = s,
          line = line,
          caret = '^',
          column = column_number,
        ),
        VerboseErrorKind::Nom(nom_err) => write!(
          &mut result,
          "{i}: at line {line_number}, in {nom_err:?}:\n\
           {line}\n\
           {caret:>column$}\n\n",
          i = i,
          line_number = line_number,
          nom_err = nom_err,
          line = line,
          caret = '^',
          column = column_number,
        ),
      }
    };

    // Because `write!` to a `String` is infallible, this `unwrap` is fine.
    written.unwrap();
  }

  result
}

/// Regression test: formatting an error produced on empty input must not
/// panic, and must report what was expected.
#[test]
fn convert_error_panic() {
  use nom::character::complete::char;
  use nom::Err;
  use nom::IResult;

  let input = "";

  let result: IResult<_, _, VerboseError<&str>> = char('x')(input);
  let err = match result.unwrap_err() {
    Err::Error(e) => e,
    _ => unreachable!(),
  };

  // Actually run the conversion: building the error alone never exercised
  // the code path this test is named after.
  let msg = convert_error(input, err);
  assert_eq!(msg, "0: expected 'x', got empty input\n\n");
}

/// Regression test for issue 1027: an error located at the very end of a
/// non-empty input must be rendered without panicking.
#[test]
fn issue_1027_convert_error_panic_nonempty() {
  use nom::{character::complete::char, sequence::pair, Err, IResult, Parser};

  let input = "a";
  let expected = "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n";

  let outcome: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input);

  // Only a recoverable error is expected here
  if let Err::Error(err) = outcome.unwrap_err() {
    assert_eq!(convert_error(input, err), expected);
  } else {
    unreachable!();
  }
}
9 changes: 9 additions & 0 deletions nom-language/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//! # Language parsing combinators for the nom parser combinators library
//!
//! nom is a parser combinator library with a focus on safe parsing,
//! streaming patterns, and zero copy.
//! While nom provides general purpose combinators, this crate is targeted
//! at language parsing.
pub mod error;
pub mod precedence;
Loading

0 comments on commit 555eab9

Please sign in to comment.