Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Feature multi range #1402

Closed
wants to merge 38 commits into from
Closed
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
1da834c
Add NomRange type and implement range argument for many
cenodis Sep 17, 2021
f2236c8
Add range fold parser
cenodis Sep 17, 2021
c8de0b4
Use core::ops::RangeBounds instead
cenodis Sep 17, 2021
0dd44ca
Allow conversion from usize to RangeBounds<usize>
cenodis Sep 17, 2021
1ee3ee4
Add documentation
cenodis Sep 17, 2021
9644b7f
Fix formatting
cenodis Sep 17, 2021
8e5c840
Move IntoRangeBounds trait into traits.rs
cenodis Sep 17, 2021
2dad275
Deprecate obsolete functions
cenodis Sep 17, 2021
cd48ff0
Rename fold_many to fold
cenodis Sep 17, 2021
d67bafb
Add proper error types, Fix corner cases with unbounded and inclusive…
cenodis Sep 18, 2021
3f814c5
Add tests for usize -> range. Document behaviour of unbounded ranges.
cenodis Sep 18, 2021
346c738
Migrate examples and tests to use ranged parsers instead
cenodis Sep 18, 2021
bc7a403
Offload range iteration to a seperate struct
cenodis Sep 18, 2021
d62f02e
Rename range iterators and add proper documentation
cenodis Sep 18, 2021
617a847
Fix edge case when Bound::Included(usize::MAX)
cenodis Sep 18, 2021
b103e07
Fix warning and add test for exclusive ranges
cenodis Sep 18, 2021
9ad22e8
Use core instead of std, Use deprecated version of usize::MAX to rema…
cenodis Sep 18, 2021
5c0fb7f
Add alloc feature flag for fold tests
cenodis Sep 18, 2021
de0d6ec
Remove println macros
cenodis Sep 18, 2021
a37a728
Better documentation for the range parameter
cenodis Sep 19, 2021
b2121ca
Use explicit for loops instead of iterators
cenodis Sep 19, 2021
d5b1d1c
Use monomorphisation instead of generic RangeBounds
cenodis Sep 21, 2021
207b714
Fix some edge cases, Add documentation
cenodis Sep 22, 2021
8f5da05
Use core::usize::MAX because of MSRV
cenodis Sep 22, 2021
b996058
Disambiguate range methods
cenodis Sep 22, 2021
ea2162e
Use NomRange trait for many
cenodis Sep 22, 2021
eb0f2e2
Fix tests
cenodis Sep 22, 2021
acbacc7
Handle edge cases in trait and remove obsolete map in parsers
cenodis Sep 23, 2021
01ae1a3
Elide subtraction in inclusive ranges
cenodis Sep 24, 2021
63cb930
Elide end bound subtraction where possible
cenodis Sep 24, 2021
f6950a4
Improve and organize tests for many
cenodis Sep 24, 2021
ecc51eb
Cleanup tests
cenodis Sep 25, 2021
ca6eee0
Cleanup
cenodis Sep 25, 2021
9e44ba4
Replace deprecated parsers in bench tests
cenodis Sep 25, 2021
1a4c2b3
Add feature flag to bounds import
cenodis Sep 25, 2021
52d801f
Correct iterator documentation
cenodis Sep 25, 2021
9e66291
Change inversion check to remain compatible with rust 1.41.1
cenodis Sep 25, 2021
fdd720f
Remove deprecation warning
cenodis Sep 25, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions examples/s_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use nom::{
character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
combinator::{cut, map, map_res, opt},
error::{context, VerboseError},
multi::many0,
multi::many,
sequence::{delimited, preceded, terminated, tuple},
IResult, Parser,
};
Expand Down Expand Up @@ -176,7 +176,7 @@ where
/// `tuple` is used to sequence parsers together, so we can translate this directly
/// and then map over it to transform the output into an `Expr::Application`
fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
let application_inner = map(tuple((parse_expr, many0(parse_expr))), |(head, tail)| {
let application_inner = map(tuple((parse_expr, many(0.., parse_expr))), |(head, tail)| {
Expr::Application(Box::new(head), tail)
});
// finally, we wrap it in an s-expression
Expand Down Expand Up @@ -226,7 +226,7 @@ fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>>
// we find the `'` (quote) character, use cut to say that we're unambiguously
// looking for an s-expression of 0 or more expressions, and then parse them
map(
context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))),
context("quote", preceded(tag("'"), cut(s_exp(many(0.., parse_expr))))),
|exprs| Expr::Quote(exprs),
)(i)
}
Expand Down
9 changes: 5 additions & 4 deletions examples/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use nom::bytes::streaming::{is_not, take_while_m_n};
use nom::character::streaming::{char, multispace1};
use nom::combinator::{map, map_opt, map_res, value, verify};
use nom::error::{FromExternalError, ParseError};
use nom::multi::fold_many0;
use nom::multi::fold;
use nom::sequence::{delimited, preceded};
use nom::IResult;

Expand Down Expand Up @@ -139,9 +139,10 @@ fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E>
where
E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
// fold_many0 is the equivalent of iterator::fold. It runs a parser in a loop,
// fold is the equivalent of iterator::fold. It runs a parser in a loop,
// and calls a folding function on each output value.
let build_string = fold_many0(
let build_string = fold(
0..,
// Our parser function– parses a single string fragment
parse_fragment,
// Our init value, an empty string
Expand All @@ -160,7 +161,7 @@ where

// Finally, parse the string. Note that, if `build_string` could accept a raw
// " character, the closing delimiter " would never match. When using
// `delimited` with a looping parser (like fold_many0), be sure that the
// `delimited` with a looping parser (like fold), be sure that the
// loop won't accidentally match your closing delimiter!
delimited(char('"'), build_string, char('"'))(input)
}
Expand Down
6 changes: 6 additions & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ pub enum ErrorKind {
Float,
Satisfy,
Fail,
Many,
Fold,
}

#[rustfmt::skip]
Expand Down Expand Up @@ -477,6 +479,8 @@ pub fn error_to_u32(e: &ErrorKind) -> u32 {
ErrorKind::Float => 73,
ErrorKind::Satisfy => 74,
ErrorKind::Fail => 75,
ErrorKind::Many => 76,
ErrorKind::Fold => 77,
}
}

Expand Down Expand Up @@ -539,6 +543,8 @@ impl ErrorKind {
ErrorKind::Float => "Float",
ErrorKind::Satisfy => "Satisfy",
ErrorKind::Fail => "Fail",
ErrorKind::Many => "Many",
ErrorKind::Fold => "Fold",
}
}
}
Expand Down
176 changes: 175 additions & 1 deletion src/multi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ use crate::error::ParseError;
use crate::internal::{Err, IResult, Needed, Parser};
#[cfg(feature = "alloc")]
use crate::lib::std::vec::Vec;
use crate::traits::{InputLength, InputTake, ToUsize};
use crate::traits::{InputLength, InputTake, ToUsize, IntoRangeBounds, RangeIterator};
use core::num::NonZeroUsize;
use core::ops::{RangeBounds, Bound};

/// Repeats the embedded parser until it fails
/// and returns the results in a `Vec`.
Expand Down Expand Up @@ -37,6 +38,7 @@ use core::num::NonZeroUsize;
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
#[deprecated = "Replaced with `many`"]
cenodis marked this conversation as resolved.
Show resolved Hide resolved
pub fn many0<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
Expand Down Expand Up @@ -92,6 +94,7 @@ where
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
#[deprecated = "Replaced with `many`"]
pub fn many1<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
I: Clone + InputLength,
Expand Down Expand Up @@ -349,6 +352,7 @@ where
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
#[deprecated = "Replaced with `many`"]
pub fn many_m_n<I, O, E, F>(
min: usize,
max: usize,
Expand Down Expand Up @@ -637,6 +641,7 @@ where
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// ```
#[deprecated = "Replaced with `fold`"]
pub fn fold_many0<I, O, E, F, G, H, R>(
mut f: F,
mut init: H,
Expand Down Expand Up @@ -708,6 +713,7 @@ where
/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Many1))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Many1))));
/// ```
#[deprecated = "Replaced with `fold`"]
pub fn fold_many1<I, O, E, F, G, H, R>(
mut f: F,
mut init: H,
Expand Down Expand Up @@ -792,6 +798,7 @@ where
/// assert_eq!(parser(""), Ok(("", vec![])));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"])));
/// ```
#[deprecated = "Replaced with `fold`"]
pub fn fold_many_m_n<I, O, E, F, G, H, R>(
min: usize,
max: usize,
Expand Down Expand Up @@ -991,3 +998,170 @@ where
Ok((input, res))
}
}

/// Repeats the embedded parser and returns the results in a `Vec`.
///
/// Fails if the number of times the embedded parser succeeds is not
/// within the specified range.
///
/// # Arguments
/// * `range` The number of times to apply the parser. A range without
///   an upper bound is the same as `(lower..=usize::MAX)`.
/// * `parse` The parser to apply.
///
/// # Errors
/// * `Err::Failure` with `ErrorKind::Many` if `range` has a lower bound but
///   contains no value at or after it (an empty or inverted range).
/// * `Err::Error` with `ErrorKind::Many` if the embedded parser succeeds
///   without consuming any input (this would otherwise loop forever).
/// * `Err::Error` with `ErrorKind::Many` appended to the embedded parser's
///   error if it fails while the current iteration count is not contained
///   in `range`.
/// * Any other error of the embedded parser is returned unchanged.
///
/// ```rust
/// # #[macro_use] extern crate nom;
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::many;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
///   many(0..=2, tag("abc"))(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"])));
/// ```
#[cfg(feature = "alloc")]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
pub fn many<I, O, E, F, G, H>(
  range: G,
  mut parse: F,
) -> impl FnMut(I) -> IResult<I, Vec<O>, E>
where
  I: Clone + InputLength,
  F: Parser<I, O, E>,
  E: ParseError<I>,
  G: IntoRangeBounds<H>,
  H: RangeBounds<usize>,
{
  // Upper limit, in bytes, for the up-front allocation of the result vector.
  // The lower bound of the range is caller-controlled; pre-allocating it
  // verbatim would panic or abort for huge values before any parsing happens.
  const MAX_INITIAL_CAPACITY_BYTES: usize = 65536;

  let range = range.convert();
  move |mut input: I| {
    // Smallest count admitted by the lower bound, or `None` when unbounded.
    let lower = match range.start_bound() {
      Bound::Included(start) => Some(*start),
      // An excluded start admits `start + 1` at the earliest. If that
      // overflows, nothing can follow the bound and the range is empty.
      Bound::Excluded(start) => match start.checked_add(1) {
        Some(first) => Some(first),
        None => return Err(Err::Failure(E::from_error_kind(input, ErrorKind::Many))),
      },
      Bound::Unbounded => None,
    };

    // Inversion check: a bounded range must contain its own first value.
    // (Testing the excluded bound itself with `contains` is always false,
    // so the first admissible value is tested instead.)
    if let Some(first) = lower {
      if !range.contains(&first) {
        return Err(Err::Failure(E::from_error_kind(input, ErrorKind::Many)));
      }
    }

    // `max(1)` keeps the division defined for zero-sized output types.
    let max_initial_capacity = MAX_INITIAL_CAPACITY_BYTES / core::mem::size_of::<O>().max(1);
    let mut res =
      crate::lib::std::vec::Vec::with_capacity(lower.unwrap_or(0).min(max_initial_capacity));

    for count in range.bounded_iter() {
      let len = input.input_len();
      match parse.parse(input.clone()) {
        Ok((tail, value)) => {
          // infinite loop check: the parser must always consume
          if tail.input_len() == len {
            return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many)));
          }

          res.push(value);
          input = tail;
        }
        Err(Err::Error(e)) => {
          // A recoverable error ends the repetition. Whether that is a
          // success depends on the current count being inside the range.
          if !range.contains(&count) {
            return Err(Err::Error(E::append(input, ErrorKind::Many, e)));
          } else {
            return Ok((input, res));
          }
        }
        Err(e) => {
          return Err(e);
        }
      }
    }

    Ok((input, res))
  }
}

/// Applies a parser and accumulates the results using a given
/// function and initial value.
///
/// Fails if the number of times the embedded parser succeeds is not
/// within the specified range.
///
/// # Arguments
/// * `range` The number of times to apply the parser. A range without
///   an upper bound means the parser can run an unlimited number of times.
/// * `parse` The parser to apply.
/// * `init` A function returning the initial value.
/// * `fold` The function that combines a result of `parse` with
///   the current accumulator.
///
/// # Errors
/// * `Err::Failure` with `ErrorKind::Fold` if `range` has a lower bound but
///   contains no value at or after it (an empty or inverted range).
/// * `Err::Error` with `ErrorKind::Fold` if the embedded parser succeeds
///   without consuming any input (this would otherwise loop forever).
/// * `Err::Error` with `ErrorKind::Fold` appended to the embedded parser's
///   error if it fails while the current iteration count is not contained
///   in `range`.
/// * Any other error of the embedded parser is returned unchanged.
///
/// ```rust
/// # #[macro_use] extern crate nom;
/// # use nom::{Err, error::ErrorKind, Needed, IResult};
/// use nom::multi::fold;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, Vec<&str>> {
///   fold(
///     0..=2,
///     tag("abc"),
///     Vec::new,
///     |mut acc: Vec<_>, item| {
///       acc.push(item);
///       acc
///     }
///   )(s)
/// }
///
/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"])));
/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"])));
/// assert_eq!(parser("123123"), Ok(("123123", vec![])));
/// assert_eq!(parser(""), Ok(("", vec![])));
/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"])));
/// ```
pub fn fold<I, O, E, F, G, H, J, K, R>(
  range: J,
  mut parse: F,
  mut init: H,
  mut fold: G,
) -> impl FnMut(I) -> IResult<I, R, E>
where
  I: Clone + InputLength,
  F: Parser<I, O, E>,
  G: FnMut(R, O) -> R,
  H: FnMut() -> R,
  E: ParseError<I>,
  J: IntoRangeBounds<K>,
  K: RangeBounds<usize>,
{
  let range = range.convert();
  move |mut input: I| {
    // Smallest count admitted by the lower bound, or `None` when unbounded.
    let lower = match range.start_bound() {
      Bound::Included(start) => Some(*start),
      // An excluded start admits `start + 1` at the earliest. If that
      // overflows, nothing can follow the bound and the range is empty.
      Bound::Excluded(start) => match start.checked_add(1) {
        Some(first) => Some(first),
        None => return Err(Err::Failure(E::from_error_kind(input, ErrorKind::Fold))),
      },
      Bound::Unbounded => None,
    };

    // Inversion check: a bounded range must contain its own first value.
    // (Testing the excluded bound itself with `contains` is always false,
    // so the first admissible value is tested instead.)
    if let Some(first) = lower {
      if !range.contains(&first) {
        return Err(Err::Failure(E::from_error_kind(input, ErrorKind::Fold)));
      }
    }

    let mut acc = init();

    for count in range.saturating_iter() {
      let len = input.input_len();
      match parse.parse(input.clone()) {
        Ok((tail, value)) => {
          // infinite loop check: the parser must always consume
          if tail.input_len() == len {
            return Err(Err::Error(E::from_error_kind(tail, ErrorKind::Fold)));
          }

          acc = fold(acc, value);
          input = tail;
        }
        // FIXME: handle failure properly
        Err(Err::Error(err)) => {
          // A recoverable error ends the repetition. Whether that is a
          // success depends on the current count being inside the range.
          if !range.contains(&count) {
            return Err(Err::Error(E::append(input, ErrorKind::Fold, err)));
          } else {
            break;
          }
        }
        Err(e) => return Err(e),
      }
    }

    Ok((input, acc))
  }
}
Loading