Skip to content

Commit

Permalink
Adds unchecked parsing mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Tpt committed Jan 2, 2024
1 parent e89dc54 commit 01625aa
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 13 deletions.
22 changes: 22 additions & 0 deletions benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ fn iri_parse(c: &mut Criterion) {
}
})
});
c.bench_function("Iri::parse_unchecked", |b| {
b.iter(|| {
for iri in abs_examples().iter() {
Iri::parse_unchecked(*iri).unwrap();
}
})
});
}

fn iri_parse_relative(c: &mut Criterion) {
Expand All @@ -48,6 +55,13 @@ fn iri_parse_relative(c: &mut Criterion) {
}
})
});
c.bench_function("IriRef::parse_unchecked", |b| {
b.iter(|| {
for iri in abs_examples().iter() {
IriRef::parse_unchecked(*iri).unwrap();
}
})
});
}

fn iri_resolve(c: &mut Criterion) {
Expand Down Expand Up @@ -108,6 +122,14 @@ fn iri_resolve(c: &mut Criterion) {
}
})
});
c.bench_function("Iri::resolve_into_unchecked", |b| {
b.iter(|| {
for relative in examples.iter() {
buf.clear();
base.resolve_into_unchecked(relative, &mut buf).unwrap();
}
})
});
}

criterion_group!(iri, iri_parse, iri_parse_relative, iri_resolve);
Expand Down
8 changes: 7 additions & 1 deletion fuzz/fuzz_targets/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ use std::str;
fuzz_target!(|data: &[u8]| {
let base = IriRef::parse("http://a/b/c/d;p?q").unwrap();
if let Ok(s) = str::from_utf8(data) {
let _ = base.resolve(s);
let valid_result = base.resolve(s);

// We check that unchecked resolving gives the same result
let unchecked_result = base.resolve_unchecked(s);
if let Ok(valid) = valid_result {
assert_eq!(valid, unchecked_result.unwrap());
}
}
});
138 changes: 126 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,37 @@ impl<T: Deref<Target = str>> IriRef<T> {
///
/// This operation keeps the `iri` parameter internally and does not allocate.
///
/// Use [`parse_unchecked`](Self::parse_unchecked) if you already know the IRI is valid to get faster processing.
///
/// ```
/// use oxiri::IriRef;
///
/// IriRef::parse("//foo.com/bar/baz")?;
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn parse(iri: T) -> Result<Self, IriParseError> {
let positions = IriParser::parse(&iri, None, &mut VoidOutputBuffer::default())?;
let positions = IriParser::<_, false>::parse(&iri, None, &mut VoidOutputBuffer::default())?;
Ok(Self { iri, positions })
}

/// Variant of [`parse`](Self::parse) that assumes that the IRI is valid and skips validation.
///
/// The parser is run in its unchecked mode, so only use this on input that is already known
/// to be valid (for example because it went through [`parse`](Self::parse) earlier).
/// The `iri` parameter is stored as-is in the returned value.
///
/// # Errors
///
/// Any error still reported by the parser is returned unchanged.
///
/// ```
/// use oxiri::IriRef;
///
/// IriRef::parse_unchecked("//foo.com/bar/baz")?;
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn parse_unchecked(iri: T) -> Result<Self, IriParseError> {
let positions = IriParser::<_, true>::parse(&iri, None, &mut VoidOutputBuffer::default())?;
Ok(Self { iri, positions })
}

/// Validates and resolves a relative IRI against the current IRI
/// following [RFC 3986](https://www.ietf.org/rfc/rfc3986.html) relative URI resolution algorithm.
///
/// Use [`resolve_unchecked`](Self::resolve_unchecked) if you already know the IRI is valid to get faster processing.
///
/// ```
/// use oxiri::IriRef;
///
Expand All @@ -71,7 +88,26 @@ impl<T: Deref<Target = str>> IriRef<T> {
/// ```
pub fn resolve(&self, iri: &str) -> Result<IriRef<String>, IriParseError> {
let mut target_buffer = String::with_capacity(self.iri.len() + iri.len());
let positions = IriParser::parse(iri, Some(self.as_ref()), &mut target_buffer)?;
let positions = IriParser::<_, false>::parse(iri, Some(self.as_ref()), &mut target_buffer)?;
Ok(IriRef {
iri: target_buffer,
positions,
})
}

/// Variant of [`resolve`](Self::resolve) that assumes that the IRI is valid and skips the validation that is not needed for relative IRI resolution.
///
/// ```
/// use oxiri::IriRef;
///
/// let base_iri = IriRef::parse("//foo.com/bar/baz")?;
/// let iri = base_iri.resolve_unchecked("bat#foo")?;
/// assert_eq!(iri.into_inner(), "//foo.com/bar/bat#foo");
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn resolve_unchecked(&self, iri: &str) -> Result<IriRef<String>, IriParseError> {
let mut target_buffer = String::with_capacity(self.iri.len() + iri.len());
let positions = IriParser::<_, true>::parse(iri, Some(self.as_ref()), &mut target_buffer)?;
Ok(IriRef {
iri: target_buffer,
positions,
Expand All @@ -83,6 +119,8 @@ impl<T: Deref<Target = str>> IriRef<T> {
///
/// It outputs the resolved IRI into `target_buffer` to avoid any memory allocation.
///
/// Use [`resolve_into_unchecked`](Self::resolve_into_unchecked) if you already know the IRI is valid to get faster processing.
///
/// ```
/// use oxiri::IriRef;
///
Expand All @@ -93,7 +131,27 @@ impl<T: Deref<Target = str>> IriRef<T> {
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn resolve_into(&self, iri: &str, target_buffer: &mut String) -> Result<(), IriParseError> {
IriParser::parse(iri, Some(self.as_ref()), target_buffer)?;
IriParser::<_, false>::parse(iri, Some(self.as_ref()), target_buffer)?;
Ok(())
}

/// Variant of [`resolve_into`](Self::resolve_into) that assumes that the IRI is valid
/// and skips the validation that is not needed for relative IRI resolution.
///
/// `iri` is resolved against the current IRI, which is used as the base, and the
/// result is written into `target_buffer`.
///
/// # Errors
///
/// Any error still reported by the parser is returned unchanged.
///
/// ```
/// use oxiri::IriRef;
///
/// let base_iri = IriRef::parse("//foo.com/bar/baz")?;
/// let mut result = String::default();
/// let iri = base_iri.resolve_into_unchecked("bat#foo", &mut result)?;
/// assert_eq!(result, "//foo.com/bar/bat#foo");
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn resolve_into_unchecked(
&self,
iri: &str,
target_buffer: &mut String,
) -> Result<(), IriParseError> {
IriParser::<_, true>::parse(iri, Some(self.as_ref()), target_buffer)?;
Ok(())
}

Expand Down Expand Up @@ -493,6 +551,8 @@ impl<T: Deref<Target = str>> Iri<T> {
///
/// This operation keeps the `iri` parameter internally and does not allocate.
///
/// Use [`parse_unchecked`](Self::parse_unchecked) if you already know the IRI is valid to get faster processing.
///
/// ```
/// use oxiri::Iri;
///
Expand All @@ -503,9 +563,23 @@ impl<T: Deref<Target = str>> Iri<T> {
IriRef::parse(iri)?.try_into()
}

/// Variant of [`parse`](Self::parse) that assumes that the IRI is valid and skips validation.
///
/// Delegates to [`IriRef::parse_unchecked`] and then converts the result into an [`Iri`].
///
/// # Errors
///
/// Returns the error of whichever step fails: the unchecked parsing or the
/// conversion of the parsed reference into an [`Iri`].
///
/// ```
/// use oxiri::Iri;
///
/// Iri::parse_unchecked("http://foo.com/bar/baz")?;
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn parse_unchecked(iri: T) -> Result<Self, IriParseError> {
IriRef::parse_unchecked(iri)?.try_into()
}

/// Validates and resolves a relative IRI against the current IRI
/// following [RFC 3986](https://www.ietf.org/rfc/rfc3986.html) relative URI resolution algorithm.
///
/// Use [`resolve_unchecked`](Self::resolve_unchecked) if you already know the IRI is valid to get faster processing.
///
/// ```
/// use oxiri::Iri;
///
Expand All @@ -518,11 +592,27 @@ impl<T: Deref<Target = str>> Iri<T> {
Ok(Iri(self.0.resolve(iri)?))
}

/// Variant of [`resolve`](Self::resolve) that assumes that the IRI is valid
/// and skips the validation that is not needed for relative IRI resolution.
///
/// Delegates to the wrapped reference's `resolve_unchecked` and wraps the resolved
/// reference into an [`Iri`] without any further check.
///
/// # Errors
///
/// Any error returned by the underlying unchecked resolution is returned unchanged.
///
/// ```
/// use oxiri::Iri;
///
/// let base_iri = Iri::parse("http://foo.com/bar/baz")?;
/// let iri = base_iri.resolve_unchecked("bat#foo")?;
/// assert_eq!(iri.into_inner(), "http://foo.com/bar/bat#foo");
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn resolve_unchecked(&self, iri: &str) -> Result<Iri<String>, IriParseError> {
Ok(Iri(self.0.resolve_unchecked(iri)?))
}

/// Validates and resolves a relative IRI against the current IRI
/// following [RFC 3986](https://www.ietf.org/rfc/rfc3986.html) relative URI resolution algorithm.
///
/// It outputs the resolved IRI into `target_buffer` to avoid any memory allocation.
///
/// Use [`resolve_into_unchecked`](Self::resolve_into_unchecked) if you already know the IRI is valid to get faster processing.
///
/// ```
/// use oxiri::Iri;
///
Expand All @@ -536,6 +626,25 @@ impl<T: Deref<Target = str>> Iri<T> {
self.0.resolve_into(iri, target_buffer)
}

/// Variant of [`resolve_into`](Self::resolve_into) that assumes that the IRI is valid
/// and skips the validation that is not needed for relative IRI resolution.
///
/// Forwards to the wrapped reference's `resolve_into_unchecked`; the resolved IRI
/// is written into `target_buffer`.
///
/// # Errors
///
/// Any error returned by the underlying unchecked resolution is returned unchanged.
///
/// ```
/// use oxiri::Iri;
///
/// let base_iri = Iri::parse("http://foo.com/bar/baz")?;
/// let mut result = String::default();
/// let iri = base_iri.resolve_into_unchecked("bat#foo", &mut result)?;
/// assert_eq!(result, "http://foo.com/bar/bat#foo");
/// # Result::<(), oxiri::IriParseError>::Ok(())
/// ```
pub fn resolve_into_unchecked(
&self,
iri: &str,
target_buffer: &mut String,
) -> Result<(), IriParseError> {
self.0.resolve_into_unchecked(iri, target_buffer)
}

/// Returns an IRI borrowing this IRI's text
#[inline]
pub fn as_ref(&self) -> Iri<&str> {
Expand Down Expand Up @@ -1040,7 +1149,6 @@ struct ParserInput<'a> {
position: usize,
}
impl<'a> ParserInput<'a> {
#[inline]
fn next(&mut self) -> Option<char> {
if let Some(head) = self.value.next() {
self.position += head.len_utf8();
Expand All @@ -1050,12 +1158,10 @@ impl<'a> ParserInput<'a> {
}
}

#[inline]
/// Returns the next character without consuming it.
///
/// The lookup is done on a clone of `self.value`, so neither the input nor
/// `position` is advanced.
fn front(&self) -> Option<char> {
self.value.clone().next()
}

#[inline]
/// Tells whether the text returned by `self.value.as_str()` begins with `c`.
// NOTE(review): presumably that text is the not-yet-consumed part of the input;
// confirm against the type of `value`, which is declared outside this view.
fn starts_with(&self, c: char) -> bool {
self.value.as_str().starts_with(c)
}
Expand All @@ -1064,7 +1170,7 @@ impl<'a> ParserInput<'a> {
/// parser implementing https://url.spec.whatwg.org/#concept-basic-url-parser without the normalization or backward compatibility bits to comply with RFC 3987
///
/// A sub function takes care of each state
struct IriParser<'a, O: OutputBuffer> {
struct IriParser<'a, O: OutputBuffer, const UNCHECKED: bool> {
iri: &'a str,
base: Option<IriRef<&'a str>>,
input: ParserInput<'a>,
Expand All @@ -1073,7 +1179,7 @@ struct IriParser<'a, O: OutputBuffer> {
input_scheme_end: usize,
}

impl<'a, O: OutputBuffer> IriParser<'a, O> {
impl<'a, O: OutputBuffer, const UNCHECKED: bool> IriParser<'a, O, UNCHECKED> {
fn parse(
iri: &'a str,
base: Option<IriRef<&'a str>>,
Expand Down Expand Up @@ -1261,8 +1367,10 @@ impl<'a, O: OutputBuffer> IriParser<'a, O> {
self.output.push(c);
if c == ']' {
let ip = &self.iri[start_position + 1..self.input.position - 1];
if let Err(error) = Ipv6Addr::from_str(ip) {
return self.parse_error(IriParseErrorKind::InvalidHostIp(error));
if !UNCHECKED {
if let Err(error) = Ipv6Addr::from_str(ip) {
return self.parse_error(IriParseErrorKind::InvalidHostIp(error));
}
}

let c = self.input.next();
Expand Down Expand Up @@ -1408,7 +1516,10 @@ impl<'a, O: OutputBuffer> IriParser<'a, O> {
}

fn read_url_codepoint_or_echar(&mut self, c: char) -> Result<(), IriParseError> {
if c == '%' {
if UNCHECKED {
self.output.push(c);
Ok(())
} else if c == '%' {
self.read_echar()
} else if is_url_code_point(c) {
self.output.push(c);
Expand All @@ -1419,7 +1530,10 @@ impl<'a, O: OutputBuffer> IriParser<'a, O> {
}

fn read_url_query_codepoint_or_echar(&mut self, c: char) -> Result<(), IriParseError> {
if c == '%' {
if UNCHECKED {
self.output.push(c);
Ok(())
} else if c == '%' {
self.read_echar()
} else if is_url_query_code_point(c) {
self.output.push(c);
Expand Down
20 changes: 20 additions & 0 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ fn test_parsing() {
if let Err(error) = Iri::parse(*e) {
panic!("{} on IRI {}", error, e);
}
if let Err(error) = Iri::parse_unchecked(*e) {
panic!("{} on IRI {}", error, e);
}
}
}

Expand Down Expand Up @@ -140,6 +143,12 @@ fn test_relative_parsing() {
if let Err(error) = base.resolve(e) {
panic!("{} on relative IRI {}", error, e);
}
if let Err(error) = IriRef::parse_unchecked(*e) {
panic!("{} on relative IRI {}", error, e);
}
if let Err(error) = base.resolve_unchecked(e) {
panic!("{} on relative IRI {}", error, e);
}
}
}

Expand Down Expand Up @@ -531,6 +540,17 @@ fn test_resolve_relative_iri() {
relative, base, error
),
}
match base.resolve_unchecked(relative) {
Ok(result) => assert_eq!(
result.as_str(),
*output,
"Lenient resolving of {relative} against {base} is wrong. Found {result} and expecting {output}"
),
Err(error) => panic!(
"Lenient resolving of {} against {} failed with error: {}",
relative, base, error
),
}
}
}

Expand Down

0 comments on commit 01625aa

Please sign in to comment.