From 01625aa4cd1dc0c2fbe15f4c0dce20e9ed4e6f17 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 1 Jan 2024 21:17:43 +0100 Subject: [PATCH] Adds unchecked parsing mode --- benches/lib.rs | 22 ++++++ fuzz/fuzz_targets/resolve.rs | 8 +- src/lib.rs | 138 ++++++++++++++++++++++++++++++++--- tests/lib.rs | 20 +++++ 4 files changed, 175 insertions(+), 13 deletions(-) diff --git a/benches/lib.rs b/benches/lib.rs index fba308d..47493ec 100644 --- a/benches/lib.rs +++ b/benches/lib.rs @@ -38,6 +38,13 @@ fn iri_parse(c: &mut Criterion) { } }) }); + c.bench_function("Iri::parse_unchecked", |b| { + b.iter(|| { + for iri in abs_examples().iter() { + Iri::parse_unchecked(*iri).unwrap(); + } + }) + }); } fn iri_parse_relative(c: &mut Criterion) { @@ -48,6 +55,13 @@ fn iri_parse_relative(c: &mut Criterion) { } }) }); + c.bench_function("IriRef::parse_unchecked", |b| { + b.iter(|| { + for iri in abs_examples().iter() { + IriRef::parse_unchecked(*iri).unwrap(); + } + }) + }); } fn iri_resolve(c: &mut Criterion) { @@ -108,6 +122,14 @@ fn iri_resolve(c: &mut Criterion) { } }) }); + c.bench_function("Iri::resolve_into_unchecked", |b| { + b.iter(|| { + for relative in examples.iter() { + buf.clear(); + base.resolve_into_unchecked(relative, &mut buf).unwrap(); + } + }) + }); } criterion_group!(iri, iri_parse, iri_parse_relative, iri_resolve); diff --git a/fuzz/fuzz_targets/resolve.rs b/fuzz/fuzz_targets/resolve.rs index 74313ee..459e424 100644 --- a/fuzz/fuzz_targets/resolve.rs +++ b/fuzz/fuzz_targets/resolve.rs @@ -6,6 +6,12 @@ use std::str; fuzz_target!(|data: &[u8]| { let base = IriRef::parse("http://a/b/c/d;p?q").unwrap(); if let Ok(s) = str::from_utf8(data) { - let _ = base.resolve(s); + let valid_result = base.resolve(s); + + // We check that unchecked resolving gives the same result + let unchecked_result = base.resolve_unchecked(s); + if let Ok(valid) = valid_result { + assert_eq!(valid, unchecked_result.unwrap()); + } } }); diff --git a/src/lib.rs b/src/lib.rs index 6e51966..910a703 
100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,8 @@ impl> IriRef { /// /// This operation keeps internally the `iri` parameter and does not allocate. /// + /// Use [`parse_unchecked`](Self::parse_unchecked) if you already know the IRI is valid to get faster processing. + /// /// ``` /// use oxiri::IriRef; /// @@ -54,13 +56,28 @@ impl> IriRef { /// # Result::<(), oxiri::IriParseError>::Ok(()) /// ``` pub fn parse(iri: T) -> Result { - let positions = IriParser::parse(&iri, None, &mut VoidOutputBuffer::default())?; + let positions = IriParser::<_, false>::parse(&iri, None, &mut VoidOutputBuffer::default())?; + Ok(Self { iri, positions }) + } + + /// Variant of [`parse`](Self::parse) that assumes that the IRI is valid to skip validation. + /// + /// ``` + /// use oxiri::IriRef; + /// + /// IriRef::parse_unchecked("//foo.com/bar/baz")?; + /// # Result::<(), oxiri::IriParseError>::Ok(()) + /// ``` + pub fn parse_unchecked(iri: T) -> Result { + let positions = IriParser::<_, true>::parse(&iri, None, &mut VoidOutputBuffer::default())?; Ok(Self { iri, positions }) } /// Validates and resolved a relative IRI against the current IRI /// following [RFC 3986](https://www.ietf.org/rfc/rfc3986.html) relative URI resolution algorithm. /// + /// Use [`resolve_unchecked`](Self::resolve_unchecked) if you already know the IRI is valid to get faster processing. + /// /// ``` /// use oxiri::IriRef; /// @@ -71,7 +88,26 @@ impl> IriRef { /// ``` pub fn resolve(&self, iri: &str) -> Result, IriParseError> { let mut target_buffer = String::with_capacity(self.iri.len() + iri.len()); - let positions = IriParser::parse(iri, Some(self.as_ref()), &mut target_buffer)?; + let positions = IriParser::<_, false>::parse(iri, Some(self.as_ref()), &mut target_buffer)?; + Ok(IriRef { + iri: target_buffer, + positions, + }) + } + + /// Variant of [`resolve`](Self::resolve) that assumes that the IRI is valid and skips the validation steps that are not required for relative IRI resolution.
+ /// + /// ``` + /// use oxiri::IriRef; + /// + /// let base_iri = IriRef::parse("//foo.com/bar/baz")?; + /// let iri = base_iri.resolve_unchecked("bat#foo")?; + /// assert_eq!(iri.into_inner(), "//foo.com/bar/bat#foo"); + /// # Result::<(), oxiri::IriParseError>::Ok(()) + /// ``` + pub fn resolve_unchecked(&self, iri: &str) -> Result, IriParseError> { + let mut target_buffer = String::with_capacity(self.iri.len() + iri.len()); + let positions = IriParser::<_, true>::parse(iri, Some(self.as_ref()), &mut target_buffer)?; Ok(IriRef { iri: target_buffer, positions, @@ -83,6 +119,8 @@ impl> IriRef { /// /// It outputs the resolved IRI into `target_buffer` to avoid any memory allocation. /// + /// Use [`resolve_into_unchecked`](Self::resolve_into_unchecked) if you already know the IRI is valid to get faster processing. + /// /// ``` /// use oxiri::IriRef; /// @@ -93,7 +131,27 @@ impl> IriRef { /// # Result::<(), oxiri::IriParseError>::Ok(()) /// ``` pub fn resolve_into(&self, iri: &str, target_buffer: &mut String) -> Result<(), IriParseError> { - IriParser::parse(iri, Some(self.as_ref()), target_buffer)?; + IriParser::<_, false>::parse(iri, Some(self.as_ref()), target_buffer)?; + Ok(()) + } + + /// Variant of [`resolve_into`](Self::resolve_into) that assumes that the IRI is valid and skips the validation steps that are not required for relative IRI resolution.
+ /// + /// ``` + /// use oxiri::IriRef; + /// + /// let base_iri = IriRef::parse("//foo.com/bar/baz")?; + /// let mut result = String::default(); + /// let iri = base_iri.resolve_into_unchecked("bat#foo", &mut result)?; + /// assert_eq!(result, "//foo.com/bar/bat#foo"); + /// # Result::<(), oxiri::IriParseError>::Ok(()) + /// ``` + pub fn resolve_into_unchecked( + &self, + iri: &str, + target_buffer: &mut String, + ) -> Result<(), IriParseError> { + IriParser::<_, true>::parse(iri, Some(self.as_ref()), target_buffer)?; Ok(()) } @@ -493,6 +551,8 @@ impl> Iri { /// /// This operation keeps internally the `iri` parameter and does not allocate. /// + /// Use [`parse_unchecked`](Self::parse_unchecked) if you already know the IRI is valid to get faster processing. + /// /// ``` /// use oxiri::Iri; /// @@ -503,9 +563,23 @@ impl> Iri { IriRef::parse(iri)?.try_into() } + /// Variant of [`parse`](Self::parse) that assumes that the IRI is valid to skip validation. + /// + /// ``` + /// use oxiri::Iri; + /// + /// Iri::parse_unchecked("http://foo.com/bar/baz")?; + /// # Result::<(), oxiri::IriParseError>::Ok(()) + /// ``` + pub fn parse_unchecked(iri: T) -> Result { + IriRef::parse_unchecked(iri)?.try_into() + } + /// Validates and resolved a relative IRI against the current IRI /// following [RFC 3986](https://www.ietf.org/rfc/rfc3986.html) relative URI resolution algorithm. /// + /// Use [`resolve_unchecked`](Self::resolve_unchecked) if you already know the IRI is valid to get faster processing. + /// /// ``` /// use oxiri::Iri; /// @@ -518,11 +592,27 @@ impl> Iri { Ok(Iri(self.0.resolve(iri)?)) } + /// Variant of [`resolve`](Self::resolve) that assumes that the IRI is valid and skips the validation steps that are not required for relative IRI resolution.
+ /// + /// ``` + /// use oxiri::Iri; + /// + /// let base_iri = Iri::parse("http://foo.com/bar/baz")?; + /// let iri = base_iri.resolve_unchecked("bat#foo")?; + /// assert_eq!(iri.into_inner(), "http://foo.com/bar/bat#foo"); + /// # Result::<(), oxiri::IriParseError>::Ok(()) + /// ``` + pub fn resolve_unchecked(&self, iri: &str) -> Result, IriParseError> { + Ok(Iri(self.0.resolve_unchecked(iri)?)) + } + /// Validates and resolved a relative IRI against the current IRI /// following [RFC 3986](https://www.ietf.org/rfc/rfc3986.html) relative URI resolution algorithm. /// /// It outputs the resolved IRI into `target_buffer` to avoid any memory allocation. /// + /// Use [`resolve_into_unchecked`](Self::resolve_into_unchecked) if you already know the IRI is valid to get faster processing. + /// /// ``` /// use oxiri::Iri; /// @@ -536,6 +626,25 @@ impl> Iri { self.0.resolve_into(iri, target_buffer) } + /// Variant of [`resolve_into`](Self::resolve_into) that assumes that the IRI is valid and skips the validation steps that are not required for relative IRI resolution.
+ /// + /// ``` + /// use oxiri::Iri; + /// + /// let base_iri = Iri::parse("http://foo.com/bar/baz")?; + /// let mut result = String::default(); + /// let iri = base_iri.resolve_into_unchecked("bat#foo", &mut result)?; + /// assert_eq!(result, "http://foo.com/bar/bat#foo"); + /// # Result::<(), oxiri::IriParseError>::Ok(()) + /// ``` + pub fn resolve_into_unchecked( + &self, + iri: &str, + target_buffer: &mut String, + ) -> Result<(), IriParseError> { + self.0.resolve_into_unchecked(iri, target_buffer) + } + /// Returns an IRI borrowing this IRI's text #[inline] pub fn as_ref(&self) -> Iri<&str> { @@ -1040,7 +1149,6 @@ struct ParserInput<'a> { position: usize, } impl<'a> ParserInput<'a> { - #[inline] fn next(&mut self) -> Option { if let Some(head) = self.value.next() { self.position += head.len_utf8(); @@ -1050,12 +1158,10 @@ impl<'a> ParserInput<'a> { } } - #[inline] fn front(&self) -> Option { self.value.clone().next() } - #[inline] fn starts_with(&self, c: char) -> bool { self.value.as_str().starts_with(c) } @@ -1064,7 +1170,7 @@ impl<'a> ParserInput<'a> { /// parser implementing https://url.spec.whatwg.org/#concept-basic-url-parser without the normalization or backward compatibility bits to comply with RFC 3987 /// /// A sub function takes care of each state -struct IriParser<'a, O: OutputBuffer> { +struct IriParser<'a, O: OutputBuffer, const UNCHECKED: bool> { iri: &'a str, base: Option>, input: ParserInput<'a>, @@ -1073,7 +1179,7 @@ struct IriParser<'a, O: OutputBuffer> { input_scheme_end: usize, } -impl<'a, O: OutputBuffer> IriParser<'a, O> { +impl<'a, O: OutputBuffer, const UNCHECKED: bool> IriParser<'a, O, UNCHECKED> { fn parse( iri: &'a str, base: Option>, @@ -1261,8 +1367,10 @@ impl<'a, O: OutputBuffer> IriParser<'a, O> { self.output.push(c); if c == ']' { let ip = &self.iri[start_position + 1..self.input.position - 1]; - if let Err(error) = Ipv6Addr::from_str(ip) { - return self.parse_error(IriParseErrorKind::InvalidHostIp(error)); + if !UNCHECKED { + 
if let Err(error) = Ipv6Addr::from_str(ip) { + return self.parse_error(IriParseErrorKind::InvalidHostIp(error)); + } } let c = self.input.next(); @@ -1408,7 +1516,10 @@ impl<'a, O: OutputBuffer> IriParser<'a, O> { } fn read_url_codepoint_or_echar(&mut self, c: char) -> Result<(), IriParseError> { - if c == '%' { + if UNCHECKED { + self.output.push(c); + Ok(()) + } else if c == '%' { self.read_echar() } else if is_url_code_point(c) { self.output.push(c); @@ -1419,7 +1530,10 @@ impl<'a, O: OutputBuffer> IriParser<'a, O> { } fn read_url_query_codepoint_or_echar(&mut self, c: char) -> Result<(), IriParseError> { - if c == '%' { + if UNCHECKED { + self.output.push(c); + Ok(()) + } else if c == '%' { self.read_echar() } else if is_url_query_code_point(c) { self.output.push(c); diff --git a/tests/lib.rs b/tests/lib.rs index bbf2470..74c69d7 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -40,6 +40,9 @@ fn test_parsing() { if let Err(error) = Iri::parse(*e) { panic!("{} on IRI {}", error, e); } + if let Err(error) = Iri::parse_unchecked(*e) { + panic!("{} on IRI {}", error, e); + } } } @@ -140,6 +143,12 @@ fn test_relative_parsing() { if let Err(error) = base.resolve(e) { panic!("{} on relative IRI {}", error, e); } + if let Err(error) = IriRef::parse_unchecked(*e) { + panic!("{} on relative IRI {}", error, e); + } + if let Err(error) = base.resolve_unchecked(e) { + panic!("{} on relative IRI {}", error, e); + } } } @@ -531,6 +540,17 @@ fn test_resolve_relative_iri() { relative, base, error ), } + match base.resolve_unchecked(relative) { + Ok(result) => assert_eq!( + result.as_str(), + *output, + "Lenient resolving of {relative} against {base} is wrong. Found {result} and expecting {output}" + ), + Err(error) => panic!( + "Lenient resolving of {} against {} failed with error: {}", + relative, base, error + ), + } } }