Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix no space after colon #174

Merged
merged 2 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 44 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -953,20 +953,18 @@ fn parse_token<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
#[allow(missing_docs)]
// WARNING: Exported for internal benchmarks, not fit for public consumption
pub fn parse_uri<'a>(bytes: &mut Bytes<'a>) -> Result<&'a str> {
let start = bytes.pos();
simd::match_uri_vectored(bytes);
// URI must have at least one char
let uri_len = simd::match_uri_vectored(bytes.as_ref());
if uri_len == 0 {
if bytes.pos() == start {
return Err(Error::Token);
}
// SAFETY: these bytes have just been matched here above.
unsafe { bytes.advance(uri_len) };
let uri_slice = bytes.slice();

let space_delim = next!(bytes);
if space_delim == b' ' {
// SAFETY: all bytes within `uri_slice` must have been `is_token` and therefore also utf-8.
let uri = unsafe { str::from_utf8_unchecked(uri_slice) };
Ok(Status::Complete(uri))
if next!(bytes) == b' ' {
return Ok(Status::Complete(
// SAFETY: all bytes up till `i` must have been `is_token` and therefore also utf-8.
unsafe { str::from_utf8_unchecked(bytes.slice_skip(1)) },
));
} else {
Err(Error::Token)
}
Expand Down Expand Up @@ -1181,15 +1179,15 @@ fn parse_headers_iter_uninit<'a>(
#[allow(clippy::never_loop)]
// parse header name until colon
let header_name: &str = 'name: loop {
let len = simd::match_header_name_vectored(bytes.as_ref());
// SAFETY: these bytes have just been matched here above.
unsafe { bytes.advance(len) };
let bslice = bytes.slice();
simd::match_header_name_vectored(bytes);
let mut b = next!(bytes);

// SAFETY: previously bumped by 1 with next! -> always safe.
let bslice = unsafe { bytes.slice_skip(1) };
// SAFETY: previous call to match_header_name_vectored ensured all bytes are valid
// header name chars, and as such also valid utf-8.
let name = unsafe { str::from_utf8_unchecked(bslice) };

let mut b = next!(bytes);
if b == b':' {
break 'name name;
}
Expand All @@ -1215,7 +1213,6 @@ fn parse_headers_iter_uninit<'a>(
// eat white space between colon and value
'whitespace_after_colon: loop {
b = next!(bytes);

if b == b' ' || b == b'\t' {
bytes.slice();
continue 'whitespace_after_colon;
Expand All @@ -1242,9 +1239,7 @@ fn parse_headers_iter_uninit<'a>(
'value_lines: loop {
// parse value till EOL

let len = simd::match_header_value_vectored(bytes.as_ref());
// SAFETY: these bytes have just been matched here above.
unsafe { bytes.advance(len) };
simd::match_header_value_vectored(bytes);
let b = next!(bytes);

//found_ctl
Expand Down Expand Up @@ -1515,6 +1510,20 @@ mod tests {
}
}

req! {
// Regression test: a request header whose value follows the colon directly,
// with no space in between ("User-Agent:omg-...").
// The header value is 51 bytes long, so more than 32 bytes remain when value
// matching starts; the AVX2 matcher only loops while at least 32 bytes are
// left, so this input reaches that vectored path when the AVX2 backend is the
// one in use.
test_request_header_no_space_after_colon,
b"GET / HTTP/1.1\r\nUser-Agent:omg-no-space1234567890some1234567890agent1234567890\r\n\r\n",
|req| {
assert_eq!(req.method.unwrap(), "GET");
assert_eq!(req.path.unwrap(), "/");
assert_eq!(req.version.unwrap(), 1);
assert_eq!(req.headers.len(), 1);
// The name must stop before the colon and the value must start right after it.
assert_eq!(req.headers[0].name, "User-Agent");
assert_eq!(req.headers[0].value, &b"omg-no-space1234567890some1234567890agent1234567890"[..]);
}
}

req! {
test_request_headers_max,
b"GET / HTTP/1.1\r\nA: A\r\nB: B\r\nC: C\r\nD: D\r\n\r\n",
Expand Down Expand Up @@ -2591,4 +2600,20 @@ mod tests {
assert_eq!(response.headers[0].name, "Space-Before-Header");
assert_eq!(response.headers[0].value, &b"hello there"[..]);
}

// Regression test: a response header written as "foo:bar" (no whitespace
// between the colon and the value) must parse into name "foo" / value "bar".
#[test]
fn test_no_space_after_colon() {
// Room for exactly one header, which is all the input contains.
let mut headers = [EMPTY_HEADER; 1];
let mut response = Response::new(&mut headers[..]);
let result = crate::ParserConfig::default()
.parse_response(&mut response, b"HTTP/1.1 200 OK\r\nfoo:bar\r\n\r\n");

// 28 is the byte length of the whole input, i.e. everything was consumed.
assert_eq!(result, Ok(Status::Complete(28)));
assert_eq!(response.version.unwrap(), 1);
assert_eq!(response.code.unwrap(), 200);
assert_eq!(response.reason.unwrap(), "OK");
assert_eq!(response.headers.len(), 1);
// Neither the colon nor any neighbouring byte may leak into name or value.
assert_eq!(response.headers[0].name, "foo");
assert_eq!(response.headers[0].value, &b"bar"[..]);
}
}
44 changes: 19 additions & 25 deletions src/simd/avx2.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
use crate::iter::Bytes;

#[inline]
#[target_feature(enable = "avx2", enable = "sse4.2")]
pub(crate) unsafe fn match_uri_vectored(bytes: &[u8]) -> usize {
let mut len = 0usize;
let mut remaining = bytes;
while remaining.len() >= 32 {
let advance = match_url_char_32_avx(remaining);
len = len.saturating_add(advance);
remaining = &bytes[len..];
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
bytes.advance(advance);

if advance != 32 {
return len;
return;
}
}
// do both, since avx2 only works when bytes.len() >= 32
let advance = super::sse42::match_uri_vectored(remaining);
len = len.saturating_add(advance);
len
super::sse42::match_uri_vectored(bytes)
}

#[inline(always)]
Expand Down Expand Up @@ -60,22 +57,17 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
}

#[target_feature(enable = "avx2", enable = "sse4.2")]
pub(crate) unsafe fn match_header_value_vectored(bytes: &[u8]) -> usize {
let mut len = 0usize;
let mut remaining = bytes;
while remaining.len() >= 32 {
let advance = match_header_value_char_32_avx(remaining);
len = len.saturating_add(advance);
remaining = &bytes[len..];
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
bytes.advance(advance);

if advance != 32 {
return len;
return;
}
}
// do both, since avx2 only works when bytes.len() >= 32
let advance = super::sse42::match_header_value_vectored(remaining);
len = len.saturating_add(advance);
len
super::sse42::match_header_value_vectored(bytes)
}

#[inline(always)]
Expand Down Expand Up @@ -146,7 +138,7 @@ fn avx2_code_matches_header_value_chars_table() {
}

#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool {
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
let slice = [
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
Expand All @@ -157,9 +149,11 @@ unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &[u8]) -> usize) -> bool
b'_', b'_', byte, b'_',
b'_', b'_', b'_', b'_',
];
let mut bytes = Bytes::new(&slice);

f(&mut bytes);

let pos = f(&slice);
match pos {
match bytes.pos() {
32 => true,
26 => false,
_ => unreachable!(),
Expand Down
30 changes: 15 additions & 15 deletions src/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mod swar;
)
),
)))]
pub(crate) use self::swar::*;
pub use self::swar::*;

#[cfg(all(
httparse_simd,
Expand Down Expand Up @@ -59,7 +59,7 @@ mod runtime;
target_arch = "x86_64",
),
))]
pub(crate) use self::runtime::*;
pub use self::runtime::*;

#[cfg(all(
httparse_simd,
Expand All @@ -72,18 +72,18 @@ pub(crate) use self::runtime::*;
))]
mod sse42_compile_time {
#[inline(always)]
pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
super::swar::match_header_name_vectored(b)
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
}

#[inline(always)]
pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_uri_vectored(b) }
}

#[inline(always)]
pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_header_value_vectored(b) }
}
Expand All @@ -98,7 +98,7 @@ mod sse42_compile_time {
target_arch = "x86_64",
),
))]
pub(crate) use self::sse42_compile_time::*;
pub use self::sse42_compile_time::*;

#[cfg(all(
httparse_simd,
Expand All @@ -110,18 +110,18 @@ pub(crate) use self::sse42_compile_time::*;
))]
mod avx2_compile_time {
#[inline(always)]
pub(crate) fn match_header_name_vectored(b: &[u8]) -> usize {
super::swar::match_header_name_vectored(b)
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
}

#[inline(always)]
pub(crate) fn match_uri_vectored(b: &[u8]) -> usize {
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_uri_vectored(b) }
}

#[inline(always)]
pub(crate) fn match_header_value_vectored(b: &[u8]) -> usize {
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_header_value_vectored(b) }
}
Expand All @@ -135,7 +135,7 @@ mod avx2_compile_time {
target_arch = "x86_64",
),
))]
pub(crate) use self::avx2_compile_time::*;
pub use self::avx2_compile_time::*;

#[cfg(all(
httparse_simd,
Expand All @@ -149,4 +149,4 @@ mod neon;
target_arch = "aarch64",
httparse_simd_neon_intrinsics,
))]
pub(crate) use self::neon::*;
pub use self::neon::*;
Loading
Loading