forked from rust-lang/flate2-rs
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement a SIMD fast path for CRC checksums
Recently I was profiling Cargo's extraction of tarballs and was quite surprised to learn that 15% of the execution time of tarball extraction was entirely crc32 checksum calculations in miniz. This was quite a surprise to me and led me down a long rabbit hole of figuring out how to speed this up! It turns out Intel's written a paper, "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction", which describes how to compute a CRC-32 value using hardware instructions. Note that these are not the hardware CRC instructions, which I think are a different algorithm. This commit implements this paper in Rust, looking to a few other external implementations for guidance as well. Overall the results are quite promising, and I'm pretty confident in the correctness of this as well. Current results look like: * This SIMD implementation runs at about 25GB/s * The miniz implementation runs at about 450MB/s * The zlib implementation, on OSX, runs at 25GB/s (seems to implement the same algorithm) * The bundled zlib implementation (and also the one I found on Linux) runs at 1.4GB/s So this should be ~50 times faster for Cargo (which uses miniz), about 20 times faster for anyone using system zlib on Linux or the bundled zlib, and on par with OSX's zlib performance.
- Loading branch information
1 parent
37a60a7
commit 9b44592
Showing
11 changed files
with
439 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
[package] | ||
name = "flate2-crc" | ||
version = "0.1.0" | ||
authors = ["Alex Crichton <[email protected]>"] | ||
license = "MIT/Apache-2.0" | ||
repository = "https://github.com/alexcrichton/flate2-rs/tree/flate2-crc" | ||
homepage = "https://github.com/alexcrichton/flate2-rs" | ||
documentation = "https://docs.rs/flate2-crc" | ||
description = """ | ||
SIMD acceleration for CRC-32 checksums used in the gzip format | ||
""" | ||
|
||
[dependencies] | ||
cfg-if = "0.1.6" | ||
|
||
[dev-dependencies] | ||
miniz-sys = { path = '../miniz-sys' } | ||
rand = "0.6" | ||
libz-sys = "1.0" | ||
rayon = "1.0.3" | ||
quickcheck = "0.7" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
#![feature(test)] | ||
|
||
extern crate flate2_crc; | ||
extern crate rand; | ||
extern crate test; | ||
extern crate miniz_sys; | ||
extern crate libz_sys; | ||
|
||
use rand::{thread_rng, RngCore}; | ||
|
||
fn flate2_crc(data: &[u8]) -> u32 { | ||
flate2_crc::Hardware::detect().calculate(0, data, |crc, data| { | ||
unsafe { | ||
miniz_sys::mz_crc32(crc as u64, data.as_ptr(), data.len()) as u32 | ||
} | ||
}) | ||
} | ||
|
||
fn miniz(data: &[u8]) -> u32 { | ||
unsafe { | ||
miniz_sys::mz_crc32(0, data.as_ptr(), data.len()) as u32 | ||
} | ||
} | ||
|
||
fn zlib(data: &[u8]) -> u32 { | ||
unsafe { | ||
libz_sys::crc32(0, data.as_ptr(), data.len() as u32) as u32 | ||
} | ||
} | ||
|
||
macro_rules! benches { | ||
($($f:ident => ($small:ident, $medium:ident, $large:ident),)*) => ($( | ||
#[bench] | ||
fn $small(b: &mut test::Bencher) { | ||
let mut rng = thread_rng(); | ||
let mut buf = vec![0u8; 8]; | ||
rng.fill_bytes(&mut buf); | ||
|
||
b.bytes = 8; | ||
b.iter(|| $f(&buf)); | ||
} | ||
|
||
#[bench] | ||
fn $medium(b: &mut test::Bencher) { | ||
let mut rng = thread_rng(); | ||
let mut buf = vec![0u8; 65_000]; | ||
rng.fill_bytes(&mut buf); | ||
|
||
b.bytes = 65_000; | ||
b.iter(|| $f(&buf)); | ||
} | ||
|
||
#[bench] | ||
fn $large(b: &mut test::Bencher) { | ||
let mut rng = thread_rng(); | ||
let mut buf = vec![0u8; 1_000_000]; | ||
rng.fill_bytes(&mut buf); | ||
|
||
b.bytes = 1_000_000; | ||
b.iter(|| $f(&buf)); | ||
} | ||
)*) | ||
} | ||
|
||
benches! { | ||
flate2_crc => (flate2_crc_8, flate2_crc_65000, flate2_crc_1000000), | ||
miniz => (miniz_8, miniz_65000, miniz_1000000), | ||
zlib => (zlib_8, zlib_65000, zlib_1000000), | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use std::env; | ||
use std::process::Command; | ||
use std::str; | ||
|
||
fn main() { | ||
println!("cargo:rerun-if-changed=build.rs"); | ||
|
||
let minor = match rustc_minor_version() { | ||
Some(n) => n, | ||
None => return, | ||
}; | ||
|
||
if minor >= 27 { | ||
println!("cargo:rustc-cfg=simd"); | ||
} | ||
} | ||
|
||
/// Returns the minor version `N` of a `rustc 1.N.*` compiler.
///
/// Runs the program named by the `RUSTC` environment variable with
/// `--version` and parses its standard output. Yields `None` when `RUSTC` is
/// unset, the command cannot be run, the output is not UTF-8, the output does
/// not begin with `rustc 1.`, or the minor component is not a valid `u32`.
fn rustc_minor_version() -> Option<u32> {
    // Written with `Option` combinators rather than `?` — presumably this
    // script must still build on compilers older than the one it probes for.
    env::var_os("RUSTC")
        .and_then(|rustc| Command::new(rustc).arg("--version").output().ok())
        .and_then(|output| {
            str::from_utf8(&output.stdout).ok().and_then(|version| {
                // Splitting e.g. "rustc 1.27.0 (...)" on '.' yields
                // "rustc 1", "27", "0 (...)".
                let mut pieces = version.split('.');
                if pieces.next() != Some("rustc 1") {
                    return None;
                }
                pieces.next().and_then(|minor| minor.parse::<u32>().ok())
            })
        })
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
// Note that this isn't really intended to be a user-facing crate, that's | ||
// `flate2::Crc` | ||
|
||
#[macro_use] | ||
extern crate cfg_if; | ||
|
||
#[cfg(test)] | ||
#[macro_use] | ||
extern crate quickcheck; | ||
|
||
cfg_if! { | ||
if #[cfg(not(simd))] { | ||
mod other; | ||
use self::other as imp; | ||
} else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { | ||
mod x86; | ||
use self::x86 as imp; | ||
} else { | ||
mod other; | ||
use self::other as imp; | ||
} | ||
} | ||
|
||
#[derive(Debug)] | ||
pub struct Hardware(bool); | ||
|
||
impl Hardware { | ||
#[inline] | ||
pub fn detect() -> Hardware { | ||
Hardware(imp::detect()) | ||
} | ||
|
||
#[inline] | ||
pub fn calculate( | ||
&self, | ||
crc: u32, | ||
data: &[u8], | ||
fallback: fn(u32, &[u8]) -> u32, | ||
) -> u32 { | ||
if self.0 { | ||
unsafe { imp::calculate(crc, data, fallback) } | ||
} else { | ||
fallback(crc, data) | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    extern crate miniz_sys;
    extern crate rand;
    extern crate rayon;

    use self::rand::Rng;
    use self::rayon::prelude::*;
    use super::Hardware;

    /// Reference checksum: miniz's `mz_crc32` over `b`, starting from `a`.
    fn fallback(a: u32, b: &[u8]) -> u32 {
        // SAFETY: the pointer and length both describe the borrowed slice
        // `b`, which stays alive for the duration of the call.
        let reference = unsafe { miniz_sys::mz_crc32(a as _, b.as_ptr(), b.len()) };
        reference as u32
    }

    /// Runs `iters` parallel trials. Each trial picks a random starting value
    /// and a random length in `lo..hi`, fills a buffer with random bytes, and
    /// asserts that `Hardware::calculate` agrees with `fallback`.
    fn random_chunks(iters: usize, lo: usize, hi: usize) {
        let hardware = Hardware::detect();

        (0..iters)
            .into_par_iter()
            // The `Vec` passed here is the scratch buffer each trial resizes
            // and refills.
            .for_each_with(Vec::new(), |buf, _| {
                let mut rng = rand::thread_rng();
                let seed = rng.gen::<u32>();
                let len = rng.gen_range(lo, hi);
                buf.resize(len, 0u8);
                rng.fill(&mut buf[..]);

                let expected = fallback(seed, &buf);
                let actual = hardware.calculate(seed, &buf, fallback);
                assert_eq!(expected, actual);
            });
    }

    #[test]
    fn random_small() {
        random_chunks(1000, 0, 256);
    }

    #[test]
    fn random_med() {
        random_chunks(1000, 256, 16 * 1024);
    }

    #[test]
    fn random_large() {
        random_chunks(1000, 0, 1024 * 1024);
    }

    quickcheck! {
        // Property: for any starting value and byte vector, the dispatched
        // checksum equals the reference one.
        fn prop(crc: u32, xs: Vec<u8>) -> bool {
            let expected = fallback(crc, &xs);
            let actual = Hardware::detect().calculate(crc, &xs, fallback);
            expected == actual
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/// Reports whether an accelerated checksum routine is available.
///
/// This backend has none, so the answer is always `false`.
#[inline]
pub fn detect() -> bool {
    false
}
|
||
/// Placeholder for the accelerated checksum routine on builds that have none.
///
/// `detect()` in this module always returns `false`, so `Hardware::calculate`
/// takes its fallback branch and is not expected to call this function.
///
/// # Safety
///
/// Declared `unsafe` so it can be called the same way as any other backend's
/// `calculate`; this body itself performs no unsafe operation.
///
/// # Panics
///
/// Always panics: reaching this function means the caller ignored the result
/// of `detect()`.
pub unsafe fn calculate(
    _crc: u32,
    _data: &[u8],
    _fallback: fn(u32, &[u8]) -> u32,
) -> u32 {
    unreachable!(
        "flate2-crc: no accelerated CRC implementation in this build; \
         `detect()` returns false, so the fallback must be used instead"
    )
}
Oops, something went wrong.