Skip to content

Commit

Permalink
add fuzzer, fix bug (#7)
Browse files Browse the repository at this point in the history
Add an initial fuzzer setup that runs `cargo fuzz` on a schedule, like the
Vortex repo.

There was an out-of-bounds access in the `compress` function when loading
the last 8-byte word. I replaced it with a copy that is safer while also
preserving our existing performance.
  • Loading branch information
a10y authored Aug 15, 2024
1 parent b8f5b87 commit 1b58639
Show file tree
Hide file tree
Showing 8 changed files with 203 additions and 18 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Scheduled fuzzing job: runs the `fuzz_compress` target for ten minutes and
# archives whatever crash artifacts and corpus entries it produced.
name: Fuzz

on:
  schedule:
    - cron: "0 0 * * *" # daily
  workflow_dispatch:

jobs:
  fuzz:
    name: "fuzz"
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # cargo-fuzz passes unstable compiler flags, so it only works with a
      # nightly toolchain. Without this step the fuzz run fails immediately on
      # the runner's stable toolchain, and `continue-on-error` hides the failure.
      - name: Install nightly toolchain
        run: rustup toolchain install nightly --profile minimal
      - name: Install cargo fuzz
        run: cargo install cargo-fuzz
      - name: Run fuzzing target
        run: cargo +nightly fuzz run fuzz_compress -- -max_total_time=600
        # Keep going so the artifact/corpus uploads below still run after a crash.
        continue-on-error: true
      - name: Archive crash artifacts
        uses: actions/upload-artifact@v4
        with:
          name: fuzzing-crash-artifacts
          path: fuzz/artifacts
      - name: Archive fuzzing corpus
        uses: actions/upload-artifact@v4
        with:
          name: fuzzing-corpus
          path: fuzz/corpus
4 changes: 4 additions & 0 deletions fuzz/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
target
corpus
artifacts
coverage
70 changes: 70 additions & 0 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Separate, unpublished package that holds the fuzz targets for `fsst-rs`.
[package]
name = "fsst-rs-fuzz"
version = "0.0.0"
publish = false
edition = "2021"

# Marker read by the cargo-fuzz tool (presumably to recognise this directory
# as a fuzz package — see the cargo-fuzz documentation).
[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"

# The crate under test, taken from the parent directory.
[dependencies.fsst-rs]
path = ".."

# One binary per fuzz target; each is excluded from tests, docs and benches.
[[bin]]
name = "fuzz_train"
path = "fuzz_targets/fuzz_train.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_compress"
path = "fuzz_targets/fuzz_compress.rs"
test = false
doc = false
bench = false
10 changes: 10 additions & 0 deletions fuzz/fuzz_targets/fuzz_compress.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// Round-trip property: compressing arbitrary bytes and decompressing the
// result must reproduce the input exactly.
fuzz_target!(|data: &[u8]| {
    // The table is trained on a fixed sentence; only the bytes being
    // compressed come from the fuzzer.
    let symbol_table = fsst_rs::train("the quick brown fox jumped over the lazy dog".as_bytes());

    let encoded = symbol_table.compress(data);
    let round_tripped = symbol_table.decompress(&encoded);

    assert_eq!(&round_tripped, data);
});
7 changes: 7 additions & 0 deletions fuzz/fuzz_targets/fuzz_train.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// Training must accept arbitrary bytes without crashing; the resulting table
// is discarded immediately.
fuzz_target!(|data: &[u8]| {
    drop(fsst_rs::train(data));
});
68 changes: 53 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,25 @@ impl SymbolTable {
remaining_bytes.is_positive(),
"in_ptr exceeded in_end, should not be possible"
);

// Shift off the remaining bytes
let mut last_word = unsafe { (in_ptr as *const u64).read_unaligned() };
last_word = mask_prefix(last_word, remaining_bytes as usize);
let remaining_bytes = remaining_bytes as usize;

// Load the last `remaining_bytes` bytes of data into a final word. We then replicate the loop above,
// but shift data out of this word rather than advancing an input pointer and potentially reading
// unowned memory.
let mut last_word = unsafe {
match remaining_bytes {
0 => 0,
1 => extract_u64::<1>(in_ptr),
2 => extract_u64::<2>(in_ptr),
3 => extract_u64::<3>(in_ptr),
4 => extract_u64::<4>(in_ptr),
5 => extract_u64::<5>(in_ptr),
6 => extract_u64::<6>(in_ptr),
7 => extract_u64::<7>(in_ptr),
8 => extract_u64::<8>(in_ptr),
_ => unreachable!("remaining bytes must be <= 8"),
}
};

while in_ptr < in_end && out_ptr < out_end {
unsafe {
Expand Down Expand Up @@ -466,17 +481,6 @@ impl SymbolTable {
}
}

/// Keep only the lowest `prefix_bytes` bytes of `word`, zeroing everything above them.
///
/// A `prefix_bytes` of 0 yields 0; a `prefix_bytes` of 8 returns `word` unchanged.
fn mask_prefix(word: u64, prefix_bytes: usize) -> u64 {
    // Handled separately: shifting a u64 right by a full 64 bits is not a valid shift.
    if prefix_bytes == 0 {
        return 0;
    }

    // Number of high-order bits that fall outside the prefix.
    let discarded_bits = 8 * (8 - prefix_bytes);
    word & (u64::MAX >> discarded_bits)
}

fn advance_8byte_word(word: u64, bytes: usize) -> u64 {
// shift the word off the right-end, because little endian means the first
// char is stored in the LSB.
Expand All @@ -499,3 +503,37 @@ fn compare_masked(left: u64, right: u64, ignored_bits: u16) -> bool {

(left & mask) == right
}

/// Load the first `N` bytes at `ptr` into a `u64`, zero-filling the unused high bytes.
///
/// The byte at `ptr` ends up in the least-significant byte of the result, the
/// byte at `ptr + 1` in the next one, and so on, regardless of the target's
/// native byte order. `N` is a const generic so that each instantiation can
/// resolve the `match` at compile time and keep only the fixed-width loads it
/// needs.
///
/// # Safety
///
/// `ptr` must be valid for reads of `N` bytes. No alignment is required, and no
/// byte at or beyond `ptr + N` is read.
///
/// # Panics
///
/// Panics if `N > 8`.
unsafe fn extract_u64<const N: usize>(ptr: *const u8) -> u64 {
    match N {
        // Nothing to load; mirrors the caller's handling of an empty tail.
        0 => 0,
        1 => ptr.read() as u64,
        2 => u16::from_le((ptr as *const u16).read_unaligned()) as u64,
        3 => {
            // 1 byte + 2 bytes
            let low = ptr.read() as u64;
            let high = u16::from_le((ptr.byte_add(1) as *const u16).read_unaligned()) as u64;
            (high << 8) | low
        }
        4 => u32::from_le((ptr as *const u32).read_unaligned()) as u64,
        5 => {
            // 4 bytes + 1 byte
            let low = u32::from_le((ptr as *const u32).read_unaligned()) as u64;
            let high = ptr.byte_add(4).read() as u64;
            (high << 32) | low
        }
        6 => {
            // 4 bytes + 2 bytes
            let low = u32::from_le((ptr as *const u32).read_unaligned()) as u64;
            let high = u16::from_le((ptr.byte_add(4) as *const u16).read_unaligned()) as u64;
            (high << 32) | low
        }
        7 => {
            // 4 bytes + 2 bytes + 1 byte
            let low = u32::from_le((ptr as *const u32).read_unaligned()) as u64;
            let mid = u16::from_le((ptr.byte_add(4) as *const u16).read_unaligned()) as u64;
            let high = ptr.byte_add(6).read() as u64;
            (high << 48) | (mid << 32) | low
        }
        8 => u64::from_le((ptr as *const u64).read_unaligned()),
        _ => unreachable!("N must be <= 8"),
    }
}
6 changes: 3 additions & 3 deletions tests/correctness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ fn test_one_byte() {
#[test]
fn test_zeros() {
println!("training zeros");
let training_data: Vec<u8> = vec![0, 1, 2, 3, 4];
let training_data: Vec<u8> = vec![0, 1, 2, 3, 4, 0];
let trained = fsst_rs::train(&training_data);
println!("compressing with zeros");
let compressed = trained.compress(&[0, 4]);
let compressed = trained.compress(&[4, 0]);
println!("decomperssing with zeros");
assert_eq!(trained.decompress(&compressed), &[0, 4]);
assert_eq!(trained.decompress(&compressed), &[4, 0]);
println!("done");
}

Expand Down

0 comments on commit 1b58639

Please sign in to comment.