From 3c96a8eda720943798faa8987ebb17d07710c093 Mon Sep 17 00:00:00 2001 From: rsk0315 Date: Fri, 16 Feb 2024 20:00:16 +0900 Subject: [PATCH] b --- nekolib-src/ds/rs01_dict/Cargo.toml | 3 + .../ds/rs01_dict/benches/bench_selects.rs | 149 ++++++++++-------- nekolib-src/naive/bit-vector/src/lib.rs | 20 +-- 3 files changed, 97 insertions(+), 75 deletions(-) diff --git a/nekolib-src/ds/rs01_dict/Cargo.toml b/nekolib-src/ds/rs01_dict/Cargo.toml index a91a8be1e5..2ae89dd88c 100644 --- a/nekolib-src/ds/rs01_dict/Cargo.toml +++ b/nekolib-src/ds/rs01_dict/Cargo.toml @@ -6,6 +6,9 @@ edition = "2021" [dependencies] criterion = "0.5.1" +[dev-dependencies] +bit-vector = { path = "../../naive/bit-vector" } + [[bench]] name = "bench_selects" harness = false diff --git a/nekolib-src/ds/rs01_dict/benches/bench_selects.rs b/nekolib-src/ds/rs01_dict/benches/bench_selects.rs index e0d33dbb17..6b1b823b93 100644 --- a/nekolib-src/ds/rs01_dict/benches/bench_selects.rs +++ b/nekolib-src/ds/rs01_dict/benches/bench_selects.rs @@ -1,60 +1,12 @@ +use bit_vector::{Rs01DictNLl, Rs01DictNlC}; use criterion::{ black_box, criterion_group, criterion_main, BenchmarkId, Criterion, }; use rs01_dict::Rs01Dict; -use crate::benchmarks::select_word; - -mod benchmarks { - pub fn select_word(mut w: u64, mut i: u32) -> u32 { - if !X { - w = !w; - } - let mut res = 0; - for lg2 in (0..6).rev() { - let len = 1 << lg2; - let mask = !(!0 << len); - let count = (w & mask).count_ones(); - if count <= i { - w >>= len; - i -= count; - res += len; - } - } - res - } -} - fn bench_selects(c: &mut Criterion) { - let w = 0x_3046_2FB7_58C1_EDA9_u64; - let a: Vec<_> = (0..64).map(|i| w >> i & 1 != 0).collect(); - - let rs = Rs01Dict::new(&a); - let mut group = c.benchmark_group("select"); - for i in 0..32 { - let actual = rs.select1(i) as u32; - let expected = select_word::(w, i as _); - assert_eq!(expected, actual); - } - - group - .bench_function(BenchmarkId::new("mofr", w), |b| { - b.iter(|| { - for i in 0..32 { - black_box(rs.select1(i)); - } - }) - }) - .bench_function(BenchmarkId::new("word", w), |b| { - b.iter(|| { - for i in 0..32 { - black_box(select_word::(w, i)); - } - }) - }); - // % bc <<< "obase=16; ibase=2; $(gshuf -re {0,1}{0,1}{0,1}{0,1} -n$((20*16)))" \ // | tr -d \\n \ // | fold -w4 \ @@ -196,47 +148,112 @@ fn bench_selects(c: &mut Criterion) { a.iter().flat_map(|&w| (0..64).map(move |i| w >> i & 1 != 0)).collect(); let rs = Rs01Dict::new(&a); + let rs_nlc = Rs01DictNlC::new(&a); + let rs_nll = Rs01DictNLl::new(&a); - let expected1: Vec<_> = (0..a.len()).filter(|&i| a[i]).collect(); - let actual1: Vec<_> = (0..expected1.len()).map(|i| rs.select1(i)).collect(); - assert_eq!(actual1, expected1); + let expected_select0 = || (0..a.len()).filter(|&i| !a[i]); + let count0 = expected_select0().count(); + eprintln!("count0: {count0}"); - let count1 = expected1.len(); + let expected_select1 = || (0..a.len()).filter(|&i| a[i]); + let count1 = expected_select1().count(); eprintln!("count1: {count1}"); - let expected0: Vec<_> = (0..a.len()).filter(|&i| !a[i]).collect(); - let actual0: Vec<_> = (0..expected0.len()).map(|i| rs.select0(i)).collect(); - assert_eq!(actual0, expected0); + assert!((0..count0).map(|i| rs.select0(i)).eq(expected_select0())); + assert!((0..count0).map(|i| rs_nlc.select0(i)).eq(expected_select0())); + assert!((0..count0).map(|i| rs_nll.select0(i)).eq(expected_select0())); - let count0 = expected0.len(); - eprintln!("count0: {count0}"); + assert!((0..count1).map(|i| rs.select1(i)).eq(expected_select1())); + assert!((0..count1).map(|i| rs_nlc.select1(i)).eq(expected_select1())); + assert!((0..count1).map(|i| rs_nll.select1(i)).eq(expected_select1())); + + assert!((0..count0).map(|i| rs.select0(i)).eq(expected_select0())); + assert!((0..count0).map(|i| rs_nlc.select0(i)).eq(expected_select0())); + assert!((0..count0).map(|i| rs_nll.select0(i)).eq(expected_select0())); + + assert!((0..count1).map(|i| rs.select1(i)).eq(expected_select1())); + assert!((0..count1).map(|i| rs_nlc.select1(i)).eq(expected_select1())); + assert!((0..count1).map(|i| rs_nll.select1(i)).eq(expected_select1())); + + let expected_rank0 = || { + (0..a.len()).map(|i| !a[i] as usize).scan(0, |acc, x| { + *acc += x; + Some(*acc) + }) + }; + let expected_rank1 = || { + (0..a.len()).map(|i| a[i] as usize).scan(0, |acc, x| { + *acc += x; + Some(*acc) + }) + }; + + assert!((0..a.len()).map(|i| rs.rank0(i)).eq(expected_rank0())); + assert!((0..a.len()).map(|i| rs_nlc.rank0(i)).eq(expected_rank0())); + assert!((0..a.len()).map(|i| rs_nll.rank0(i)).eq(expected_rank0())); + + assert!((0..a.len()).map(|i| rs.rank1(i)).eq(expected_rank1())); + assert!((0..a.len()).map(|i| rs_nlc.rank1(i)).eq(expected_rank1())); + assert!((0..a.len()).map(|i| rs_nll.rank1(i)).eq(expected_rank1())); group - .bench_function(BenchmarkId::new("mofr", 1), |b| { + .bench_function(BenchmarkId::new("succinct", "rank"), |b| { b.iter(|| { - for i in 0..count1 { - black_box(rs.select1(i)); + for i in 0..a.len() { + black_box(rs.rank0(i)); + } + for i in 0..a.len() { + black_box(rs.rank1(i)); } }) }) - .bench_function(BenchmarkId::new("mofr", 0), |b| { + .bench_function(BenchmarkId::new("naive", "rank"), |b| { + b.iter(|| { + for i in 0..a.len() { + black_box(rs_nlc.rank0(i)); + } + for i in 0..a.len() { + black_box(rs_nlc.rank1(i)); + } + }) + }) + .bench_function(BenchmarkId::new("compact", "rank"), |b| { + b.iter(|| { + for i in 0..a.len() { + black_box(rs_nll.rank0(i)); + } + for i in 0..a.len() { + black_box(rs_nll.rank1(i)); + } + }) + }) + .bench_function(BenchmarkId::new("succinct", "select"), |b| { b.iter(|| { for i in 0..count0 { black_box(rs.select0(i)); } + for i in 0..count1 { + black_box(rs.select1(i)); + } }) }) - .bench_function(BenchmarkId::new("array", 1), |b| { + .bench_function(BenchmarkId::new("naive", "select"), |b| { b.iter(|| { + for i in 0..count0 { + black_box(rs_nlc.select0(i)); + } for i in 0..count1 { - black_box(expected1[i]); + black_box(rs_nlc.select1(i)); } }) }) - .bench_function(BenchmarkId::new("array", 0), |b| { + .bench_function(BenchmarkId::new("compact", "select"), |b| { b.iter(|| { for i in 0..count0 { - black_box(expected0[i]); + black_box(rs_nll.select0(i)); + } + for i in 0..count1 { + black_box(rs_nll.select1(i)); } }) }); diff --git a/nekolib-src/naive/bit-vector/src/lib.rs b/nekolib-src/naive/bit-vector/src/lib.rs index b98ac1e258..5a4d25c527 100644 --- a/nekolib-src/naive/bit-vector/src/lib.rs +++ b/nekolib-src/naive/bit-vector/src/lib.rs @@ -108,7 +108,7 @@ impl RankIndexNC { pub fn new(a: &[bool]) -> Self { let len = a.len(); let n = (len + W - 1) / W; - let mut buf = vec![0_u64; n]; + let mut buf = vec![0_u64; n + 1]; for i in 0..len { if a[i] { buf[i / W] |= 1 << (i % W); @@ -180,11 +180,11 @@ impl SelectIndexNLlInner { match self { Self::Sparse(pos) => pos[i_rem], Self::Dense(Range { start, end }) => { - if r.rank::(0) > i { - return 0; - } let mut lo = *start; let mut hi = *end; + if r.rank::(lo) > i { + return lo; + } while hi - lo > 1 { let mid = lo + (hi - lo) / 2; *(if r.rank::(mid) <= i { &mut lo } else { &mut hi }) = @@ -196,14 +196,16 @@ impl SelectIndexNLlInner { } } -pub struct Rs01DictNLl { +pub type Rs01DictNLl = Rs01DictNLlParam<64, 1024>; + +pub struct Rs01DictNLlParam { rank_index: RankIndexNC, select1_index: SelectIndexNLl, select0_index: SelectIndexNLl, } impl - Rs01DictNLl + Rs01DictNLlParam { pub fn new(a: &[bool]) -> Self { Self { @@ -256,7 +258,7 @@ macro_rules! bitvec { #[test] fn sanity_check_rank() { let a = bitvec!(b"000 010 110 000; 111 001 000 011; 000 000 010 010"); - let rs = Rs01DictNLl::<100, 100>::new(&a); + let rs = Rs01DictNLlParam::<100, 100>::new(&a); let expected1 = [ 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 11, 11, @@ -277,7 +279,7 @@ fn sanity_check_select_dense() { let a = bitvec!(b"000 010 110; 000 111 001; 000 011 000"); let ones = a.iter().filter(|&&x| x).count(); let zeros = a.len() - ones; - let rs = Rs01DictNLl::<100, 100>::new(&a); + let rs = Rs01DictNLlParam::<100, 100>::new(&a); let expected1: Vec<_> = (0..a.len()).filter(|&i| a[i]).collect(); let expected0: Vec<_> = (0..a.len()).filter(|&i| !a[i]).collect(); let actual1: Vec<_> = (0..ones).map(|i| rs.select1(i)).collect(); @@ -291,7 +293,7 @@ fn sanity_check_select_sparse() { let a = bitvec!(b"001 010 000; 000 000 110"); let ones = a.iter().filter(|&&x| x).count(); let zeros = a.len() - ones; - let rs = Rs01DictNLl::<2, 0>::new(&a); + let rs = Rs01DictNLlParam::<2, 0>::new(&a); let expected1: Vec<_> = (0..a.len()).filter(|&i| a[i]).collect(); let expected0: Vec<_> = (0..a.len()).filter(|&i| !a[i]).collect(); let actual1: Vec<_> = (0..ones).map(|i| rs.select1(i)).collect();