Skip to content

Commit

Permalink
select
Browse files Browse the repository at this point in the history
  • Loading branch information
rsk0315 committed Feb 14, 2024
1 parent c337ad2 commit c884fce
Showing 1 changed file with 95 additions and 17 deletions.
112 changes: 95 additions & 17 deletions nekolib-src/ds/rs01_dict/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@ const POW2_SMALL: usize = 1 << LEAF_LEN;
const RANK_LOOKUP: [[u16; LEAF_LEN]; POW2_SMALL] =
rank_lookup::<LEAF_LEN, POW2_SMALL>();

// Parameters and precomputed tables for the lookup-based `select`.

// Number of count fields packed into one key of `SELECT_LOOKUP_TREE`
// (passed as `BRANCH` to `select_lookup_tree`).
const SELECT_BRANCH: usize = 3;
// Number of entries in each row of `SELECT_LOOKUP_TREE`
// (passed as `POPCNT` to `select_lookup_tree`).
const SELECT_POPCNT: usize = 9;
// Width in bits of a single count field inside a key
// (passed as `LG2_POPCNT` to `select_lookup_tree`).
const SELECT_LG2_POPCNT: usize = 4;
// Number of distinct keys: `SELECT_BRANCH` fields of `SELECT_LG2_POPCNT`
// bits each, i.e. 1 << 12 = 4096 rows.
const SELECT_BIT_PATTERNS: usize = 1 << (SELECT_LG2_POPCNT * SELECT_BRANCH);
// Number of bits in one leaf word indexed through `SELECT_LOOKUP_WORD`.
const SELECT_LEAF_LEN: usize = 3;
// Number of distinct leaf words (1 << 3 = 8).
const SELECT_POW2_LEAF_LEN: usize = 1 << SELECT_LEAF_LEN;
// Compile-time table built by `select_lookup_tree`. The key packs the
// per-branch counts (lowest field first); entry `[key][i]` is the pair
// (branch holding the `i`-th one, number of ones in the branches before it).
const SELECT_LOOKUP_TREE: [[(u16, u16); SELECT_POPCNT]; SELECT_BIT_PATTERNS] =
select_lookup_tree::<
SELECT_BIT_PATTERNS,
SELECT_BRANCH,
SELECT_POPCNT,
SELECT_LG2_POPCNT,
>();
// Compile-time table built by `select_lookup_word`. Entry `[w][k]` is the
// position of the `k`-th set bit of the leaf word `w`; slots beyond the
// number of set bits in `w` stay 0.
const SELECT_LOOKUP_WORD: [[u16; SELECT_LEAF_LEN]; 1 << SELECT_LEAF_LEN] =
select_lookup_word::<SELECT_POW2_LEAF_LEN, SELECT_LEAF_LEN>();

const fn rank_lookup<
const MAX_LEN: usize, // log(n)/2
const BIT_PATTERNS: usize, // sqrt(n)
Expand All @@ -40,24 +56,24 @@ const fn rank_lookup<
table
}

const fn select_lookup<
const fn select_lookup_tree<
const BIT_PATTERNS: usize, // 2^(branch * large)
const BRANCH: usize, // sqrt(log(n))
const MAX_ONES: usize, // log(n)^2
const LG2_MAX_ONES: usize, // O(log(log(n)))
>() -> [[u16; MAX_ONES]; BIT_PATTERNS] {
let mut table = [[0; MAX_ONES]; BIT_PATTERNS];
const POPCNT: usize, // log(n)^2
const LG2_POPCNT: usize, // O(log(log(n)))
>() -> [[(u16, u16); POPCNT]; BIT_PATTERNS] {
let mut table = [[(0, 0); POPCNT]; BIT_PATTERNS];
let mut i = 0;
while i < BIT_PATTERNS {
let mut j = 0;
let mut index = 0;
while j < BRANCH {
// [0011, 0100, 0010] (0b_0010_0100_0011)
// [0, 0, 0, 1, 1, 1, 1, 2, 2, 3, ...]
let count = i >> (j * LG2_MAX_ONES) & !(!0 << LG2_MAX_ONES);
let count = i >> (j * LG2_POPCNT) & !(!0 << LG2_POPCNT);
let mut k = 0;
while k < count && index < MAX_ONES {
table[i][index] = j as u16;
while k < count && index < POPCNT {
table[i][index] = (j as u16, (index - k) as u16);
index += 1;
k += 1;
}
Expand All @@ -68,6 +84,27 @@ const fn select_lookup<
table
}

/// Builds the per-word select table at compile time.
///
/// For every bit pattern `w` in `0..BIT_PATTERNS`, row `w` lists the
/// positions (counted from the least significant bit) of the set bits found
/// among the lowest `LEAF_LEN` bits of `w`, in increasing order. Slots past
/// the number of set bits are left as 0.
const fn select_lookup_word<
    const BIT_PATTERNS: usize, // 2^leaflen
    const LEAF_LEN: usize,     // log(n)/2
>() -> [[u16; LEAF_LEN]; BIT_PATTERNS] {
    let mut positions = [[0_u16; LEAF_LEN]; BIT_PATTERNS];
    let mut pattern = 0;
    // `for` loops and iterators are unavailable in `const fn`, hence `while`.
    while pattern < BIT_PATTERNS {
        // Number of set bits recorded so far for this pattern.
        let mut seen = 0;
        let mut bit = 0;
        while bit < LEAF_LEN {
            let is_set = (pattern >> bit) & 1 == 1;
            if is_set {
                positions[pattern][seen] = bit as u16;
                seen += 1;
            }
            bit += 1;
        }
        pattern += 1;
    }
    positions
}

struct RankIndex<const LARGE: usize, const SMALL: usize> {
buf: Vec<u64>,

Expand Down Expand Up @@ -198,7 +235,7 @@ enum SelectIndexInner<
Sparse(Vec<usize>),

/// less than $`\log(n)^4`$-bit blocks.
Dense(SimpleBitVec),
Dense(Vec<SimpleBitVec>, SimpleBitVec),
}

struct SelectIndex<
Expand Down Expand Up @@ -262,13 +299,37 @@ impl<
tree.push(tmp);
last = cur;
}
tree.push(last);
tree.reverse();
Self::Dense(tree, a)
}

fn select(&self, i: usize) -> usize {
match self {
Self::Sparse(index) => index[i],
Self::Dense(tree, buf) => {
let mut i = i;
let mut cur = 0;
let mut off = 0;
let len = LG2_POPCNT * BRANCH;
for level in tree {
let w = level.get(cur..level.len().min(cur + len)) as usize;
// eprintln!("range: {:?}, {w:01$b}", cur..cur + len, len);
let (br, count) = SELECT_LOOKUP_TREE[w][i];
// eprintln!(" -> {:?}", (br, count));
cur = (cur + LG2_POPCNT * br as usize) * BRANCH;
off = off * BRANCH + br as usize;
i -= count as usize;
}
// eprintln!("cur: {cur}, i: {i}");

let start = cur / (BRANCH * LG2_POPCNT) * LEAF_LEN;
let end = start + LEAF_LEN;
let leaf = buf.get(start..end);
// eprintln!("{leaf:00$b}", LEAF_LEN);

let mut res = SimpleBitVec::new();
while let Some(level) = tree.pop() {
res.push_vec(level);
off * LEAF_LEN + SELECT_LOOKUP_WORD[leaf as usize][i] as usize
}
}
Self::Dense(res)
}
}

Expand Down Expand Up @@ -297,6 +358,10 @@ impl<
}
Self { ds: res }
}

/// Answers a select query by delegating to one entry of `self.ds`.
///
/// The entry is chosen as `i / POPCNT`, and it is asked for index
/// `i % POPCNT` within itself.
fn select(&self, i: usize) -> usize {
    let (block, within) = (i / POPCNT, i % POPCNT);
    self.ds[block].select(within)
}
}

macro_rules! bitvec {
Expand Down Expand Up @@ -324,8 +389,10 @@ fn test_rank_lookup() {

#[test]
fn test_select_lookup() {
let table = select_lookup::<4096, 3, 16, 4>();
assert_eq!(&table[0b_0010_0100_0011][0..9], [0, 0, 0, 1, 1, 1, 1, 2, 2]);
let table = select_lookup_tree::<4096, 3, 16, 4>();
let tmp: [_; 9] = table[0b_0010_0100_0011][0..9].try_into().unwrap();
assert_eq!(tmp.map(|x| x.0), [0, 0, 0, 1, 1, 1, 1, 2, 2]);
assert_eq!(tmp.map(|x| x.1), [0, 0, 0, 3, 3, 3, 3, 7, 7]);
}

#[test]
Expand All @@ -334,12 +401,23 @@ fn sanity_check_rank() {
let b = compress_vec_bool::<3>(&a);
let rp = RankIndex::<12, 3>::new(b.clone());
for i in 0..a.len() {
eprintln!("{i} -> {}", rp.rank1(i));
eprintln!("rank({i}) -> {}", rp.rank1(i));
}
}

#[test]
fn sanity_check_select() {
    let a = bitvec!(b"000 010 110; 000 111 001; 000 011 000");
    let sp = SelectIndex::<12, 4, 3, 100, 3>::new::<true>(&a);

    // Dump the whole leaf lookup table, one line per (pattern, rank) slot.
    for (i, row) in SELECT_LOOKUP_WORD.iter().enumerate() {
        for (j, pos) in row.iter().enumerate() {
            eprintln!("table[{i:00$b}][{j}] = {1}", SELECT_LEAF_LEN, pos);
        }
    }

    // Print the answer to each of the first nine select queries.
    (0..9).for_each(|i| eprintln!("select({i}) -> {}", sp.select(i)));
}

0 comments on commit c884fce

Please sign in to comment.