From 9b67893b7dbfe5091b6fd3352d3c60ac651af2a1 Mon Sep 17 00:00:00 2001 From: Joseph Glanville Date: Mon, 23 Mar 2020 10:57:08 +0700 Subject: [PATCH 01/25] WIP: Run container --- src/bitmap/container.rs | 24 ++- src/bitmap/fmt.rs | 5 +- src/bitmap/inherent.rs | 18 ++ src/bitmap/serialization.rs | 171 +++++++++++++--- src/bitmap/store.rs | 387 +++++++++++++++++++++++++++++++++--- tests/bitmapwithruns.bin | Bin 0 -> 48056 bytes tests/clone.rs | 13 ++ tests/lib.rs | 22 ++ tests/serialization.rs | 34 +++- tests/size_hint.rs | 28 +++ 10 files changed, 635 insertions(+), 67 deletions(-) create mode 100644 tests/bitmapwithruns.bin diff --git a/src/bitmap/container.rs b/src/bitmap/container.rs index e80cdef2..22f04427 100644 --- a/src/bitmap/container.rs +++ b/src/bitmap/container.rs @@ -3,7 +3,8 @@ use std::fmt; use super::store::{self, Store}; use super::util; -const ARRAY_LIMIT: u64 = 4096; +pub const ARRAY_LIMIT: u64 = 4096; +pub const RUN_MAX_SIZE: u64 = 2048; #[derive(PartialEq, Clone)] pub struct Container { @@ -103,7 +104,7 @@ impl Container { self.store.max() } - fn ensure_correct_store(&mut self) { + fn ensure_correct_store(&mut self) -> bool { let new_store = match (&self.store, self.len) { (store @ &Store::Bitmap(..), len) if len <= ARRAY_LIMIT => Some(store.to_array()), (store @ &Store::Array(..), len) if len > ARRAY_LIMIT => Some(store.to_bitmap()), @@ -111,6 +112,25 @@ impl Container { }; if let Some(new_store) = new_store { self.store = new_store; + true + } else { + false + } + } + + pub fn optimize(&mut self) -> bool { + match self.store { + Store::Array(..) | Store::Bitmap(..) => { + let num_runs = self.store.count_runs(); + if num_runs <= RUN_MAX_SIZE && num_runs <= self.len / 2 { + // convert to run container + self.store = self.store.to_run(); + true + } else { + self.ensure_correct_store() + } + } + Store::Run(..) => self.ensure_correct_store(), } } } diff --git a/src/bitmap/fmt.rs b/src/bitmap/fmt.rs index 7dca8170..bb4c66be 100644 --- a/src/bitmap/fmt.rs +++ b/src/bitmap/fmt.rs @@ -9,10 +9,11 @@ impl fmt::Debug for RoaringBitmap { } else { write!( f, - "RoaringBitmap<{:?} values between {:?} and {:?}>", + "RoaringBitmap<{:?} values between {:?} and {:?} in {:?} containers>", self.len(), self.min().unwrap(), - self.max().unwrap() + self.max().unwrap(), + self.containers.len(), ) } } diff --git a/src/bitmap/inherent.rs b/src/bitmap/inherent.rs index c1ef81c9..935c7fd6 100644 --- a/src/bitmap/inherent.rs +++ b/src/bitmap/inherent.rs @@ -247,6 +247,24 @@ impl RoaringBitmap { .last() .map(|tail| util::join(tail.key, tail.max())) } + + // TODO(jpg) actually come up with example that illustrates creation of run containers + /// Optimizes the container storage for this bitmap. + /// Returns true if the container storage was modified, false if not. + /// + /// # Examples + /// use roaring::RoaringBitmap; + /// + /// let mut rb = RoaringBitmap::from_iter(1000..100000) + /// rb.optimize() + /// ``` + pub fn optimize(&mut self) -> bool { + let mut changed = false; + for container in &mut self.containers { + changed |= container.optimize() + } + changed + } } impl Default for RoaringBitmap { diff --git a/src/bitmap/serialization.rs b/src/bitmap/serialization.rs index 3ecff394..2689ed41 100644 --- a/src/bitmap/serialization.rs +++ b/src/bitmap/serialization.rs @@ -3,12 +3,25 @@ use std::io; use super::container::Container; use super::store::Store; +use crate::bitmap::container::ARRAY_LIMIT; +use crate::bitmap::store::{Interval, BITMAP_LENGTH}; use crate::RoaringBitmap; const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346; const SERIAL_COOKIE: u16 = 12347; -// TODO: Need this once run containers are supported -// const NO_OFFSET_THRESHOLD: u8 = 4; +const NO_OFFSET_THRESHOLD: usize = 4; + +// Sizes of header structures +const COOKIE_BYTES: usize = 4; +const SIZE_BYTES: usize = 4; +const DESCRIPTION_BYTES: usize = 4; +const OFFSET_BYTES: usize = 4; + +// Sizes of container structures +const BITMAP_BYTES: usize = BITMAP_LENGTH * 8; +const ARRAY_ELEMENT_BYTES: usize = 2; +const RUN_NUM_BYTES: usize = 2; +const RUN_ELEMENT_BYTES: usize = 4; impl RoaringBitmap { /// Return the size in bytes of the serialized output. @@ -27,17 +40,23 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialized_size(&self) -> usize { + let mut has_run_containers = false; + let size = self.containers.len(); let container_sizes: usize = self .containers .iter() .map(|container| match container.store { - Store::Array(ref values) => 8 + values.len() * 2, - Store::Bitmap(..) => 8 + 8 * 1024, + Store::Array(ref values) => values.len() * ARRAY_ELEMENT_BYTES, + Store::Bitmap(..) => BITMAP_BYTES, + Store::Run(ref intervals) => { + has_run_containers = true; + RUN_NUM_BYTES + (RUN_ELEMENT_BYTES * intervals.len()) + } }) .sum(); // header + container sizes - 8 + container_sizes + header_size(size, has_run_containers) + container_sizes } /// Serialize this bitmap into [the standard Roaring on-disk format][format]. @@ -58,27 +77,61 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn serialize_into(&self, mut writer: W) -> io::Result<()> { - writer.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; - writer.write_u32::(self.containers.len() as u32)?; + let has_run_containers = self.containers.iter().any(|c| { + if let Store::Run(_) = c.store { + true + } else { + false + } + }); + let size = self.containers.len(); + + // Depending on if run containers are present or not write the appropriate header + if has_run_containers { + // The new format stores the container count in the most significant bits of the header + let cookie = SERIAL_COOKIE as u32 | ((size as u32 - 1) << 16); + writer.write_u32::(cookie)?; + // It is then followed by a bitset indicating which containers are run containers + let run_container_bitmap_size = (size + 7) / 8; + let mut run_container_bitmap = vec![0; run_container_bitmap_size]; + for (i, container) in self.containers.iter().enumerate() { + if let Store::Run(_) = container.store { + run_container_bitmap[i / 8] |= 1 << (i % 8); + } + } + writer.write_all(&run_container_bitmap)?; + } else { + // Write old format, cookie followed by container count + writer.write_u32::(SERIAL_COOKIE_NO_RUNCONTAINER)?; + writer.write_u32::(size as u32)?; + } + // Write the container descriptions for container in &self.containers { writer.write_u16::(container.key)?; writer.write_u16::((container.len - 1) as u16)?; } - let mut offset = 8 + 8 * self.containers.len() as u32; - for container in &self.containers { - writer.write_u32::(offset)?; - match container.store { - Store::Array(ref values) => { - offset += values.len() as u32 * 2; - } - Store::Bitmap(..) => { - offset += 8 * 1024; + // Write offsets if there are no runs or NO_OFFSET_THRESHOLD containers is reached + if !has_run_containers || size >= NO_OFFSET_THRESHOLD { + let mut offset = header_size(size, has_run_containers) as u32; + for container in &self.containers { + writer.write_u32::(offset)?; + match container.store { + Store::Array(ref values) => { + offset += (values.len() * ARRAY_ELEMENT_BYTES) as u32; + } + Store::Bitmap(..) => { + offset += BITMAP_BYTES as u32; + } + Store::Run(ref intervals) => { + offset += (RUN_NUM_BYTES + (intervals.len() * RUN_ELEMENT_BYTES)) as u32; + } } } } + // Finally serialize each of the containers for container in &self.containers { match container.store { Store::Array(ref values) => { @@ -91,6 +144,13 @@ impl RoaringBitmap { writer.write_u64::(value)?; } } + Store::Run(ref intervals) => { + writer.write_u16::(intervals.len() as u16)?; + for iv in intervals { + writer.write_u16::(iv.start)?; + writer.write_u16::(iv.end - iv.start)?; + } + } } } @@ -116,20 +176,28 @@ impl RoaringBitmap { /// assert_eq!(rb1, rb2); /// ``` pub fn deserialize_from(mut reader: R) -> io::Result { - let (size, has_offsets) = { + // First read the cookie to determine which version of the format we are reading + let (size, has_offsets, has_run_containers) = { let cookie = reader.read_u32::()?; if cookie == SERIAL_COOKIE_NO_RUNCONTAINER { - (reader.read_u32::()? as usize, true) + (reader.read_u32::()? as usize, true, false) } else if (cookie as u16) == SERIAL_COOKIE { - return Err(io::Error::new( - io::ErrorKind::Other, - "run containers are unsupported", - )); + let size = ((cookie >> 16) + 1) as usize; + (size, size >= NO_OFFSET_THRESHOLD, true) } else { return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value")); } }; + // Read the run container bitmap if necessary + let run_container_bitmap = if has_run_containers { + let mut bitmap = vec![0u8; (size + 7) / 8]; + reader.read_exact(&mut bitmap)?; + Some(bitmap) + } else { + None + }; + if size > u16::max_value() as usize { return Err(io::Error::new( io::ErrorKind::Other, @@ -137,39 +205,80 @@ impl RoaringBitmap { )); } - let mut description_bytes = vec![0u8; size * 4]; + // Read the container descriptions + let mut description_bytes = vec![0u8; size * DESCRIPTION_BYTES]; reader.read_exact(&mut description_bytes)?; let description_bytes = &mut &description_bytes[..]; + // Read the offsets if present if has_offsets { - let mut offsets = vec![0u8; size * 4]; + let mut offsets = vec![0u8; size * OFFSET_BYTES]; reader.read_exact(&mut offsets)?; drop(offsets); // Not useful when deserializing into memory } let mut containers = Vec::with_capacity(size); - for _ in 0..size { + // Read each of the containers + for i in 0..size { let key = description_bytes.read_u16::()?; - let len = u64::from(description_bytes.read_u16::()?) + 1; + let cardinality = u64::from(description_bytes.read_u16::()?) + 1; - let store = if len <= 4096 { - let mut values = Vec::with_capacity(len as usize); - for _ in 0..len { + // If the run container bitmap is present, check if this container is a run container + let is_run_container = match run_container_bitmap { + Some(ref bm) => bm[i / 8] & (1 << (i % 8)) != 0, + None => false, + }; + + let store = if is_run_container { + let runs = reader.read_u16::()?; + let mut intervals = Vec::with_capacity(runs as usize); + for _ in 0..runs { + let start = reader.read_u16::()?; + let run_len = reader.read_u16::()?; + let end = start + run_len; + intervals.push(Interval { start, end }) + } + Store::Run(intervals) + } else if cardinality <= ARRAY_LIMIT { + let mut values = Vec::with_capacity(cardinality as usize); + for _ in 0..cardinality { values.push(reader.read_u16::()?); } Store::Array(values) } else { - let mut values = Box::new([0; 1024]); + let mut values = Box::new([0; BITMAP_LENGTH]); for value in values.iter_mut() { *value = reader.read_u64::()?; } Store::Bitmap(values) }; - containers.push(Container { key, len, store }); + containers.push(Container { + key, + len: cardinality, + store, + }); } Ok(RoaringBitmap { containers }) } } + +fn header_size(size: usize, has_run_containers: bool) -> usize { + if has_run_containers { + // New format encodes the size (number of containers) into the 4 byte cookie + // Additionally a bitmap is included marking which containers are run containers + let run_container_bitmap_size = (size + 7) / 8; + // New format conditionally includes offsets if there are 4 or more containers + if size >= NO_OFFSET_THRESHOLD { + COOKIE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + run_container_bitmap_size + } else { + COOKIE_BYTES + (DESCRIPTION_BYTES * size) + run_container_bitmap_size + } + } else { + // Old format encodes cookie followed by container count + // It also always includes the offsets + COOKIE_BYTES + SIZE_BYTES + ((DESCRIPTION_BYTES + OFFSET_BYTES) * size) + } +} diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 3aaa3110..1e21a1d7 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,14 +1,32 @@ use std::borrow::Borrow; use std::cmp::Ordering::{Equal, Greater, Less}; -use std::slice; use std::vec; +use std::{fmt, slice}; -const BITMAP_LENGTH: usize = 1024; +use self::Store::{Array, Bitmap, Run}; + +pub const BITMAP_LENGTH: usize = 1024; + +#[derive(PartialEq, Clone, Debug)] +pub struct Interval { + pub start: u16, + pub end: u16, +} + +impl Interval { + pub fn new(start: u16, end: u16) -> Interval { + Interval { start, end } + } + + pub fn run_len(&self) -> u64 { + (self.end - self.start) as u64 + 1 + } +} -use self::Store::{Array, Bitmap}; pub enum Store { Array(Vec), Bitmap(Box<[u64; BITMAP_LENGTH]>), + Run(Vec), } pub enum Iter<'a> { @@ -16,6 +34,7 @@ pub enum Iter<'a> { Vec(vec::IntoIter), BitmapBorrowed(BitmapIter<&'a [u64; BITMAP_LENGTH]>), BitmapOwned(BitmapIter>), + Run(RunIter), } pub struct BitmapIter> { @@ -24,6 +43,12 @@ pub struct BitmapIter> { bits: B, } +pub struct RunIter { + run: usize, + offset: u64, + intervals: Vec, +} + impl Store { pub fn insert(&mut self, index: u16) -> bool { match *self { @@ -40,6 +65,43 @@ impl Store { false } } + Run(ref mut vec) => { + vec.binary_search_by_key(&index, |iv| iv.start) + .map_err(|loc| { + // Value is beyond end of interval + if vec[loc].end < index { + // If immediately follows this interval + if index == vec[loc].end - 1 { + if loc < vec.len() && index == vec[loc + 1].start { + // Merge with following interval + vec[loc].end = vec[loc + 1].end; + vec.remove(loc + 1); + return; + } + // Extend end of this interval by 1 + vec[loc].end += 1 + } else { + // Otherwise create new standalone interval + vec.insert(loc, Interval::new(index, index)); + } + } else if vec[loc].start == index + 1 { + // Value immediately precedes interval + if loc > 0 && vec[loc - 1].end == &index - 1 { + // Merge with preceding interval + vec[loc - 1].end = vec[loc].end; + vec.remove(loc); + return; + } + vec[loc].start -= 1; + } else if loc > 0 && index - 1 == vec[loc - 1].end { + // Immediately follows the previous interval + vec[loc - 1].end += 1 + } else { + vec.insert(loc, Interval::new(index, index)); + } + }) + .is_err() + } } } @@ -55,6 +117,27 @@ impl Store { false } } + Run(ref mut vec) => vec + .binary_search_by_key(&index, |iv| iv.start) + .map(|loc| { + if index == vec[loc].start && index == vec[loc].end { + // Remove entire run if it only contains this value + vec.remove(loc); + } else if index == vec[loc].end { + // Value is last in this interval + vec[loc].end -= 1; + } else if index == vec[loc].start { + // Value is first in this interval + vec[loc].start += 1; + } else { + // Value lies inside the interval, we need to split it + // First shrink the current interval + vec[loc].end = index - 1; + // Then insert a new index leaving gap where value was removed + vec.insert(loc + 1, Interval::new(index + 1, vec[loc].end)); + } + }) + .is_ok(), } } @@ -105,6 +188,8 @@ impl Store { bits[end_key] &= !(!0u64).wrapping_shr(64 - end_bit); u64::from(removed) } + // TODO(jpg): Remove range + Run(ref mut _intervals) => unimplemented!(), } } @@ -112,6 +197,9 @@ impl Store { match *self { Array(ref vec) => vec.binary_search(&index).is_ok(), Bitmap(ref bits) => bits[key(index)] & (1 << bit(index)) != 0, + Run(ref intervals) => intervals + .binary_search_by_key(&index, |iv| iv.start) + .is_ok(), } } @@ -136,6 +224,13 @@ impl Store { (&Array(ref vec), store @ &Bitmap(..)) | (store @ &Bitmap(..), &Array(ref vec)) => { vec.iter().all(|&i| !store.contains(i)) } + // TODO(jpg) is_disjoint + (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&Run(ref _intervals), &Array(ref _vec)) | (&Array(ref _vec), &Run(ref _intervals)) => { + unimplemented!() + } + (&Run(ref _intervals), _store @ &Bitmap(..)) + | (_store @ &Bitmap(..), &Run(ref _intervals)) => unimplemented!(), } } @@ -159,12 +254,22 @@ impl Store { } } } + (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), + // TODO(jpg) is_subset array, run + (&Array(ref _vec), &Run(ref _intervals)) => unimplemented!(), + (&Bitmap(ref bits1), &Bitmap(ref bits2)) => bits1 .iter() .zip(bits2.iter()) .all(|(&i1, &i2)| (i1 & i2) == i1), - (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), (&Bitmap(..), &Array(..)) => false, + // TODO(jpg) is subset bitmap, run + (&Bitmap(..), &Run(ref _vec)) => unimplemented!(), + + // TODO(jpg) is_subset run, * + (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&Run(ref _intervals), &Array(ref _vec)) => unimplemented!(), + (&Run(ref _intervals), _store @ &Bitmap(..)) => unimplemented!(), } } @@ -182,6 +287,13 @@ impl Store { } Array(vec) } + Run(ref intervals) => Array( + intervals + .iter() + .map(|iv| iv.start..iv.end) + .flatten() + .collect(), + ), } } @@ -195,6 +307,78 @@ impl Store { Bitmap(bits) } Bitmap(..) => panic!("Cannot convert bitmap to bitmap"), + Run(ref intervals) => { + let mut bits = Box::new([0; BITMAP_LENGTH]); + for iv in intervals { + for index in iv.start..iv.end { + bits[key(index)] |= 1 << bit(index); + } + } + Bitmap(bits) + } + } + } + + pub fn to_run(&self) -> Self { + match *self { + Array(ref vec) => { + let mut intervals = Vec::new(); + let mut start = *vec.first().unwrap(); + for (idx, &v) in vec[1..].iter().enumerate() { + if v - vec[idx] > 1 { + intervals.push(Interval::new(start, vec[idx])); + start = v + } + } + intervals.push(Interval::new(start, *vec.last().unwrap())); + Run(intervals) + } + Bitmap(ref bits) => { + let mut current = bits[0]; + let mut i = 0u16; + let mut start; + let mut last; + + let mut intervals = Vec::new(); + + loop { + // Skip over empty words + while current == 0 && i < BITMAP_LENGTH as u16 - 1 { + i += 1; + current = bits[i as usize]; + } + // Reached end of the bitmap without finding anymore bits set + if current == 0 { + break; + } + let current_start = current.trailing_zeros() as u16; + start = 64 * i + current_start; + + // Pad LSBs with 1s + current |= current - 1; + + // Find next 0 + while current == std::u64::MAX && i < BITMAP_LENGTH as u16 - 1 { + i += 1; + current = bits[i as usize]; + } + + // Run continues until end of this container + if current == std::u64::MAX { + intervals.push(Interval::new(start, std::u16::MAX)); + break; + } + + let current_last = (!current).trailing_zeros() as u16; + last = 64 * i + current_last; + intervals.push(Interval::new(start, last - 1)); + + // pad LSBs with 0s + current &= current + 1; + } + Run(intervals) + } + Run(ref _intervals) => panic!("Cannot convert run to run"), } } @@ -216,17 +400,28 @@ impl Store { } vec1.extend(iter2); } - (ref mut this @ &mut Bitmap(..), &Array(ref vec)) => { - for &index in vec { - this.insert(index); - } + (this @ &mut Array(..), &Bitmap(..)) => { + *this = this.to_bitmap(); + this.union_with(other); } + // TODO(jpg) union_with array, run + (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 |= index2; } } - (this @ &mut Array(..), &Bitmap(..)) => { + (ref mut this @ &mut Bitmap(..), &Array(ref vec)) => { + for &index in vec { + this.insert(index); + } + } + // TODO(jpg) union_with bitmap, run + (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), + // TODO(jpg) union_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); this.union_with(other); } @@ -254,11 +449,6 @@ impl Store { } } } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { - *index1 &= index2; - } - } (&mut Array(ref mut vec), store @ &Bitmap(..)) => { for i in (0..(vec.len())).rev() { if !store.contains(vec[i]) { @@ -266,11 +456,24 @@ impl Store { } } } + // TODO(jpg) intersect_with array, run + (&mut Array(ref mut _intervals1), &Run(ref _intervals2)) => {} + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { + *index1 &= index2; + } + } (this @ &mut Bitmap(..), &Array(..)) => { let mut new = other.clone(); new.intersect_with(this); *this = new; } + // TODO(jpg) intersect_with bitmap, run + (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), + // TODO(jpg) intersect_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (&mut Run(ref mut _intervals), _store @ &Bitmap(..)) => unimplemented!(), } } @@ -296,6 +499,16 @@ impl Store { } } } + (&mut Array(ref mut vec), store @ &Bitmap(..)) => { + for i in (0..vec.len()).rev() { + if store.contains(vec[i]) { + vec.remove(i); + } + } + } + // TODO(jpg) difference_with array, run + (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), + (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { for index in vec2.iter() { this.remove(*index); @@ -306,13 +519,13 @@ impl Store { *index1 &= !*index2; } } - (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - for i in (0..vec.len()).rev() { - if store.contains(vec[i]) { - vec.remove(i); - } - } - } + // TODO(jpg) difference_with bitmap, run + (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), + + // TODO(jpg) difference_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } } @@ -344,6 +557,18 @@ impl Store { vec1.extend(iter2.cloned()); } } + (this @ &mut Array(..), &Bitmap(..)) => { + let mut new = other.clone(); + new.symmetric_difference_with(this); + *this = new; + } + // TODO(jpg) symmetric_difference_with array, run + (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} + (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { + for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { + *index1 ^= index2; + } + } (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { for index in vec2.iter() { if this.contains(*index) { @@ -353,16 +578,12 @@ impl Store { } } } - (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { - for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { - *index1 ^= index2; - } - } - (this @ &mut Array(..), &Bitmap(..)) => { - let mut new = other.clone(); - new.symmetric_difference_with(this); - *this = new; - } + // TODO(jpg) symmetric_difference_with bitmap, run + (ref mut _this @ &mut Bitmap(..), &Run(ref _vec)) => unimplemented!(), + // TODO(jpg) symmetric_difference_with run, * + (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (_this @ &mut Run(..), &Bitmap(..)) => unimplemented!(), } } @@ -370,6 +591,7 @@ impl Store { match *self { Array(ref vec) => vec.len() as u64, Bitmap(ref bits) => bits.iter().map(|bit| u64::from(bit.count_ones())).sum(), + Run(ref intervals) => intervals.iter().map(|iv| iv.run_len() as u64).sum(), } } @@ -382,6 +604,7 @@ impl Store { .find(|&(_, &bit)| bit != 0) .map(|(index, bit)| index * 64 + (bit.trailing_zeros() as usize)) .unwrap() as u16, + Run(ref intervals) => intervals.first().unwrap().start, } } @@ -395,6 +618,39 @@ impl Store { .find(|&(_, &bit)| bit != 0) .map(|(index, bit)| index * 64 + (63 - bit.leading_zeros() as usize)) .unwrap() as u16, + Run(ref intervals) => intervals.last().unwrap().end, + } + } + + pub fn count_runs(&self) -> u64 { + match *self { + Array(ref vec) => { + vec.iter() + .fold((-2, 0u64), |(prev, runs), &v| { + let new = v as i32; + if prev + 1 != new { + (new, runs + 1) + } else { + (new, runs) + } + }) + .1 + } + Bitmap(ref bits) => { + let mut num_runs = 0u64; + + for i in 0..BITMAP_LENGTH - 1 { + let word = bits[i]; + let next_word = bits[i + 1]; + num_runs += + ((word << 1) & !word).count_ones() as u64 + ((word >> 63) & !next_word); + } + + let last = bits[BITMAP_LENGTH - 1]; + num_runs += ((last << 1) & !last).count_ones() as u64 + (last >> 63); + num_runs + } + Run(ref intervals) => intervals.len() as u64, } } } @@ -406,6 +662,7 @@ impl<'a> IntoIterator for &'a Store { match *self { Array(ref vec) => Iter::Array(vec.iter()), Bitmap(ref bits) => Iter::BitmapBorrowed(BitmapIter::new(&**bits)), + Run(ref intervals) => Iter::Run(RunIter::new(intervals.to_vec())), } } } @@ -417,6 +674,7 @@ impl IntoIterator for Store { match self { Array(vec) => Iter::Vec(vec.into_iter()), Bitmap(bits) => Iter::BitmapOwned(BitmapIter::new(bits)), + Run(intervals) => Iter::Run(RunIter::new(intervals)), } } } @@ -428,6 +686,7 @@ impl PartialEq for Store { (&Bitmap(ref bits1), &Bitmap(ref bits2)) => { bits1.iter().zip(bits2.iter()).all(|(i1, i2)| i1 == i2) } + (&Run(ref intervals1), &Run(ref intervals2)) => intervals1 == intervals2, _ => false, } } @@ -438,7 +697,43 @@ impl Clone for Store { match *self { Array(ref vec) => Array(vec.clone()), Bitmap(ref bits) => Bitmap(Box::new(**bits)), + Run(ref intervals) => Run(intervals.clone().to_vec()), + } + } +} + +impl RunIter { + fn new(intervals: Vec) -> RunIter { + RunIter { + run: 0, + offset: 0, + intervals, + } + } + + fn move_next(&mut self) { + self.offset += 1; + if self.offset == self.intervals[self.run].run_len() { + self.offset = 0; + self.run += 1; + } + } +} + +impl Iterator for RunIter { + type Item = u16; + + fn next(&mut self) -> Option { + if self.run == self.intervals.len() { + return None; } + let result = self.intervals[self.run].start + self.offset as u16; + self.move_next(); + Some(result) + } + + fn size_hint(&self) -> (usize, Option) { + panic!("Should never be called (roaring::Iter caches the size_hint itself)") } } @@ -493,6 +788,7 @@ impl<'a> Iterator for Iter<'a> { Iter::Vec(ref mut inner) => inner.next(), Iter::BitmapBorrowed(ref mut inner) => inner.next(), Iter::BitmapOwned(ref mut inner) => inner.next(), + Iter::Run(ref mut inner) => inner.next(), } } @@ -510,3 +806,32 @@ fn key(index: u16) -> usize { fn bit(index: u16) -> usize { index as usize % 64 } + +impl fmt::Debug for Store { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + match self { + Array(_) => format!( + "Array<{} values from {} to {}>", + self.len(), + self.min(), + self.max() + ) + .fmt(formatter), + Bitmap(_) => format!( + "Bitmap<{} bits set from {} to {}>", + self.len(), + self.min(), + self.max() + ) + .fmt(formatter), + Run(intervals) => format!( + "Run<{} runs totalling {} values from {} to {}>", + intervals.len(), + self.len(), + self.min(), + self.max() + ) + .fmt(formatter), + } + } +} diff --git a/tests/bitmapwithruns.bin b/tests/bitmapwithruns.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ed243753e169295a32d6251db66180f23ceac06 GIT binary patch literal 48056 zcmeIuQyb)3w5Z{vVpUYBB$bL&skUv~wr$(CZQHhO+qPG)cJFo0f7tsf^BLb5*YnMt zfdl{y015&iU;{8fdI(UZDhgDsiv4%-|000=3<=DyNCAKU(!k#=^#A~P^j|*sU-o|$ z{I7=pHSoU{0>HA}0I=r2HvJa>ynrrXJE#Th2(g1)Ln~mWaJl5SR1eaf$h0Bbj9dfq zwJ21fSdLOwIlR(`syAvLsJo)!gr+@OHfUR-V}`CuPoVFM0T+hs7_nf?hzT90)R<9V zPKpKGl5fi&tUj@R!{!;=2kdUKzrx`h$F0-&`45+FTsv@U#k~oSdOU0Js>HhtpOmk_ zj~Bl$fIE-{#0aJXUqdROrqDV2oA)5ai8LEB%*ZkzM~gfa3gjqamB7nU@R~Jm<&t1Go*`10DiTfaky~;4Sb0_zZjnz5~C2zd#5? zKpdpNFvx=uPzGb54qBiK`d~^h4VWIx1ZD+ufVshZU_r15SR5<`mIW(-mBDIYO|TAF zA8Z6R1zUiv!FFIrunX87>;?7(2Y`dYVch zsvW8qY8Yw~Y94A8Y8&bh>Ky78>KW=2>K_^u8X6iA8XX!Jni!fAnjV@Jnj2aWS{zyy zS{YgsS|8dJ+8Wvs+8x>#Iv6?H+nJ`auJsA<%GW6f_o^08NIbK{KH_(0phS zv=mwat%lY?8=)=Gc4!y07dikPhK@lep)=5V=n`}lx&hsW?m-WsC(v`~74#PR0DXqO zLf@fZ&|fG7BQOqAa2V#{2rR=fScfgxg?%_BoCZ!0XM(fBIpExIKDZ!U1TGGjg3H1c z;L30{xF%c&t`9eYo5C&N)^IzxBise<4)=ol!UN#J@Gy8JJO&;QPlBhyGvL|qJa{3z z1YQoWg4e(i!Q7^hEj~{gFY)P-Fx$8X1R7M5Z9qky*%GWC5}m zS%$1c)*$PVO~_Vc2eKR4ha5zXAjgqY$XVn9av8aX+(hmm_mM})Q{)Bm8hM9&M7|*Z zB0rGd2!KK;iV`S|vZ#PcsDf&!f!e5tCec)AS~LTi8O?^~MDw8e(L!iZv;;r(I93WPi&elX zW7V*lSRJfB)(C5gwZK|q?XZqm7pyzh3+sywzy@Q(u#wmpY&|73?~83%iRwz#e1Iu$R~y>^=4g`v?1m z{lxxYAP(agPT~yC;UXTzRXmQHxPvF~6nJVp9i9=-f@jBb;d$`_cwxL4UJ@^Zm&YsN zRq+~lZM+`d5O0Dv$6MiT@eX)ryc^yV?}PWp2jN5U5%_3)96k}Bf=|b1;dAi?_+oq+ zz7k)9ug5pxTk#$EZhRkp5I=$+$4}vB@eBB6{2G1}zk}b$AK_2&7x-)Z9sUvjg8z&E zz<=Wa0TC!c5H!IO0wEC!p%DgQ6CRNyQW0s13`Axk8vMJeuY)!T!JCa?FOpZt>*Ou+F8P3bOgr4~?&sb$nk zY7MoX+C*)oc2K*iebhnf2z8t~MV+NCP?xD|)J^IRb)R}fJ*8eyuc>#`N9qgpFZF}^ zO#w7SqclO&G)oJ#L@TsL8?;S(bdpX*r=>H{ndxkFPC5^rpDsifrAyGI>2h>Mx(Z#L zu0_|S8_2>r*dJDas-bL@F5739{WAsV-41J!yL|>(E(6{M(^h5dy{hWS9zokFWpXsmkclsCo zmku!qgEJHpW_TvT$V`mU8H;flpGnE2VbU|1n5;|=CO4ChDaaIIiZi8{vP=c0GECE6T?%&)5EjEbHfY5 zi^I#pE5mET>%*JETf;lTyTkj!2g66g$HS+>XTul5m&4b>H^X)aV-413JvPavV$-r2*vxD;HYb~h&CeEMi?Suy z(rh`lB3p&6&eme@oHvdxkyFUShAZH`v?kJ@z5{gniDwV&AeK z*w5@&_B;EF{mX_pgu^+C3v)ad;bbnx>72#6oX@4?(s1dyOk7qj2bY`6#}(v?aK*V& zTv@IHSDCBE)#U1M^|?k|Q?3Qqnrp{(~#&F}gN!(O!1~;3V z$1UWRaLc(>+*)n}x0&0FZ0*=W^Y8eN z{1^UT{s;e?2Lwn!1wx<&RuBY9Py|gd1Y7Wgq>xHTD`XHd3)zI6LLMQ%P)H~$ln_b_ z<%Eht6`{INOQD@+h33)6&|!W?0~ zut-=ctPoZU>x7NM7Gb-vOV}$M5Dp8+gphr$!#x$sJOD|`?> z3txrr!Y|>k5E2m)7b!6;@?u1k#h9pzmgtJUm{Lq5rWZ4bS;ZV;ZZV%&P%I)A7fXp{ z#R_6&v6@&@tRvPJ8;MQD7Gi6$o!C+AB6b&hiG9TZ;$U%@I8q!Vju$71Q^gtLY;m5r zP+TG|7gvdE#SP+SahteP+#~K64~a*`6XI#{oOn^ZB3>77iFd^Z;$!id_)>f$z861< z|A^njpW+`8jKC2rLPnSf7ZD@Th#HAU%!m_7L{dajN76+yMzTb*M{-5-MhZja*aMs`GYNA^VyMvg>|M@~h~MlM7yN3KO~M(#xJM;=9< zMqWf-N8Uv~M!rP;jr@rGjsOxQp%Nj{5-SOkBq@?68Imn|Qc_AKrIj*BnWbz}PAQL+ zUn(ROl}bpZrE*e5sftuxswLHx8c2<$W>QP3jnrQ1Bz2W~NWG?`qo1PxM88FUM*l=X8J01blo^?mML8;~a$GiLM^4Bo>Xd_+DjpOVkY7v#(G zHTkA|N4_sVlAp>ij{qEbbvuGCWMDh-szN;9RU(ne{obW*x1J(S)`KV_gY zL>aD(QpPG1l*!68Wu`JmnXfETmMSZh)yg_$qq0TWuIy6wDhHIq$}#1naz;6?TvDzo zH{YDM0u{fQr;>bl+Vgn<-77r`KyFfM8#D~4XeBwQDrrz>Z+x>s;{P0)2Qjy zOlnp&hnic>rxsL;sKwP%YFV{{T3M~8)>P}L_0>jdQ?-TKT5YFxRJ*9%)m~~}b$~ip z9j1;{$Ef4gN$ON}hB{lFr!G{NsLRz=>RNS!x>?<(?o{`v`_)70QT2p+T0N&;RIjMl z)m!Ra^?~|WeWt!t->C1^PwGGFH}$9bM+IYW42zL5CdS3YSTv@_;xRMk#1gR-vDC42 zv5c`SvFx#2vAnSYvBI%pv68VevGTD>v8u5evD&eEv4*iGvF5Q>v9_@evCgq>v7WI$ zvHr0^v7xaMvC*+{v5B!MvFWi{vAMAYvBj}vv6ZnkvGuV{v8}NkvE8wKv4gQAvE#8* zv9qxYvCFY*v750wvHP({v8S;YvDdM8v5&DYv43MfV!va625G29Xtc&^f+lH-rfG&| zYo3Ro7~1b+rasW38FiQfs5N*E(rk zwH{h;t)Dhf8=?)@MrmWU3EE_Bnl@9Lqs`YAX-l;g+G=f`wo%)nZP#{bd$j}FVeOc9 zQahua*Dh&SwHw-P?Vk2fd!jwpUTJT&587w#tM*;{rTx`HaU_n%sdzZf$0Kn$9*gU7 zEAGbqc*=O1c=~vzc-DB1cQ>zc++@`c*-`11Iw_}ch}_~!Vw_|EvA z`2P5z_|f=@`04n$_{I2@`1Sa$_}%z}_~ZDq_{;d4`1|;$_&@P)@t^TOaZrbKOeb|l z=X6nz>Z%^sP2JHGdI~+Yo=(rGXVJ6kx%9kx0llzZOfRXI(aYTrX zo9nIgwt5G>v))bbsrS+Q>x1;6`UrirK2D#gPtm9Av-G+80)4T*Okb(5(bwyn^sV|1 zeYd_(Kd2wkkL#!Ov-$=7vVKj!so&A>>yPxO`V0NF{!ag>f6@Qdf9St;z<>5V|#sp)sG0m81%rWL0i;Shl3S+gg&e&*dF}54K zjJ?JIdRd-Id|kNM5~Y5p-m3$`$ev>1!C zL@R2kR@^cz$4Xc!tkhOIE2EXg%5LSd@>&I~!d5Y>q*cZ$Z&k9YS~aZNRz0hs)x>IU zwX)h;9jwk)H>;=B$LenlvW8kCtkKpuYoay9nr_Xq=2{D^#nv)wrM1RdZ*8)+T05-W z);{Z?b;LSuowCka7p%+HHS4Bz$GUGlvYuKmtk>2%>!bC>`q%nl{k8xbvQe9`X`8hL zTe1~fvklv}Jv(WqveVib?96sHJExt;&Tkj8i`pgZ(snt!qFu$VZr8Hw+70Z+b~C%B z-NtTjce1-H|*Q?J^P{k#C~qSvftVt?9cXB`@8+i{%eOE#K9fP2|K(K zabzdv=#J&Mj_;&&(m3gzOiorOhm+gM=M;2`IK`b(PFbgdQ`xEJ)O6}N^_@meQ>TT~ z+G*!>bh~!`x z`<+A1QRjqn+BxT3bgnqpom;ihm?yXoAFZWcGYo6F7X7H|u@#oUr^8MnM!$*tV$JGW*+nyW`x6?i6>rJIkHxE^rsS%iNXj8h5?B$=&MiaCf`= z+=K2B_qcn?J?ma@FT2;=o9-R=zWd01>b`JayYJkO?icr8_lNu21w6<@J;I|s))PF* zQ#{QxJlpfUq?gJ|>t*mVd)d64ULG&MSI8^smGDY?<-Ces6|cHi%d6`(@EUu~yp~=Y zuf5mF>+1FJdVBr6f!+{rxHrlh>rL<`d(*s`-W+efx5!)St?*WR>%5KL7H_+^%iHT6 z@D6*&yp!G;@4R=(yXxKWZhQBwWM(dtbfp-Y@U37fK)rJV7PG2|f`? z$cb1&Pgn^z;U`ii(j?L+G9|JmawKvm@+Ar;iX@6BN+rrBDkLf=swHYB>LltX8YP-0 zS|nO0+9f(Bx+J|;LZGd|~we$-d}xNrK7pYT)osr__*Mn8+6-OuIc^$Ylg{bGJezl>krujE(tYxuSO zdVWK{iQn9B<+t@a_?`W3eow!T-`^kP5A{d*qy2IIM1P7u-Jj*p^%wYy{bl}2e~rK1 z-{f!gclf*gef~lJh=1Ha<)8I0_?P`_{!Ramf8T%PKlNYuul;xaNB@ieum8jU?E^_D zi6)68on(_jQc5aGEomg}q?b%4Qzg?TGbA%7vn6vT^Ca^p3nhytOC(Dt%Oxu&t0b!@ zYbEO@8zdVin69`4vF`0RH~{|2xb70P7F}00000 literal 0 HcmV?d00001 diff --git a/tests/clone.rs b/tests/clone.rs index fca6e199..b70307bc 100644 --- a/tests/clone.rs +++ b/tests/clone.rs @@ -42,3 +42,16 @@ fn bitmaps() { assert_eq!(clone, original); } + +#[test] +fn runs() { + let mut original = RoaringBitmap::from_iter( + (0..6000) + .chain(1_000_000..1_012_000) + .chain(2_000_000..2_010_000), + ); + original.optimize(); + let clone = original.clone(); + + assert_eq!(clone, original); +} diff --git a/tests/lib.rs b/tests/lib.rs index 88b6932c..c3610c80 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -139,3 +139,25 @@ fn to_array() { assert_eq!(bitmap.contains(i), false); } } + +#[test] +fn optimize_array() { + let mut bitmap = RoaringBitmap::from_iter(0..1000); + assert!(bitmap.optimize()); + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); +} + +#[test] +fn optimize_bitmap() { + let mut bitmap = RoaringBitmap::from_iter(0..5000); + assert!(bitmap.optimize()); +} + +#[test] +fn optimize_run() { + let mut bitmap = RoaringBitmap::from_iter(0..1000); + assert!(bitmap.optimize()); + // Calling optimize a second time should return false as no changes will be made + assert!(!bitmap.optimize()); +} diff --git a/tests/serialization.rs b/tests/serialization.rs index bb20b4f8..98779815 100644 --- a/tests/serialization.rs +++ b/tests/serialization.rs @@ -6,6 +6,7 @@ use roaring::RoaringBitmap; // Test data from https://github.com/RoaringBitmap/RoaringFormatSpec/tree/master/testdata static BITMAP_WITHOUT_RUNS: &[u8] = include_bytes!("bitmapwithoutruns.bin"); +static BITMAP_WITH_RUNS: &[u8] = include_bytes!("bitmapwithruns.bin"); fn test_data_bitmap() -> RoaringBitmap { RoaringBitmap::from_iter( @@ -24,13 +25,24 @@ fn serialize_and_deserialize(bitmap: &RoaringBitmap) -> RoaringBitmap { } #[test] -fn test_deserialize_from_provided_data() { +fn test_deserialize_without_runs_from_provided_data() { assert_eq!( RoaringBitmap::deserialize_from(&mut &BITMAP_WITHOUT_RUNS[..]).unwrap(), test_data_bitmap() ); } +#[test] +fn test_deserialize_with_runs_from_provided_data() { + let mut expected = test_data_bitmap(); + // Call optimize to create run containers + expected.optimize(); + assert_eq!( + RoaringBitmap::deserialize_from(&mut &BITMAP_WITH_RUNS[..]).unwrap(), + expected + ); +} + #[test] fn test_serialize_into_provided_data() { let bitmap = test_data_bitmap(); @@ -39,6 +51,16 @@ fn test_serialize_into_provided_data() { assert!(BITMAP_WITHOUT_RUNS == &buffer[..]); } +#[test] +fn test_serialize_with_runs_into_provided_data() { + let mut bitmap = test_data_bitmap(); + // Call optimize to create run containers + bitmap.optimize(); + let mut buffer = vec![]; + bitmap.serialize_into(&mut buffer).unwrap(); + assert!(BITMAP_WITH_RUNS == &buffer[..]); +} + #[test] fn test_empty() { let original = RoaringBitmap::new(); @@ -520,3 +542,13 @@ fn test_strange() { let new = serialize_and_deserialize(&original); assert_eq!(original, new); } + +#[test] +fn test_runs() { + let mut original = RoaringBitmap::from_iter((1000..3000).chain(70000..77000)); + original.optimize(); + let new = serialize_and_deserialize(&original); + assert_eq!(original.len(), new.len()); + assert_eq!(original.min(), new.min()); + assert_eq!(original.max(), new.max()); +} diff --git a/tests/size_hint.rs b/tests/size_hint.rs index 30a30822..061c1ea2 100644 --- a/tests/size_hint.rs +++ b/tests/size_hint.rs @@ -25,6 +25,18 @@ fn bitmap() { assert_eq!((0, Some(0)), iter.size_hint()); } +#[test] +fn run() { + let mut bitmap = RoaringBitmap::from_iter(0..6000); + bitmap.optimize(); + let mut iter = bitmap.iter(); + assert_eq!((6000, Some(6000)), iter.size_hint()); + iter.by_ref().take(3000).for_each(drop); + assert_eq!((3000, Some(3000)), iter.size_hint()); + iter.by_ref().for_each(drop); + assert_eq!((0, Some(0)), iter.size_hint()); +} + #[test] fn arrays() { let bitmap = RoaringBitmap::from_iter( @@ -58,3 +70,19 @@ fn bitmaps() { iter.by_ref().for_each(drop); assert_eq!((0, Some(0)), iter.size_hint()); } + +#[test] +fn runs() { + let mut bitmap = RoaringBitmap::from_iter( + (0..2000) + .chain(1_000_000..1_002_000) + .chain(2_000_000..2_001_000), + ); + bitmap.optimize(); + let mut iter = bitmap.iter(); + assert_eq!((5000, Some(5000)), iter.size_hint()); + iter.by_ref().take(3000).for_each(drop); + assert_eq!((2000, Some(2000)), iter.size_hint()); + iter.by_ref().for_each(drop); + assert_eq!((0, Some(0)), iter.size_hint()); +} From 3124aa4d5a259dd205b6c7c1423a8ff1192f2022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 17:58:04 +0200 Subject: [PATCH 02/25] Fix some bugs in the run container implementation --- src/bitmap/store.rs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 1e21a1d7..6d99eb9b 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,5 +1,5 @@ use std::borrow::Borrow; -use std::cmp::Ordering::{Equal, Greater, Less}; +use std::cmp::Ordering::{self, Equal, Greater, Less}; use std::vec; use std::{fmt, slice}; @@ -7,12 +7,22 @@ use self::Store::{Array, Bitmap, Run}; pub const BITMAP_LENGTH: usize = 1024; -#[derive(PartialEq, Clone, Debug)] +#[derive(PartialEq, Copy, Clone, Debug)] pub struct Interval { pub start: u16, pub end: u16, } +fn cmp_index_interval(index: u16, iv: Interval) -> Ordering { + if index < iv.start { + Less + } else if index > iv.end { + Greater + } else { + Less + } +} + impl Interval { pub fn new(start: u16, end: u16) -> Interval { Interval { start, end } @@ -66,7 +76,7 @@ impl Store { } } Run(ref mut vec) => { - vec.binary_search_by_key(&index, |iv| iv.start) + vec.binary_search_by(|iv| cmp_index_interval(index, *iv)) .map_err(|loc| { // Value is beyond end of interval if vec[loc].end < index { @@ -118,23 +128,25 @@ impl Store { } } Run(ref mut vec) => vec - .binary_search_by_key(&index, |iv| iv.start) + .binary_search_by(|iv| cmp_index_interval(index, *iv)) .map(|loc| { if index == vec[loc].start && index == vec[loc].end { // Remove entire run if it only contains this value vec.remove(loc); } else if index == vec[loc].end { // Value is last in this interval - vec[loc].end -= 1; + vec[loc].end = index - 1; } else if index == vec[loc].start { // Value is first in this interval - vec[loc].start += 1; + vec[loc].start = index + 1; } else { // Value lies inside the interval, we need to split it - // First shrink the current interval + // First construct a new interval with the right part + let new_interval = Interval::new(index + 1, vec[loc].end); + // Then shrink the current interval vec[loc].end = index - 1; - // Then insert a new index leaving gap where value was removed - vec.insert(loc + 1, Interval::new(index + 1, vec[loc].end)); + // Then insert the new interval leaving gap where value was removed + vec.insert(loc + 1, new_interval); } }) .is_ok(), @@ -198,7 +210,7 @@ impl Store { Array(ref vec) => vec.binary_search(&index).is_ok(), Bitmap(ref bits) => bits[key(index)] & (1 << bit(index)) != 0, Run(ref intervals) => intervals - .binary_search_by_key(&index, |iv| iv.start) + .binary_search_by(|iv| cmp_index_interval(index, *iv)) .is_ok(), } } From 2068bb6809c17c47052d26d1e1e29f26695b8906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:01:14 +0200 Subject: [PATCH 03/25] Fix the to_array/bitmap impl for runs, the end bound is inclusive --- src/bitmap/store.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 6d99eb9b..31fcc90c 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -300,11 +300,7 @@ impl Store { Array(vec) } Run(ref intervals) => Array( - intervals - .iter() - .map(|iv| iv.start..iv.end) - .flatten() - .collect(), + intervals.iter().flat_map(|iv| iv.start..=iv.end).collect() ), } } @@ -322,7 +318,7 @@ impl Store { Run(ref intervals) => { let mut bits = Box::new([0; BITMAP_LENGTH]); for iv in intervals { - for index in iv.start..iv.end { + for index in iv.start..=iv.end { bits[key(index)] |= 1 << bit(index); } } From e605f640b18eded117810ca9cd950778a6e491e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:18:12 +0200 Subject: [PATCH 04/25] Rework the array bitmap intersect_with using Vec::retain --- src/bitmap/store.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 31fcc90c..ee65b61b 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -458,11 +458,7 @@ impl Store { } } (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - for i in (0..(vec.len())).rev() { - if !store.contains(vec[i]) { - vec.remove(i); - } - } + vec.retain(|i| store.contains(*i)); } // TODO(jpg) intersect_with array, run (&mut Array(ref mut _intervals1), &Run(ref _intervals2)) => {} From 9321618e732ff287a7dc3e10b2bfbe22f25c9efd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:19:52 +0200 Subject: [PATCH 05/25] Implement the array run intersect_with operation --- src/bitmap/store.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index ee65b61b..38c974d3 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -460,8 +460,9 @@ impl Store { (&mut Array(ref mut vec), store @ &Bitmap(..)) => { vec.retain(|i| store.contains(*i)); } - // TODO(jpg) intersect_with array, run - (&mut Array(ref mut _intervals1), &Run(ref _intervals2)) => {} + (&mut Array(ref mut vec), run @ &Run(..)) => { + vec.retain(|i| run.contains(*i)); + } (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 &= index2; From a62fc7d059ed0d9cfea273f9f4b3c26a3e4e818a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 18:57:04 +0200 Subject: [PATCH 06/25] Implement the run array intersect_with operation --- src/bitmap/store.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 38c974d3..4c125105 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -477,7 +477,11 @@ impl Store { (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), // TODO(jpg) intersect_with run, * (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (this @ &mut Run(..), &Array(..)) => { + let mut new = other.clone(); + new.intersect_with(this); + *this = new; + }, (&mut Run(ref mut _intervals), _store @ &Bitmap(..)) => unimplemented!(), } } From d658f2851f5c05cce422268968637ea2ad9bffd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 11 Sep 2020 20:38:58 +0200 Subject: [PATCH 07/25] Implement the run run union_with operation --- src/bitmap/store.rs | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 4c125105..d6ed9327 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,7 +1,6 @@ use std::borrow::Borrow; use std::cmp::Ordering::{self, Equal, Greater, Less}; -use std::vec; -use std::{fmt, slice}; +use std::{cmp, fmt, vec, slice}; use self::Store::{Array, Bitmap, Run}; @@ -426,8 +425,39 @@ impl Store { } // TODO(jpg) union_with bitmap, run (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), - // TODO(jpg) union_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { + let mut merged = Vec::new(); + + let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + // Iterate over two iterators and return the lowest value at each step. + let iv = match (iv1, iv2) { + (None, None) => break, + (Some(v1), None) => { iv1 = i1.next(); v1 }, + (None, Some(v2)) => { iv2 = i2.next(); v2 }, + (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { + Equal => { iv1 = i1.next(); iv2 = i2.next(); v1 }, + Less => { iv1 = i1.next(); v1 }, + Greater => { iv2 = i2.next(); v2 }, + }, + }; + + match merged.last_mut() { + // If the list of merged intervals is empty, append the interval. + None => merged.push(*iv), + Some(last) => if last.end < iv.start { + // If the interval does not overlap with the previous, append it. + merged.push(*iv); + } else { + // If there is overlap, so we merge the current and previous intervals. + last.end = cmp::max(last.end, iv.end); + }, + } + } + + *intervals1 = merged; + }, (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); From 0ded0285a08e8aa433e8d83e8e6a1274957274dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 10:57:08 +0200 Subject: [PATCH 08/25] Implement the run array union_with operation --- src/bitmap/store.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index d6ed9327..5f94cb19 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -458,7 +458,11 @@ impl Store { *intervals1 = merged; }, - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (ref mut this @ &mut Run(..), &Array(ref vec)) => { + for i in vec { + this.insert(*i); + } + }, (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); this.union_with(other); From fe8a4ab7137f2102e822279c03bb65166c85d278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:01:50 +0200 Subject: [PATCH 09/25] Implement the array run union_with operation --- src/bitmap/store.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 5f94cb19..618eccc0 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -411,8 +411,11 @@ impl Store { *this = this.to_bitmap(); this.union_with(other); } - // TODO(jpg) union_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} + (this @ &mut Array(..), run @ &Run(..)) => { + let mut new = run.clone(); + new.union_with(this); + *this = new; + } (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 |= index2; @@ -457,12 +460,12 @@ impl Store { } *intervals1 = merged; - }, + } (ref mut this @ &mut Run(..), &Array(ref vec)) => { for i in vec { this.insert(*i); } - }, + } (this @ &mut Run(..), &Bitmap(..)) => { *this = this.to_bitmap(); this.union_with(other); From 613163fc0c0547231b8e9cacf0f90d944c17dfb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:08:23 +0200 Subject: [PATCH 10/25] Implement the bitmap run union_with operation --- src/bitmap/store.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 618eccc0..fd94b1e1 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -411,8 +411,8 @@ impl Store { *this = this.to_bitmap(); this.union_with(other); } - (this @ &mut Array(..), run @ &Run(..)) => { - let mut new = run.clone(); + (this @ &mut Array(..), &Run(..)) => { + let mut new = other.clone(); new.union_with(this); *this = new; } @@ -426,8 +426,10 @@ impl Store { this.insert(index); } } - // TODO(jpg) union_with bitmap, run - (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), + (this @ &mut Bitmap(..), &Run(..)) => { + let other = other.to_bitmap(); + this.union_with(&other); + } (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { let mut merged = Vec::new(); From 0a664836772ba30a58ae1224c6169c560f524367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:26:44 +0200 Subject: [PATCH 11/25] Implement the run run intersect_with operation --- src/bitmap/store.rs | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index fd94b1e1..6ad93793 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -514,8 +514,36 @@ impl Store { } // TODO(jpg) intersect_with bitmap, run (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), - // TODO(jpg) intersect_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { + let mut merged = Vec::new(); + + let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + if let (Some(v1), Some(v2)) = (iv1, iv2) { + let start = cmp::max(v1.start, v2.start); + let end = cmp::min(v1.end, v2.end); + let iv = Interval::new(start, end); + if iv.run_len() > 0 { + merged.push(iv); + } + } + + // Iterate over two iterators, consuming the lowest first, like merge join. + match (iv1, iv2) { + (None, None) => break, + (Some(v1), None) => iv1 = i1.next(), + (None, Some(v2)) => iv2 = i2.next(), + (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { + Equal => { iv1 = i1.next(); iv2 = i2.next(); }, + Less => iv1 = i1.next(), + Greater => iv2 = i2.next(), + }, + } + } + + *intervals1 = merged; + }, (this @ &mut Run(..), &Array(..)) => { let mut new = other.clone(); new.intersect_with(this); From 9af436678706efc495d543a71a5ccd9fd70f11a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:33:12 +0200 Subject: [PATCH 12/25] Implement the bitmap run intersect_with operation --- src/bitmap/store.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 6ad93793..515b7889 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -512,8 +512,10 @@ impl Store { new.intersect_with(this); *this = new; } - // TODO(jpg) intersect_with bitmap, run - (_this @ &mut Bitmap(..), &Run(..)) => unimplemented!(), + (this @ &mut Bitmap(..), &Run(..)) => { + let other = other.to_bitmap(); + this.intersect_with(&other); + } (&mut Run(ref mut intervals1), &Run(ref intervals2)) => { let mut merged = Vec::new(); From 9612ae98ffe8b0e193d533a71548e7210aafdd9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 11:33:27 +0200 Subject: [PATCH 13/25] Implement the run bitmap intersect_with operation --- src/bitmap/store.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 515b7889..94e1559c 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -545,13 +545,17 @@ impl Store { } *intervals1 = merged; - }, + } (this @ &mut Run(..), &Array(..)) => { let mut new = other.clone(); new.intersect_with(this); *this = new; - }, - (&mut Run(ref mut _intervals), _store @ &Bitmap(..)) => unimplemented!(), + } + (this @ &mut Run(..), &Bitmap(..)) => { + let mut new = other.clone(); + new.intersect_with(this); + *this = new; + } } } From 4ae8986a9cec36239c99dea5e29ff57b03b32af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 18:33:19 +0200 Subject: [PATCH 14/25] Simplify the run run intersect_with operation --- src/bitmap/store.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 94e1559c..9566d8db 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -521,26 +521,23 @@ impl Store { let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); let (mut iv1, mut iv2) = (i1.next(), i2.next()); - loop { - if let (Some(v1), Some(v2)) = (iv1, iv2) { + + // Iterate over both iterators. + while let (Some(v1), Some(v2)) = (iv1, iv2) { + if v2.start <= v1.end && v1.start <= v2.end { let start = cmp::max(v1.start, v2.start); let end = cmp::min(v1.end, v2.end); let iv = Interval::new(start, end); - if iv.run_len() > 0 { - merged.push(iv); - } + merged.push(iv); } - // Iterate over two iterators, consuming the lowest first, like merge join. - match (iv1, iv2) { - (None, None) => break, - (Some(v1), None) => iv1 = i1.next(), - (None, Some(v2)) => iv2 = i2.next(), - (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { - Equal => { iv1 = i1.next(); iv2 = i2.next(); }, - Less => iv1 = i1.next(), - Greater => iv2 = i2.next(), - }, + if v1.end < v2.end { + iv1 = i1.next(); + } else if v1.end > v2.end { + iv2 = i2.next(); + } else { + iv1 = i1.next(); + iv2 = i2.next(); } } From 924d4dbeea361174ffc20705b78c710c74919850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 19:47:58 +0200 Subject: [PATCH 15/25] Implement the remove_range operation for the run store type --- src/bitmap/store.rs | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 9566d8db..d0677162 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -199,8 +199,44 @@ impl Store { bits[end_key] &= !(!0u64).wrapping_shr(64 - end_bit); u64::from(removed) } - // TODO(jpg): Remove range - Run(ref mut _intervals) => unimplemented!(), + // TODO we must test that algorithm + Run(ref mut intervals) => { + let mut count = 0; + let mut search_end = false; + + for iv in intervals.iter_mut() { + if !search_end && cmp_index_interval(start as u16, *iv) == Equal { + count += Interval::new(iv.end, start as u16).run_len(); + iv.end = start as u16; + search_end = true; + } + + if search_end { + // The end bound is non-inclusive therefore we must search for end - 1. + match cmp_index_interval(end as u16 - 1, *iv) { + Less => { + // We invalidate the intervals that are contained in + // the start and end but doesn't touch the bounds. + count += iv.run_len(); + *iv = Interval::new(u16::max_value(), 0); + }, + Equal => { + // We shrink this interval by moving the start of it to be + // the end bound which is non-inclusive. + count += Interval::new(end as u16, iv.start).run_len(); + iv.start = end as u16; + }, + Greater => break, + } + } + } + + // We invalidated the intervals to remove, + // the start is greater than the end. + intervals.retain(|iv| iv.start <= iv.end); + + count + }, } } From d7bcad321b41c392acc12d29b20de2954c6777bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 12 Sep 2020 19:50:34 +0200 Subject: [PATCH 16/25] Implement the run array and array run is_disjoint operation --- src/bitmap/store.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index d0677162..36bcede0 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -273,8 +273,8 @@ impl Store { } // TODO(jpg) is_disjoint (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&Run(ref _intervals), &Array(ref _vec)) | (&Array(ref _vec), &Run(ref _intervals)) => { - unimplemented!() + (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { + vec.iter().all(|&i| !run.contains(i)) } (&Run(ref _intervals), _store @ &Bitmap(..)) | (_store @ &Bitmap(..), &Run(ref _intervals)) => unimplemented!(), From cb69d80b5073729066a6d8e586c02a496e7d00d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 13:20:55 +0200 Subject: [PATCH 17/25] Implement the run run is_disjoint operation --- src/bitmap/store.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 36bcede0..2ebf6106 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -272,7 +272,29 @@ impl Store { vec.iter().all(|&i| !store.contains(i)) } // TODO(jpg) is_disjoint - (&Run(ref _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (&Run(ref intervals1), &Run(ref intervals2)) => { + let (mut i1, mut i2) = (intervals1.iter(), intervals2.iter()); + let (mut iv1, mut iv2) = (i1.next(), i2.next()); + loop { + match (iv1, iv2) { + (Some(v1), Some(v2)) => { + if v2.start <= v1.end && v1.start <= v2.end { + return false; + } + + if v1.end < v2.end { + iv1 = i1.next(); + } else if v1.end > v2.end { + iv2 = i2.next(); + } else { + iv1 = i1.next(); + iv2 = i2.next(); + } + }, + (_, _) => return true, + } + } + }, (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { vec.iter().all(|&i| !run.contains(i)) } From c77c0f867469838288f78acf3a8f91542494725b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 15:30:24 +0200 Subject: [PATCH 18/25] Simplify the array bitmap difference_with operation --- src/bitmap/store.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 2ebf6106..8981c1e7 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -637,11 +637,7 @@ impl Store { } } (&mut Array(ref mut vec), store @ &Bitmap(..)) => { - for i in (0..vec.len()).rev() { - if store.contains(vec[i]) { - vec.remove(i); - } - } + vec.retain(|i| !store.contains(*i)); } // TODO(jpg) difference_with array, run (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), From 3a9eefdecb5503dd1b96ceb2fd226517b010423b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 15:30:47 +0200 Subject: [PATCH 19/25] Implement the array run difference_with operation --- src/bitmap/store.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 8981c1e7..f80d22ab 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -639,9 +639,9 @@ impl Store { (&mut Array(ref mut vec), store @ &Bitmap(..)) => { vec.retain(|i| !store.contains(*i)); } - // TODO(jpg) difference_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), - + (&mut Array(ref mut vec), run @ &Run(..)) => { + vec.retain(|i| !run.contains(*i)); + } (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { for index in vec2.iter() { this.remove(*index); From 183c1bbb76ad8351f1089abfd219bcb55d906baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 14:58:45 +0200 Subject: [PATCH 20/25] Implement the bitmap run difference_with operation --- src/bitmap/store.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index f80d22ab..02e8b142 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -652,9 +652,11 @@ impl Store { *index1 &= !*index2; } } - // TODO(jpg) difference_with bitmap, run - (ref mut _this @ &mut Bitmap(..), &Run(ref _intervals)) => unimplemented!(), - + (ref mut this @ &mut Bitmap(..), &Run(ref intervals)) => { + for iv in intervals { + this.remove_range(iv.start as u32, iv.end as u32 + 1); + } + } // TODO(jpg) difference_with run, * (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), From 07d0fcc36ae31659740678b273bae3afa12a3eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 15:35:40 +0200 Subject: [PATCH 21/25] Clippy and fmt pass --- src/bitmap/store.rs | 88 +++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 02e8b142..97121c0b 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -1,6 +1,6 @@ use std::borrow::Borrow; use std::cmp::Ordering::{self, Equal, Greater, Less}; -use std::{cmp, fmt, vec, slice}; +use std::{cmp, fmt, slice, vec}; use self::Store::{Array, Bitmap, Run}; @@ -219,13 +219,13 @@ impl Store { // the start and end but doesn't touch the bounds. count += iv.run_len(); *iv = Interval::new(u16::max_value(), 0); - }, + } Equal => { // We shrink this interval by moving the start of it to be // the end bound which is non-inclusive. count += Interval::new(end as u16, iv.start).run_len(); iv.start = end as u16; - }, + } Greater => break, } } @@ -236,7 +236,7 @@ impl Store { intervals.retain(|iv| iv.start <= iv.end); count - }, + } } } @@ -282,19 +282,19 @@ impl Store { return false; } - if v1.end < v2.end { - iv1 = i1.next(); - } else if v1.end > v2.end { - iv2 = i2.next(); - } else { - iv1 = i1.next(); - iv2 = i2.next(); + match v1.end.cmp(&v2.end) { + Less => iv1 = i1.next(), + Greater => iv2 = i2.next(), + Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + } } - }, + } (_, _) => return true, } } - }, + } (run @ &Run(..), &Array(ref vec)) | (&Array(ref vec), run @ &Run(..)) => { vec.iter().all(|&i| !run.contains(i)) } @@ -356,9 +356,9 @@ impl Store { } Array(vec) } - Run(ref intervals) => Array( - intervals.iter().flat_map(|iv| iv.start..=iv.end).collect() - ), + Run(ref intervals) => { + Array(intervals.iter().flat_map(|iv| iv.start..=iv.end).collect()) + } } } @@ -497,25 +497,43 @@ impl Store { // Iterate over two iterators and return the lowest value at each step. let iv = match (iv1, iv2) { (None, None) => break, - (Some(v1), None) => { iv1 = i1.next(); v1 }, - (None, Some(v2)) => { iv2 = i2.next(); v2 }, + (Some(v1), None) => { + iv1 = i1.next(); + v1 + } + (None, Some(v2)) => { + iv2 = i2.next(); + v2 + } (Some(v1), Some(v2)) => match v1.start.cmp(&v2.start) { - Equal => { iv1 = i1.next(); iv2 = i2.next(); v1 }, - Less => { iv1 = i1.next(); v1 }, - Greater => { iv2 = i2.next(); v2 }, + Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + v1 + } + Less => { + iv1 = i1.next(); + v1 + } + Greater => { + iv2 = i2.next(); + v2 + } }, }; match merged.last_mut() { // If the list of merged intervals is empty, append the interval. None => merged.push(*iv), - Some(last) => if last.end < iv.start { - // If the interval does not overlap with the previous, append it. - merged.push(*iv); - } else { - // If there is overlap, so we merge the current and previous intervals. - last.end = cmp::max(last.end, iv.end); - }, + Some(last) => { + if last.end < iv.start { + // If the interval does not overlap with the previous, append it. + merged.push(*iv); + } else { + // If there is overlap, so we merge the current and previous intervals. + last.end = cmp::max(last.end, iv.end); + } + } } } @@ -589,13 +607,13 @@ impl Store { merged.push(iv); } - if v1.end < v2.end { - iv1 = i1.next(); - } else if v1.end > v2.end { - iv2 = i2.next(); - } else { - iv1 = i1.next(); - iv2 = i2.next(); + match v1.end.cmp(&v2.end) { + Less => iv1 = i1.next(), + Greater => iv2 = i2.next(), + Equal => { + iv1 = i1.next(); + iv2 = i2.next(); + } } } From 3c99804e0758798e84a3cf2f2fdc1306b1fca542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 18:07:05 +0200 Subject: [PATCH 22/25] Implement the run array difference_with operation --- src/bitmap/store.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 97121c0b..18d18ff8 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -661,7 +661,7 @@ impl Store { vec.retain(|i| !run.contains(*i)); } (ref mut this @ &mut Bitmap(..), &Array(ref vec2)) => { - for index in vec2.iter() { + for index in vec2 { this.remove(*index); } } @@ -677,7 +677,11 @@ impl Store { } // TODO(jpg) difference_with run, * (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), - (&mut Run(ref mut _intervals), &Array(ref _vec)) => unimplemented!(), + (ref mut this @ &mut Run(..), &Array(ref vec)) => { + for i in vec { + this.remove(*i); + } + } (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } } From c762f938a6bbec52f6d38f42f504b913c95315db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 16:36:47 +0200 Subject: [PATCH 23/25] Mark array run symmetric_difference_with operation as unimplemented --- src/bitmap/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 18d18ff8..ad628596 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -720,7 +720,7 @@ impl Store { *this = new; } // TODO(jpg) symmetric_difference_with array, run - (&mut Array(ref mut _vec), &Run(ref _intervals)) => {} + (&mut Array(ref mut _vec), &Run(ref _intervals)) => unimplemented!(), (&mut Bitmap(ref mut bits1), &Bitmap(ref bits2)) => { for (index1, &index2) in bits1.iter_mut().zip(bits2.iter()) { *index1 ^= index2; From 9744f12a9f0a829b57aa75fef11ba45a4db3c8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 16:57:54 +0200 Subject: [PATCH 24/25] Implement the array run is_subset operation --- src/bitmap/store.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index ad628596..949aff54 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -324,8 +324,7 @@ impl Store { } } (&Array(ref vec), store @ &Bitmap(..)) => vec.iter().all(|&i| store.contains(i)), - // TODO(jpg) is_subset array, run - (&Array(ref _vec), &Run(ref _intervals)) => unimplemented!(), + (&Array(ref vec), run @ &Run(..)) => vec.iter().all(|&i| run.contains(i)), (&Bitmap(ref bits1), &Bitmap(ref bits2)) => bits1 .iter() From 67784ad109cafbbb5104d3b76a0511875ab1c997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 13 Sep 2020 18:32:16 +0200 Subject: [PATCH 25/25] Implement the run run difference_with operation --- src/bitmap/store.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/bitmap/store.rs b/src/bitmap/store.rs index 949aff54..930ba83a 100644 --- a/src/bitmap/store.rs +++ b/src/bitmap/store.rs @@ -674,13 +674,17 @@ impl Store { this.remove_range(iv.start as u32, iv.end as u32 + 1); } } - // TODO(jpg) difference_with run, * - (&mut Run(ref mut _intervals1), &Run(ref _intervals2)) => unimplemented!(), + (ref mut this @ &mut Run(..), &Run(ref intervals2)) => { + for iv in intervals2 { + this.remove_range(iv.start as u32, iv.end as u32 + 1); + } + } (ref mut this @ &mut Run(..), &Array(ref vec)) => { for i in vec { this.remove(*i); } } + // TODO(jpg) difference_with run bitmap (&mut Run(ref mut _vec), _store @ &Bitmap(..)) => unimplemented!(), } }