From ea3ae5dffead0d936e1fb8fc7739c8ea397353f9 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Fri, 19 Aug 2022 14:09:44 +0200 Subject: [PATCH] move the multiop to a different module --- src/bitmap/mod.rs | 1 + src/bitmap/multiops.rs | 278 +++++++++++++++++++++++++++++ src/bitmap/ops.rs | 276 +---------------------------- src/treemap/mod.rs | 1 + src/treemap/multiops.rs | 376 ++++++++++++++++++++++++++++++++++++++++ src/treemap/ops.rs | 374 +-------------------------------------- 6 files changed, 658 insertions(+), 648 deletions(-) create mode 100644 src/bitmap/multiops.rs create mode 100644 src/treemap/multiops.rs diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index f99f73eb5..8eac5b8cf 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -1,6 +1,7 @@ mod arbitrary; mod container; mod fmt; +mod multiops; mod proptests; mod store; mod util; diff --git a/src/bitmap/multiops.rs b/src/bitmap/multiops.rs new file mode 100644 index 000000000..01065b9d5 --- /dev/null +++ b/src/bitmap/multiops.rs @@ -0,0 +1,278 @@ +use std::{ + borrow::Cow, + convert::Infallible, + mem, + ops::{BitAndAssign, BitOrAssign, BitXorAssign, SubAssign}, +}; + +use retain_mut::RetainMut; + +use crate::{IterExt, RoaringBitmap}; + +use super::{container::Container, store::Store}; + +impl IterExt for I +where + I: IntoIterator, +{ + type Output = RoaringBitmap; + + fn or(self) -> Self::Output { + try_naive_lazy_multi_op_owned(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + BitOrAssign::bitor_assign(a, b) + }) + .unwrap() + } + + fn and(self) -> Self::Output { + try_simple_multi_op_owned(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + BitAndAssign::bitand_assign(a, b) + }) + .unwrap() + } + + fn sub(self) -> Self::Output { + try_simple_multi_op_owned(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + SubAssign::sub_assign(a, b) + }) + .unwrap() + } + + fn xor(self) -> Self::Output { + try_naive_lazy_multi_op_owned(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + BitXorAssign::bitxor_assign(a, b) + }) + .unwrap() + } +} + +impl IterExt> for I +where + I: IntoIterator>, +{ + type Output = Result; + + fn or(self) -> Self::Output { + try_naive_lazy_multi_op_owned(self, |a, b| BitOrAssign::bitor_assign(a, b)) + } + + fn and(self) -> Self::Output { + try_simple_multi_op_owned(self, |a, b| BitAndAssign::bitand_assign(a, b)) + } + + fn sub(self) -> Self::Output { + try_simple_multi_op_owned(self, |a, b| SubAssign::sub_assign(a, b)) + } + + fn xor(self) -> Self::Output { + try_naive_lazy_multi_op_owned(self, |a, b| BitXorAssign::bitxor_assign(a, b)) + } +} + +impl<'a, I> IterExt<&'a RoaringBitmap> for I +where + I: IntoIterator, +{ + type Output = RoaringBitmap; + + fn or(self) -> Self::Output { + try_naive_lazy_multi_op_ref(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + BitOrAssign::bitor_assign(a, b) + }) + .unwrap() + } + + fn and(self) -> Self::Output { + try_simple_multi_op_ref(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + BitAndAssign::bitand_assign(a, b) + }) + .unwrap() + } + + fn sub(self) -> Self::Output { + try_simple_multi_op_ref(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + SubAssign::sub_assign(a, b) + }) + .unwrap() + } + + fn xor(self) -> Self::Output { + try_naive_lazy_multi_op_ref(self.into_iter().map(Ok::<_, Infallible>), |a, b| { + BitXorAssign::bitxor_assign(a, b) + }) + .unwrap() + } +} + +impl<'a, I, E: 'a> IterExt> for I +where + I: IntoIterator>, +{ + type Output = Result; + + fn or(self) -> Self::Output { + try_naive_lazy_multi_op_ref(self, |a, b| BitOrAssign::bitor_assign(a, b)) + } + + fn and(self) -> Self::Output { + try_simple_multi_op_ref(self, |a, b| BitAndAssign::bitand_assign(a, b)) + } + + fn sub(self) -> Self::Output { + try_simple_multi_op_ref(self, |a, b| SubAssign::sub_assign(a, b)) + } + + fn xor(self) -> Self::Output { + try_naive_lazy_multi_op_ref(self, |a, b| BitXorAssign::bitxor_assign(a, b)) + } +} + +#[inline] +fn try_simple_multi_op_owned( + bitmaps: impl IntoIterator>, + op: impl Fn(&mut RoaringBitmap, RoaringBitmap), +) -> Result { + let mut iter = bitmaps.into_iter(); + match iter.next().transpose()? { + Some(mut lhs) => { + for rhs in iter { + if lhs.is_empty() { + return Ok(lhs); + } + op(&mut lhs, rhs?); + } + Ok(lhs) + } + None => Ok(RoaringBitmap::default()), + } +} + +#[inline] +fn try_simple_multi_op_ref<'a, E>( + bitmaps: impl IntoIterator>, + op: impl Fn(&mut RoaringBitmap, &RoaringBitmap), +) -> Result { + let mut iter = bitmaps.into_iter(); + match iter.next().transpose()?.cloned() { + Some(mut lhs) => { + for rhs in iter { + if lhs.is_empty() { + return Ok(lhs); + } + op(&mut lhs, rhs?); + } + + Ok(lhs) + } + None => Ok(RoaringBitmap::default()), + } +} + +#[inline] +fn try_naive_lazy_multi_op_owned( + bitmaps: impl IntoIterator>, + op: impl Fn(&mut Store, &Store), +) -> Result { + let mut iter = bitmaps.into_iter(); + let mut containers = match iter.next().transpose()? { + None => Vec::new(), + Some(v) => v.containers, + }; + + for bitmap in iter { + for mut rhs in bitmap?.containers { + match containers.binary_search_by_key(&rhs.key, |c| c.key) { + Err(loc) => containers.insert(loc, rhs), + Ok(loc) => { + let lhs = &mut containers[loc]; + match (&lhs.store, &rhs.store) { + (Store::Array(..), Store::Array(..)) => lhs.store = lhs.store.to_bitmap(), + (Store::Array(..), Store::Bitmap(..)) => mem::swap(lhs, &mut rhs), + _ => (), + }; + op(&mut lhs.store, &rhs.store); + } + } + } + } + + RetainMut::retain_mut(&mut containers, |container| { + container.ensure_correct_store(); + container.len() > 0 + }); + + Ok(RoaringBitmap { containers }) +} + +#[inline] +fn try_naive_lazy_multi_op_ref<'a, E: 'a>( + bitmaps: impl IntoIterator>, + op: impl Fn(&mut Store, &Store), +) -> Result { + // + // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet) + // any others with the same key. + // + // 1. Eager cloning would create useless intermediate values that might become bitmaps + // 2. Eager promoting forces disjoint containers to converted back to arrays at the end + // + // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on. + // More memory efficient, negligible wall time difference benchmarks + + // Phase 1. Borrow all the containers from the first element. + let mut iter = bitmaps.into_iter(); + let mut containers: Vec> = match iter.next().transpose()? { + None => Vec::new(), + Some(v) => v.containers.iter().map(Cow::Borrowed).collect(), + }; + + // Phase 2: Operate on the remaining contaners + for bitmap in iter { + for rhs in &bitmap?.containers { + match containers.binary_search_by_key(&rhs.key, |c| c.key) { + Err(loc) => { + // A container not currently in containers. Borrow it. + containers.insert(loc, Cow::Borrowed(rhs)) + } + Ok(loc) => { + // A container that is in containers. Operate on it. + let lhs = &mut containers[loc]; + match (&lhs.store, &rhs.store) { + (Store::Array(..), Store::Array(..)) => { + // We had borrowed an array. Without cloning it, create a new bitmap + // Add all the elements to the new bitmap + let mut store = lhs.store.to_bitmap(); + op(&mut store, &rhs.store); + *lhs = Cow::Owned(Container { key: lhs.key, store }); + } + (Store::Array(..), Store::Bitmap(..)) => { + // We had borrowed an array. Copy the rhs bitmap, add lhs to it + let mut store = rhs.store.clone(); + op(&mut store, &lhs.store); + *lhs = Cow::Owned(Container { key: lhs.key, store }); + } + (Store::Bitmap(..), _) => { + // This might be a owned or borrowed bitmap. + // If it was borrowed it will clone-on-write + op(&mut lhs.to_mut().store, &rhs.store); + } + }; + } + } + } + } + + // Phase 3: Clean up + let containers: Vec = containers + .into_iter() + .map(|c| { + // Any borrowed bitmaps or arrays left over get cloned here + let mut container = c.into_owned(); + container.ensure_correct_store(); + container + }) + .filter(|container| container.len() > 0) + .collect(); + + Ok(RoaringBitmap { containers }) +} diff --git a/src/bitmap/ops.rs b/src/bitmap/ops.rs index fad7b2bd3..04ec428f6 100644 --- a/src/bitmap/ops.rs +++ b/src/bitmap/ops.rs @@ -1,13 +1,11 @@ -use std::borrow::Cow; use std::mem; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; use retain_mut::RetainMut; use crate::bitmap::container::Container; -use crate::bitmap::store::Store; use crate::bitmap::Pairs; -use crate::{IterExt, RoaringBitmap}; +use crate::RoaringBitmap; impl RoaringBitmap { /// Computes the len of the intersection with the specified other bitmap without creating a @@ -441,278 +439,6 @@ impl BitXorAssign<&RoaringBitmap> for RoaringBitmap { } } -impl IterExt for I -where - I: IntoIterator, -{ - type Output = RoaringBitmap; - - fn or(self) -> Self::Output { - try_naive_lazy_multi_op_owned( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - |a, b| BitOrAssign::bitor_assign(a, b), - ) - .unwrap() - } - - fn and(self) -> Self::Output { - try_simple_multi_op_owned( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - |a, b| BitAndAssign::bitand_assign(a, b), - ) - .unwrap() - } - - fn sub(self) -> Self::Output { - try_simple_multi_op_owned( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - |a, b| SubAssign::sub_assign(a, b), - ) - .unwrap() - } - - fn xor(self) -> Self::Output { - try_naive_lazy_multi_op_owned( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - |a, b| BitXorAssign::bitxor_assign(a, b), - ) - .unwrap() - } -} - -impl IterExt> for I -where - I: IntoIterator>, -{ - type Output = Result; - - fn or(self) -> Self::Output { - try_naive_lazy_multi_op_owned(self, |a, b| BitOrAssign::bitor_assign(a, b)) - } - - fn and(self) -> Self::Output { - try_simple_multi_op_owned(self, |a, b| BitAndAssign::bitand_assign(a, b)) - } - - fn sub(self) -> Self::Output { - try_simple_multi_op_owned(self, |a, b| SubAssign::sub_assign(a, b)) - } - - fn xor(self) -> Self::Output { - try_naive_lazy_multi_op_owned(self, |a, b| BitXorAssign::bitxor_assign(a, b)) - } -} - -impl<'a, I> IterExt<&'a RoaringBitmap> for I -where - I: IntoIterator, -{ - type Output = RoaringBitmap; - - fn or(self) -> Self::Output { - try_naive_lazy_multi_op_ref( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - |a, b| BitOrAssign::bitor_assign(a, b), - ) - .unwrap() - } - - fn and(self) -> Self::Output { - try_simple_multi_op_ref(self.into_iter().map(Ok::<_, std::convert::Infallible>), |a, b| { - BitAndAssign::bitand_assign(a, b) - }) - .unwrap() - } - - fn sub(self) -> Self::Output { - try_simple_multi_op_ref(self.into_iter().map(Ok::<_, std::convert::Infallible>), |a, b| { - SubAssign::sub_assign(a, b) - }) - .unwrap() - } - - fn xor(self) -> Self::Output { - try_naive_lazy_multi_op_ref( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - |a, b| BitXorAssign::bitxor_assign(a, b), - ) - .unwrap() - } -} - -impl<'a, I, E: 'a> IterExt> for I -where - I: IntoIterator>, -{ - type Output = Result; - - fn or(self) -> Self::Output { - try_naive_lazy_multi_op_ref(self, |a, b| BitOrAssign::bitor_assign(a, b)) - } - - fn and(self) -> Self::Output { - try_simple_multi_op_ref(self, |a, b| BitAndAssign::bitand_assign(a, b)) - } - - fn sub(self) -> Self::Output { - try_simple_multi_op_ref(self, |a, b| SubAssign::sub_assign(a, b)) - } - - fn xor(self) -> Self::Output { - try_naive_lazy_multi_op_ref(self, |a, b| BitXorAssign::bitxor_assign(a, b)) - } -} - -#[inline] -fn try_simple_multi_op_owned( - bitmaps: impl IntoIterator>, - op: impl Fn(&mut RoaringBitmap, RoaringBitmap), -) -> Result { - let mut iter = bitmaps.into_iter(); - match iter.next().transpose()? { - Some(mut lhs) => { - for rhs in iter { - if lhs.is_empty() { - return Ok(lhs); - } - op(&mut lhs, rhs?); - } - Ok(lhs) - } - None => Ok(RoaringBitmap::default()), - } -} - -#[inline] -fn try_simple_multi_op_ref<'a, E>( - bitmaps: impl IntoIterator>, - op: impl Fn(&mut RoaringBitmap, &RoaringBitmap), -) -> Result { - let mut iter = bitmaps.into_iter(); - match iter.next().transpose()?.cloned() { - Some(mut lhs) => { - for rhs in iter { - if lhs.is_empty() { - return Ok(lhs); - } - op(&mut lhs, rhs?); - } - - Ok(lhs) - } - None => Ok(RoaringBitmap::default()), - } -} - -#[inline] -fn try_naive_lazy_multi_op_owned( - bitmaps: impl IntoIterator>, - op: impl Fn(&mut Store, &Store), -) -> Result { - let mut iter = bitmaps.into_iter(); - let mut containers = match iter.next().transpose()? { - None => Vec::new(), - Some(v) => v.containers, - }; - - for bitmap in iter { - for mut rhs in bitmap?.containers { - match containers.binary_search_by_key(&rhs.key, |c| c.key) { - Err(loc) => containers.insert(loc, rhs), - Ok(loc) => { - let lhs = &mut containers[loc]; - match (&lhs.store, &rhs.store) { - (Store::Array(..), Store::Array(..)) => lhs.store = lhs.store.to_bitmap(), - (Store::Array(..), Store::Bitmap(..)) => mem::swap(lhs, &mut rhs), - _ => (), - }; - op(&mut lhs.store, &rhs.store); - } - } - } - } - - RetainMut::retain_mut(&mut containers, |container| { - container.ensure_correct_store(); - container.len() > 0 - }); - - Ok(RoaringBitmap { containers }) -} - -#[inline] -fn try_naive_lazy_multi_op_ref<'a, E: 'a>( - bitmaps: impl IntoIterator>, - op: impl Fn(&mut Store, &Store), -) -> Result { - // - // This algorithm operates on bitmaps. It must deal with arrays for which there are not (yet) - // any others with the same key. - // - // 1. Eager cloning would create useless intermediate values that might become bitmaps - // 2. Eager promoting forces disjoint containers to converted back to arrays at the end - // - // This strategy uses COW to lazily promote arrays to bitmaps as they are operated on. - // More memory efficient, negligible wall time difference benchmarks - - // Phase 1. Borrow all the containers from the first element. - let mut iter = bitmaps.into_iter(); - let mut containers: Vec> = match iter.next().transpose()? { - None => Vec::new(), - Some(v) => v.containers.iter().map(Cow::Borrowed).collect(), - }; - - // Phase 2: Operate on the remaining contaners - for bitmap in iter { - for rhs in &bitmap?.containers { - match containers.binary_search_by_key(&rhs.key, |c| c.key) { - Err(loc) => { - // A container not currently in containers. Borrow it. - containers.insert(loc, Cow::Borrowed(rhs)) - } - Ok(loc) => { - // A container that is in containers. Operate on it. - let lhs = &mut containers[loc]; - match (&lhs.store, &rhs.store) { - (Store::Array(..), Store::Array(..)) => { - // We had borrowed an array. Without cloning it, create a new bitmap - // Add all the elements to the new bitmap - let mut store = lhs.store.to_bitmap(); - op(&mut store, &rhs.store); - *lhs = Cow::Owned(Container { key: lhs.key, store }); - } - (Store::Array(..), Store::Bitmap(..)) => { - // We had borrowed an array. Copy the rhs bitmap, add lhs to it - let mut store = rhs.store.clone(); - op(&mut store, &lhs.store); - *lhs = Cow::Owned(Container { key: lhs.key, store }); - } - (Store::Bitmap(..), _) => { - // This might be a owned or borrowed bitmap. - // If it was borrowed it will clone-on-write - op(&mut lhs.to_mut().store, &rhs.store); - } - }; - } - } - } - } - - // Phase 3: Clean up - let containers: Vec = containers - .into_iter() - .map(|c| { - // Any borrowed bitmaps or arrays left over get cloned here - let mut container = c.into_owned(); - container.ensure_correct_store(); - container - }) - .filter(|container| container.len() > 0) - .collect(); - - Ok(RoaringBitmap { containers }) -} - #[cfg(test)] mod test { use crate::{IterExt, RoaringBitmap}; diff --git a/src/treemap/mod.rs b/src/treemap/mod.rs index 3da971bf3..d5a669b94 100644 --- a/src/treemap/mod.rs +++ b/src/treemap/mod.rs @@ -2,6 +2,7 @@ use crate::RoaringBitmap; use std::collections::BTreeMap; mod fmt; +mod multiops; mod util; // Order of these modules matters as it determines the `impl` blocks order in diff --git a/src/treemap/multiops.rs b/src/treemap/multiops.rs new file mode 100644 index 000000000..d0f4ae071 --- /dev/null +++ b/src/treemap/multiops.rs @@ -0,0 +1,376 @@ +use std::{ + borrow::Borrow, + cmp::Ordering, + collections::{binary_heap::PeekMut, BTreeMap, BinaryHeap}, + mem, +}; + +use crate::{IterExt, RoaringBitmap, RoaringTreemap}; + +impl IterExt for I +where + I: IntoIterator, +{ + type Output = RoaringTreemap; + + fn or(self) -> Self::Output { + try_simple_multi_op_owned::<_, _, OrOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } + + fn and(self) -> Self::Output { + try_ordered_multi_op_owned::<_, _, AndOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } + + fn sub(self) -> Self::Output { + try_ordered_multi_op_owned::<_, _, SubOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } + + fn xor(self) -> Self::Output { + try_simple_multi_op_owned::<_, _, XorOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } +} + +impl IterExt> for I +where + I: IntoIterator>, +{ + type Output = Result; + + fn or(self) -> Self::Output { + try_simple_multi_op_owned::<_, _, OrOp>(self) + } + + fn and(self) -> Self::Output { + try_ordered_multi_op_owned::<_, _, AndOp>(self) + } + + fn sub(self) -> Self::Output { + try_ordered_multi_op_owned::<_, _, SubOp>(self) + } + + fn xor(self) -> Self::Output { + try_simple_multi_op_owned::<_, _, XorOp>(self) + } +} + +#[inline] +fn try_simple_multi_op_owned(treemaps: I) -> Result +where + I: IntoIterator>, +{ + let treemaps = treemaps.into_iter().collect::, _>>()?; + + let mut heap: BinaryHeap<_> = treemaps + .into_iter() + .filter_map(|treemap| { + let mut iter = treemap.map.into_iter(); + iter.next().map(|(key, bitmap)| PeekedRoaringBitmap { key, bitmap, iter }) + }) + .collect(); + + let mut bitmaps = Vec::new(); + let mut map = BTreeMap::new(); + + while let Some(mut peek) = heap.peek_mut() { + let (key, bitmap) = match peek.iter.next() { + Some((next_key, next_bitmap)) => { + let key = peek.key; + peek.key = next_key; + let bitmap = mem::replace(&mut peek.bitmap, next_bitmap); + (key, bitmap) + } + None => { + let poped = PeekMut::pop(peek); + (poped.key, poped.bitmap) + } + }; + + if let Some((first_key, _)) = bitmaps.first() { + if *first_key != key { + let current_key = *first_key; + let computed_bitmap = O::op_owned(bitmaps.drain(..).map(|(_, rb)| rb)); + if !computed_bitmap.is_empty() { + map.insert(current_key, computed_bitmap); + } + } + } + + bitmaps.push((key, bitmap)); + } + + if let Some((first_key, _)) = bitmaps.first() { + let current_key = *first_key; + let computed_bitmap = O::op_owned(bitmaps.drain(..).map(|(_, rb)| rb)); + if !computed_bitmap.is_empty() { + map.insert(current_key, computed_bitmap); + } + } + + Ok(RoaringTreemap { map }) +} + +#[inline] +fn try_ordered_multi_op_owned(treemaps: I) -> Result +where + I: IntoIterator>, +{ + let mut treemaps = treemaps.into_iter(); + let mut treemap = match treemaps.next().transpose()? { + Some(treemap) => treemap, + None => return Ok(RoaringTreemap::new()), + }; + let mut treemaps = treemaps.collect::, _>>()?; + + // for each keys in the first treemap we're going find and accumulate all the corresponding bitmaps + let keys: Vec<_> = treemap.map.keys().copied().collect(); + for k in keys { + // the unwrap is safe since we're iterating on our keys + let current_bitmap = treemap.map.remove(&k).unwrap(); + let new_bitmap = + O::op_owned(std::iter::once(current_bitmap).chain( + treemaps.iter_mut().map(|treemap| treemap.map.remove(&k).unwrap_or_default()), + )); + if !new_bitmap.is_empty() { + treemap.map.insert(k, new_bitmap); + } + } + + Ok(treemap) +} + +#[inline] +fn try_ordered_multi_op_ref<'a, E: 'a, I, O: Op>(treemaps: I) -> Result +where + I: IntoIterator>, +{ + let mut treemaps = treemaps.into_iter(); + let treemap = match treemaps.next().transpose()? { + Some(treemap) => treemap, + None => return Ok(RoaringTreemap::new()), + }; + let treemaps = treemaps.collect::, _>>()?; + + let mut ret = RoaringTreemap::new(); + + // for each keys in the first treemap we're going find and accumulate all the corresponding bitmaps + let keys: Vec<_> = treemap.map.keys().copied().collect(); + let empty_bitmap = RoaringBitmap::new(); + for k in keys { + // the unwrap is safe since we're iterating on our keys + let current_bitmap = treemap.map.get(&k).unwrap(); + let new_bitmap = O::op_ref( + std::iter::once(current_bitmap) + .chain(treemaps.iter().map(|treemap| treemap.map.get(&k).unwrap_or(&empty_bitmap))), + ); + if !new_bitmap.is_empty() { + ret.map.insert(k, new_bitmap); + } + } + + Ok(ret) +} + +#[inline] +fn try_simple_multi_op_ref<'a, E: 'a, I, O: Op>(treemaps: I) -> Result +where + I: IntoIterator>, +{ + let treemaps = treemaps.into_iter().collect::, E>>()?; + + let mut heap: BinaryHeap<_> = treemaps + .into_iter() + .filter_map(|treemap| { + let mut iter = treemap.map.iter(); + iter.next().map(|(&key, bitmap)| PeekedRoaringBitmap { key, bitmap, iter }) + }) + .collect(); + + let mut bitmaps = Vec::new(); + let mut map = BTreeMap::new(); + + while let Some(mut peek) = heap.peek_mut() { + let (key, bitmap) = match peek.iter.next() { + Some((&next_key, next_bitmap)) => { + let key = peek.key; + peek.key = next_key; + let bitmap = mem::replace(&mut peek.bitmap, next_bitmap); + (key, bitmap) + } + None => { + let poped = PeekMut::pop(peek); + (poped.key, poped.bitmap) + } + }; + + if let Some((first_key, _)) = bitmaps.first() { + if *first_key != key { + let current_key = *first_key; + let computed_bitmap = O::op_ref(bitmaps.drain(..).map(|(_, rb)| rb)); + if !computed_bitmap.is_empty() { + map.insert(current_key, computed_bitmap); + } + } + } + + bitmaps.push((key, bitmap)); + } + + if let Some((first_key, _)) = bitmaps.first() { + let current_key = *first_key; + let computed_bitmap = O::op_ref(bitmaps.drain(..).map(|(_, rb)| rb)); + if !computed_bitmap.is_empty() { + map.insert(current_key, computed_bitmap); + } + } + + Ok(RoaringTreemap { map }) +} + +trait Op { + fn op_owned>(iter: I) -> RoaringBitmap; + fn op_ref<'a, I: IntoIterator>(iter: I) -> RoaringBitmap; +} + +enum OrOp {} + +impl Op for OrOp { + fn op_owned>(iter: J) -> RoaringBitmap { + iter.or() + } + + fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { + iter.or() + } +} + +enum AndOp {} + +impl Op for AndOp { + fn op_owned>(iter: J) -> RoaringBitmap { + iter.and() + } + + fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { + iter.and() + } +} + +enum SubOp {} + +impl Op for SubOp { + fn op_owned>(iter: J) -> RoaringBitmap { + iter.sub() + } + + fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { + iter.sub() + } +} + +enum XorOp {} + +impl Op for XorOp { + fn op_owned>(iter: J) -> RoaringBitmap { + iter.xor() + } + + fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { + iter.xor() + } +} + +impl<'a, I> IterExt<&'a RoaringTreemap> for I +where + I: IntoIterator, +{ + type Output = RoaringTreemap; + + fn or(self) -> Self::Output { + try_simple_multi_op_ref::<_, _, OrOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } + + fn and(self) -> Self::Output { + try_ordered_multi_op_ref::<_, _, AndOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } + + fn sub(self) -> Self::Output { + try_ordered_multi_op_ref::<_, _, SubOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } + + fn xor(self) -> Self::Output { + try_simple_multi_op_ref::<_, _, XorOp>( + self.into_iter().map(Ok::<_, std::convert::Infallible>), + ) + .unwrap() + } +} + +impl<'a, I, E: 'a> IterExt> for I +where + I: IntoIterator>, +{ + type Output = Result; + + fn or(self) -> Self::Output { + try_simple_multi_op_ref::<_, _, OrOp>(self) + } + + fn and(self) -> Self::Output { + try_ordered_multi_op_ref::<_, _, AndOp>(self) + } + + fn sub(self) -> Self::Output { + try_ordered_multi_op_ref::<_, _, SubOp>(self) + } + + fn xor(self) -> Self::Output { + try_simple_multi_op_ref::<_, _, XorOp>(self) + } +} + +struct PeekedRoaringBitmap { + key: u32, + bitmap: R, + iter: I, +} + +impl, I> Ord for PeekedRoaringBitmap { + fn cmp(&self, other: &Self) -> Ordering { + self.key.cmp(&other.key).reverse() + } +} + +impl, I> PartialOrd for PeekedRoaringBitmap { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl, I> Eq for PeekedRoaringBitmap {} + +impl, I> PartialEq for PeekedRoaringBitmap { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } +} diff --git a/src/treemap/ops.rs b/src/treemap/ops.rs index e3472ec96..f549bd549 100644 --- a/src/treemap/ops.rs +++ b/src/treemap/ops.rs @@ -1,12 +1,8 @@ -use std::borrow::Borrow; -use std::cmp::Ordering; -use std::collections::binary_heap::PeekMut; use std::collections::btree_map::Entry; -use std::collections::{BTreeMap, BinaryHeap}; use std::mem; use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign}; -use crate::{IterExt, RoaringBitmap, RoaringTreemap}; +use crate::RoaringTreemap; impl RoaringTreemap { /// Computes the len of the union with the specified other treemap without creating a new @@ -403,374 +399,6 @@ impl BitXorAssign<&RoaringTreemap> for RoaringTreemap { } } -impl IterExt for I -where - I: IntoIterator, -{ - type Output = RoaringTreemap; - - fn or(self) -> Self::Output { - try_simple_multi_op_owned::<_, _, OrOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } - - fn and(self) -> Self::Output { - try_ordered_multi_op_owned::<_, _, AndOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } - - fn sub(self) -> Self::Output { - try_ordered_multi_op_owned::<_, _, SubOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } - - fn xor(self) -> Self::Output { - try_simple_multi_op_owned::<_, _, XorOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } -} - -impl IterExt> for I -where - I: IntoIterator>, -{ - type Output = Result; - - fn or(self) -> Self::Output { - try_simple_multi_op_owned::<_, _, OrOp>(self) - } - - fn and(self) -> Self::Output { - try_ordered_multi_op_owned::<_, _, AndOp>(self) - } - - fn sub(self) -> Self::Output { - try_ordered_multi_op_owned::<_, _, SubOp>(self) - } - - fn xor(self) -> Self::Output { - try_simple_multi_op_owned::<_, _, XorOp>(self) - } -} - -#[inline] -fn try_simple_multi_op_owned(treemaps: I) -> Result -where - I: IntoIterator>, -{ - let treemaps = treemaps.into_iter().collect::, _>>()?; - - let mut heap: BinaryHeap<_> = treemaps - .into_iter() - .filter_map(|treemap| { - let mut iter = treemap.map.into_iter(); - iter.next().map(|(key, bitmap)| PeekedRoaringBitmap { key, bitmap, iter }) - }) - .collect(); - - let mut bitmaps = Vec::new(); - let mut map = BTreeMap::new(); - - while let Some(mut peek) = heap.peek_mut() { - let (key, bitmap) = match peek.iter.next() { - Some((next_key, next_bitmap)) => { - let key = peek.key; - peek.key = next_key; - let bitmap = mem::replace(&mut peek.bitmap, next_bitmap); - (key, bitmap) - } - None => { - let poped = PeekMut::pop(peek); - (poped.key, poped.bitmap) - } - }; - - if let Some((first_key, _)) = bitmaps.first() { - if *first_key != key { - let current_key = *first_key; - let computed_bitmap = O::op_owned(bitmaps.drain(..).map(|(_, rb)| rb)); - if !computed_bitmap.is_empty() { - map.insert(current_key, computed_bitmap); - } - } - } - - bitmaps.push((key, bitmap)); - } - - if let Some((first_key, _)) = bitmaps.first() { - let current_key = *first_key; - let computed_bitmap = O::op_owned(bitmaps.drain(..).map(|(_, rb)| rb)); - if !computed_bitmap.is_empty() { - map.insert(current_key, computed_bitmap); - } - } - - Ok(RoaringTreemap { map }) -} - -#[inline] -fn try_ordered_multi_op_owned(treemaps: I) -> Result -where - I: IntoIterator>, -{ - let mut treemaps = treemaps.into_iter(); - let mut treemap = match treemaps.next().transpose()? { - Some(treemap) => treemap, - None => return Ok(RoaringTreemap::new()), - }; - let mut treemaps = treemaps.collect::, _>>()?; - - // for each keys in the first treemap we're going find and accumulate all the corresponding bitmaps - let keys: Vec<_> = treemap.map.keys().copied().collect(); - for k in keys { - // the unwrap is safe since we're iterating on our keys - let current_bitmap = treemap.map.remove(&k).unwrap(); - let new_bitmap = - O::op_owned(std::iter::once(current_bitmap).chain( - treemaps.iter_mut().map(|treemap| treemap.map.remove(&k).unwrap_or_default()), - )); - if !new_bitmap.is_empty() { - treemap.map.insert(k, new_bitmap); - } - } - - Ok(treemap) -} - -#[inline] -fn try_ordered_multi_op_ref<'a, E: 'a, I, O: Op>(treemaps: I) -> Result -where - I: IntoIterator>, -{ - let mut treemaps = treemaps.into_iter(); - let treemap = match treemaps.next().transpose()? { - Some(treemap) => treemap, - None => return Ok(RoaringTreemap::new()), - }; - let treemaps = treemaps.collect::, _>>()?; - - let mut ret = RoaringTreemap::new(); - - // for each keys in the first treemap we're going find and accumulate all the corresponding bitmaps - let keys: Vec<_> = treemap.map.keys().copied().collect(); - let empty_bitmap = RoaringBitmap::new(); - for k in keys { - // the unwrap is safe since we're iterating on our keys - let current_bitmap = treemap.map.get(&k).unwrap(); - let new_bitmap = O::op_ref( - std::iter::once(current_bitmap) - .chain(treemaps.iter().map(|treemap| treemap.map.get(&k).unwrap_or(&empty_bitmap))), - ); - if !new_bitmap.is_empty() { - ret.map.insert(k, new_bitmap); - } - } - - Ok(ret) -} - -#[inline] -fn try_simple_multi_op_ref<'a, E: 'a, I, O: Op>(treemaps: I) -> Result -where - I: IntoIterator>, -{ - let treemaps = treemaps.into_iter().collect::, E>>()?; - - let mut heap: BinaryHeap<_> = treemaps - .into_iter() - .filter_map(|treemap| { - let mut iter = treemap.map.iter(); - iter.next().map(|(&key, bitmap)| PeekedRoaringBitmap { key, bitmap, iter }) - }) - .collect(); - - let mut bitmaps = Vec::new(); - let mut map = BTreeMap::new(); - - while let Some(mut peek) = heap.peek_mut() { - let (key, bitmap) = match peek.iter.next() { - Some((&next_key, next_bitmap)) => { - let key = peek.key; - peek.key = next_key; - let bitmap = mem::replace(&mut peek.bitmap, next_bitmap); - (key, bitmap) - } - None => { - let poped = PeekMut::pop(peek); - (poped.key, poped.bitmap) - } - }; - - if let Some((first_key, _)) = bitmaps.first() { - if *first_key != key { - let current_key = *first_key; - let computed_bitmap = O::op_ref(bitmaps.drain(..).map(|(_, rb)| rb)); - if !computed_bitmap.is_empty() { - map.insert(current_key, computed_bitmap); - } - } - } - - bitmaps.push((key, bitmap)); - } - - if let Some((first_key, _)) = bitmaps.first() { - let current_key = *first_key; - let computed_bitmap = O::op_ref(bitmaps.drain(..).map(|(_, rb)| rb)); - if !computed_bitmap.is_empty() { - map.insert(current_key, computed_bitmap); - } - } - - Ok(RoaringTreemap { map }) -} - -trait Op { - fn op_owned>(iter: I) -> RoaringBitmap; - fn op_ref<'a, I: IntoIterator>(iter: I) -> RoaringBitmap; -} - -enum OrOp {} - -impl Op for OrOp { - fn op_owned>(iter: J) -> RoaringBitmap { - iter.or() - } - - fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { - iter.or() - } -} - -enum AndOp {} - -impl Op for AndOp { - fn op_owned>(iter: J) -> RoaringBitmap { - iter.and() - } - - fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { - iter.and() - } -} - -enum SubOp {} - -impl Op for SubOp { - fn op_owned>(iter: J) -> RoaringBitmap { - iter.sub() - } - - fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { - iter.sub() - } -} - -enum XorOp {} - -impl Op for XorOp { - fn op_owned>(iter: J) -> RoaringBitmap { - iter.xor() - } - - fn op_ref<'a, J: IntoIterator>(iter: J) -> RoaringBitmap { - iter.xor() - } -} - -impl<'a, I> IterExt<&'a RoaringTreemap> for I -where - I: IntoIterator, -{ - type Output = RoaringTreemap; - - fn or(self) -> Self::Output { - try_simple_multi_op_ref::<_, _, OrOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } - - fn and(self) -> Self::Output { - try_ordered_multi_op_ref::<_, _, AndOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } - - fn sub(self) -> Self::Output { - try_ordered_multi_op_ref::<_, _, SubOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } - - fn xor(self) -> Self::Output { - try_simple_multi_op_ref::<_, _, XorOp>( - self.into_iter().map(Ok::<_, std::convert::Infallible>), - ) - .unwrap() - } -} - -impl<'a, I, E: 'a> IterExt> for I -where - I: IntoIterator>, -{ - type Output = Result; - - fn or(self) -> Self::Output { - try_simple_multi_op_ref::<_, _, OrOp>(self) - } - - fn and(self) -> Self::Output { - try_ordered_multi_op_ref::<_, _, AndOp>(self) - } - - fn sub(self) -> Self::Output { - try_ordered_multi_op_ref::<_, _, SubOp>(self) - } - - fn xor(self) -> Self::Output { - try_simple_multi_op_ref::<_, _, XorOp>(self) - } -} - -struct PeekedRoaringBitmap { - key: u32, - bitmap: R, - iter: I, -} - -impl, I> Ord for PeekedRoaringBitmap { - fn cmp(&self, other: &Self) -> Ordering { - self.key.cmp(&other.key).reverse() - } -} - -impl, I> PartialOrd for PeekedRoaringBitmap { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl, I> Eq for PeekedRoaringBitmap {} - -impl, I> PartialEq for PeekedRoaringBitmap { - fn eq(&self, other: &Self) -> bool { - self.key == other.key - } -} - #[cfg(test)] mod test { use crate::{IterExt, RoaringTreemap};