Skip to content

Commit

Permalink
Adding ToBitSet and FromBitSet
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Dec 18, 2023
1 parent cceddf2 commit 99c06c1
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 14 deletions.
11 changes: 10 additions & 1 deletion arraycontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,17 @@ func (ac *arrayContainer) String() string {
}

// fillLeastSignificant16bits copies every value stored in the container into
// x, starting at position i, OR-ing mask into each value after widening it to
// uint32. It returns the position just past the last slot written, i.e.
// i + len(ac.content).
//
// NOTE(review): mask presumably carries the high 16 bits (the container key)
// of the resulting 32-bit values; confirm against the callers.
//
// The function panics with "negative index" when i < 0, and with an index
// out of range error when x is too short to receive all values. An empty
// container writes nothing and returns i unchanged.
func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) int {
	if i < 0 {
		panic("negative index")
	}
	n := len(ac.content)
	if n == 0 {
		return i
	}
	// Probe the last destination slot up front: this fails fast, before any
	// write, when x cannot hold all n values, and gives the compiler a chance
	// to drop the per-iteration bounds checks on x.
	_ = x[i+n-1]
	for k, v := range ac.content {
		x[i+k] = uint32(v) | mask
	}
	return i + n
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/RoaringBitmap/roaring
go 1.14

require (
github.com/bits-and-blooms/bitset v1.2.0
github.com/bits-and-blooms/bitset v1.12.0
github.com/mschoch/smat v0.2.0
github.com/stretchr/testify v1.7.0
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
Expand Down
38 changes: 28 additions & 10 deletions roaring.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strconv"

"github.com/RoaringBitmap/roaring/internal"
"github.com/bits-and-blooms/bitset"
)

// Bitmap represents a compressed bitmap where you can add integers.
Expand Down Expand Up @@ -90,13 +91,16 @@ func (rb *Bitmap) ToDense() []uint64 {
// Useful to convert bitmaps from libraries like https://github.com/bits-and-blooms/bitset or
// https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience.
//
// This function won't create any run containers, only array and bitmap containers. It's up to
// This function will not create any run containers, only array and bitmap containers. It's up to
// the caller to call RunOptimize if they want to further compress the runs of consecutive values.
//
// When doCopy is true, the bitmap is copied into a new slice for each bitmap container.
// This is useful when the bitmap is going to be modified after this function returns or if it's
// undesirable to hold references to large bitmaps which the GC wouldn't be able to collect.
// One copy can still happen even when doCopy is false if the bitmap length isn't divisible by bitmapContainerSize.
// undesirable to hold references to large bitmaps which the GC would not be able to collect.
// One copy can still happen even when doCopy is false if the bitmap length is not divisible
// by bitmapContainerSize.
//
// See also FromBitSet.
func FromDense(bitmap []uint64, doCopy bool) *Bitmap {
sz := (len(bitmap) + bitmapContainerSize - 1) / bitmapContainerSize // round up
rb := &Bitmap{
Expand All @@ -115,13 +119,16 @@ func FromDense(bitmap []uint64, doCopy bool) *Bitmap {
// https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience.
// Callers are responsible for ensuring that the bitmap is empty before calling this function.
//
// This function won't create any run containers, only array and bitmap containers. It's up to
// This function will not create any run containers, only array and bitmap containers. It is up to
// the caller to call RunOptimize if they want to further compress the runs of consecutive values.
//
// When doCopy is true, the bitmap is copied into a new slice for each bitmap container.
// This is useful when the bitmap is going to be modified after this function returns or if it's
// undesirable to hold references to large bitmaps which the GC wouldn't be able to collect.
// One copy can still happen even when doCopy is false if the bitmap length isn't divisible by bitmapContainerSize.
// undesirable to hold references to large bitmaps which the GC would not be able to collect.
// One copy can still happen even when doCopy is false if the bitmap length is not divisible
// by bitmapContainerSize.
//
// See FromBitSet.
func (rb *Bitmap) FromDense(bitmap []uint64, doCopy bool) {
if len(bitmap) == 0 {
return
Expand Down Expand Up @@ -220,8 +227,9 @@ func (rb *Bitmap) WriteDenseTo(bitmap []uint64) {
// generally quicker comparisons.
// The implementation is biased towards efficiency in little endian machines, so
// expect some extra CPU cycles and memory to be used if your machine is big endian.
// Likewise, don't use this to verify integrity unless you're certain you'll load
// the bitmap on a machine with the same endianess used to create it.
// Likewise, do not use this to verify integrity unless you are certain you will load
// the bitmap on a machine with the same endianness used to create it. (Thankfully
// very few people use big endian machines these days.)
func (rb *Bitmap) Checksum() uint64 {
const (
offset = 14695981039346656037
Expand Down Expand Up @@ -381,6 +389,16 @@ func (rb *Bitmap) Clear() {
rb.highlowcontainer.clear()
}

// ToBitSet copies the content of the RoaringBitmap into a bitset.BitSet
// instance. The dense word slice produced by ToDense is handed to bitset.From
// to build the returned BitSet.
func (rb *Bitmap) ToBitSet() *bitset.BitSet {
	words := rb.ToDense()
	return bitset.From(words)
}

// FromBitSet creates a new RoaringBitmap from a bitset.BitSet instance. The
// words returned by the BitSet's Bytes method are passed to FromDense with
// doCopy set to false.
//
// NOTE(review): because doCopy is false, the returned Bitmap may keep
// referencing the BitSet's underlying words rather than a private copy;
// callers should presumably avoid mutating the BitSet afterwards. Confirm
// against the FromDense and bitset.Bytes contracts.
func FromBitSet(bitset *bitset.BitSet) *Bitmap {
	words := bitset.Bytes()
	return FromDense(words, false)
}

// ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order
func (rb *Bitmap) ToArray() []uint32 {
array := make([]uint32, rb.GetCardinality())
Expand Down Expand Up @@ -420,7 +438,7 @@ func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64
contnbr := (universeSize + uint64(65535)) / uint64(65536)
if contnbr > cardinality {
contnbr = cardinality
// we can't have more containers than we have values
// we cannot have more containers than we have values
}
headermax := 8*contnbr + 4
if 4 > (contnbr+7)/8 {
Expand Down Expand Up @@ -1032,7 +1050,7 @@ func (rb *Bitmap) Select(x uint32) (uint32, error) {
return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil
}
}
return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality())
return 0, fmt.Errorf("cannot find %dth integer in a bitmap with only %d items", x, rb.GetCardinality())
}

// And computes the intersection between two bitmaps and stores the result in the current bitmap
Expand Down
10 changes: 10 additions & 0 deletions roaring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2608,6 +2608,16 @@ func TestFromDense(t *testing.T) {
})
}

// TestFromBitSet checks the ToBitSet/FromBitSet round trip: for every bitmap
// supplied by testDense, converting it to a BitSet and back must yield a
// bitmap equal to the original.
func TestFromBitSet(t *testing.T) {
	testDense(func(name string, rb *Bitmap) {
		// name is already a string, so it is used directly as the subtest name.
		t.Run(name, func(t *testing.T) {
			bs := rb.ToBitSet()
			cp := FromBitSet(bs)
			assert.True(t, rb.Equals(cp))
		})
	})
}

func BenchmarkFromDense(b *testing.B) {
testDense(func(name string, rb *Bitmap) {
dense := make([]uint64, rb.DenseSize())
Expand Down

0 comments on commit 99c06c1

Please sign in to comment.