diff --git a/arraycontainer.go b/arraycontainer.go index 9541fd53..a575caff 100644 --- a/arraycontainer.go +++ b/arraycontainer.go @@ -17,8 +17,17 @@ func (ac *arrayContainer) String() string { } func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uint32) int { + if i < 0 { + panic("negative index") + } + if len(ac.content) == 0 { + return i + } + _ = x[len(ac.content)-1+i] + _ = ac.content[len(ac.content)-1] for k := 0; k < len(ac.content); k++ { - x[k+i] = uint32(ac.content[k]) | mask + x[k+i] = + uint32(ac.content[k]) | mask } return i + len(ac.content) } diff --git a/go.mod b/go.mod index c01a35b0..45dd12f5 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/RoaringBitmap/roaring go 1.14 require ( - github.com/bits-and-blooms/bitset v1.2.0 + github.com/bits-and-blooms/bitset v1.12.0 github.com/mschoch/smat v0.2.0 github.com/stretchr/testify v1.7.0 ) diff --git a/go.sum b/go.sum index 64cf1ea0..9d4dc8aa 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA= -github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA= +github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= diff --git a/roaring.go b/roaring.go index 5862f1d7..8d9fdce5 100644 --- a/roaring.go +++ b/roaring.go @@ -13,6 +13,7 @@ import ( "strconv" "github.com/RoaringBitmap/roaring/internal" + "github.com/bits-and-blooms/bitset" ) // Bitmap represents a compressed bitmap where you can add integers. 
@@ -90,13 +91,16 @@ func (rb *Bitmap) ToDense() []uint64 { // Useful to convert bitmaps from libraries like https://github.com/bits-and-blooms/bitset or // https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience. // -// This function won't create any run containers, only array and bitmap containers. It's up to +// This function will not create any run containers, only array and bitmap containers. It's up to // the caller to call RunOptimize if they want to further compress the runs of consecutive values. // // When doCopy is true, the bitmap is copied into a new slice for each bitmap container. // This is useful when the bitmap is going to be modified after this function returns or if it's -// undesirable to hold references to large bitmaps which the GC wouldn't be able to collect. -// One copy can still happen even when doCopy is false if the bitmap length isn't divisible by bitmapContainerSize. +// undesirable to hold references to large bitmaps which the GC would not be able to collect. +// One copy can still happen even when doCopy is false if the bitmap length is not divisible +// by bitmapContainerSize. +// +// See also FromBitSet. func FromDense(bitmap []uint64, doCopy bool) *Bitmap { sz := (len(bitmap) + bitmapContainerSize - 1) / bitmapContainerSize // round up rb := &Bitmap{ @@ -115,13 +119,16 @@ func FromDense(bitmap []uint64, doCopy bool) *Bitmap { // https://github.com/kelindar/bitmap into roaring bitmaps fast and with convenience. // Callers are responsible for ensuring that the bitmap is empty before calling this function. // -// This function won't create any run containers, only array and bitmap containers. It's up to +// This function will not create any run containers, only array and bitmap containers. It is up to // the caller to call RunOptimize if they want to further compress the runs of consecutive values. // // When doCopy is true, the bitmap is copied into a new slice for each bitmap container. 
// This is useful when the bitmap is going to be modified after this function returns or if it's -// undesirable to hold references to large bitmaps which the GC wouldn't be able to collect. -// One copy can still happen even when doCopy is false if the bitmap length isn't divisible by bitmapContainerSize. +// undesirable to hold references to large bitmaps which the GC would not be able to collect. +// One copy can still happen even when doCopy is false if the bitmap length is not divisible +// by bitmapContainerSize. +// +// See also FromBitSet. func (rb *Bitmap) FromDense(bitmap []uint64, doCopy bool) { if len(bitmap) == 0 { return @@ -220,8 +227,9 @@ func (rb *Bitmap) WriteDenseTo(bitmap []uint64) { // generally quicker comparisons. // The implementation is biased towards efficiency in little endian machines, so // expect some extra CPU cycles and memory to be used if your machine is big endian. -// Likewise, don't use this to verify integrity unless you're certain you'll load -// the bitmap on a machine with the same endianess used to create it. +// Likewise, do not use this to verify integrity unless you are certain you will load +// the bitmap on a machine with the same endianness used to create it. (Thankfully +// very few people use big endian machines these days.) 
func (rb *Bitmap) Checksum() uint64 { const ( offset = 14695981039346656037 @@ -381,6 +389,16 @@ func (rb *Bitmap) Clear() { rb.highlowcontainer.clear() } +// ToBitSet copies the content of the RoaringBitmap into a bitset.BitSet instance +func (rb *Bitmap) ToBitSet() *bitset.BitSet { + return bitset.From(rb.ToDense()) +} + +// FromBitSet creates a new RoaringBitmap from a bitset.BitSet instance +func FromBitSet(bitset *bitset.BitSet) *Bitmap { + return FromDense(bitset.Bytes(), false) +} + // ToArray creates a new slice containing all of the integers stored in the Bitmap in sorted order func (rb *Bitmap) ToArray() []uint32 { array := make([]uint32, rb.GetCardinality()) @@ -420,7 +438,7 @@ func BoundSerializedSizeInBytes(cardinality uint64, universeSize uint64) uint64 contnbr := (universeSize + uint64(65535)) / uint64(65536) if contnbr > cardinality { contnbr = cardinality - // we can't have more containers than we have values + // we cannot have more containers than we have values } headermax := 8*contnbr + 4 if 4 > (contnbr+7)/8 { @@ -1032,7 +1050,7 @@ func (rb *Bitmap) Select(x uint32) (uint32, error) { return uint32(key)<<16 + uint32(c.selectInt(uint16(remaining))), nil } } - return 0, fmt.Errorf("can't find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) + return 0, fmt.Errorf("cannot find %dth integer in a bitmap with only %d items", x, rb.GetCardinality()) } // And computes the intersection between two bitmaps and stores the result in the current bitmap diff --git a/roaring_test.go b/roaring_test.go index 6572a366..080b9f02 100644 --- a/roaring_test.go +++ b/roaring_test.go @@ -2608,6 +2608,16 @@ func TestFromDense(t *testing.T) { }) } +func TestFromBitSet(t *testing.T) { + testDense(func(name string, rb *Bitmap) { + t.Run(fmt.Sprintf("%s", name), func(t *testing.T) { + dense := rb.ToBitSet() + cp := FromBitSet(dense) + assert.True(t, rb.Equals(cp)) + }) + }) +} + func BenchmarkFromDense(b *testing.B) { testDense(func(name string, rb 
*Bitmap) { dense := make([]uint64, rb.DenseSize())