diff --git a/roaring64/roaring64.go b/roaring64/roaring64.go index 56b9e30e..438160de 100644 --- a/roaring64/roaring64.go +++ b/roaring64/roaring64.go @@ -12,8 +12,10 @@ import ( "github.com/RoaringBitmap/roaring/internal" ) -const serialCookieNoRunContainer = 12346 // only arrays and bitmaps -const serialCookie = 12347 // runs, arrays, and bitmaps +const ( + serialCookieNoRunContainer = 12346 // only arrays and bitmaps + serialCookie = 12347 // runs, arrays, and bitmaps +) // Bitmap represents a compressed bitmap where you can add integers. type Bitmap struct { @@ -25,7 +27,6 @@ func (rb *Bitmap) ToBase64() (string, error) { buf := new(bytes.Buffer) _, err := rb.WriteTo(buf) return base64.StdEncoding.EncodeToString(buf.Bytes()), err - } // FromBase64 deserializes a bitmap from Base64 @@ -52,7 +53,6 @@ func (rb *Bitmap) ToBytes() ([]byte, error) { // implementations (Java, Go, C++) and it has a specification : // https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) { - var n int64 buf := make([]byte, 8) binary.LittleEndian.PutUint64(buf, uint64(rb.highlowcontainer.size())) @@ -1243,6 +1243,13 @@ func (rb *Bitmap) GetSerializedSizeInBytes() uint64 { return rb.highlowcontainer.serializedSizeInBytes() } +// Validate checks the structural integrity of the bitmap's container array. +// It returns nil when no problem is found, otherwise the first error +// encountered (ErrEmptyKeys when the array holds no keys at all). +func (rb *Bitmap) Validate() error { + return rb.highlowcontainer.validate() +} + // Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into // a 64-bit roaring bitmap. No copy is made. 
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap { diff --git a/roaring64/roaring64_test.go b/roaring64/roaring64_test.go index 92bfc8fb..2188a695 100644 --- a/roaring64/roaring64_test.go +++ b/roaring64/roaring64_test.go @@ -245,7 +245,7 @@ func TestRangeRemovalFromContent(t *testing.T) { bm.RemoveRange(0, 30000) c := bm.GetCardinality() - assert.EqualValues(t, 00, c) + assert.EqualValues(t, 0, c) } func TestFlipOnEmpty(t *testing.T) { @@ -624,7 +624,6 @@ func TestBitmap(t *testing.T) { assert.Equal(t, len(arrayres), len(arrayand)) assert.True(t, ok) - }) t.Run("Test AND 4", func(t *testing.T) { @@ -1401,6 +1400,7 @@ func TestBitmap(t *testing.T) { assert.True(t, valide) }) } + func TestXORtest4(t *testing.T) { t.Run("XORtest 4", func(t *testing.T) { rb := NewBitmap() @@ -1895,9 +1895,9 @@ func TestSerialization(t *testing.T) { //assert.Nil(t, err) //assert.True(t, bufBmp.Equals(bmp)) - //var base64 string - //base64, err = bufBmp.ToBase64() - //assert.Nil(t, err) + // var base64 string + // base64, err = bufBmp.ToBase64() + // assert.Nil(t, err) //base64Bmp := New() //_, err = base64Bmp.FromBase64(base64) @@ -1988,3 +1988,47 @@ func Test32As64(t *testing.T) { assert.True(t, r32asr64.Equals(r64)) assert.True(t, r64.Equals(r32asr64)) } + +func TestRoaringArray64Validation(t *testing.T) { + a := roaringArray64{} + + assert.ErrorIs(t, a.validate(), ErrEmptyKeys) + + a.keys = append(a.keys, uint32(3), uint32(1)) + assert.ErrorIs(t, a.validate(), ErrKeySortOrder) + a.clear() + + // build up cardinality coherent arrays + a.keys = append(a.keys, uint32(1), uint32(3), uint32(10)) + assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint) + a.containers = append(a.containers, roaring.NewBitmap(), roaring.NewBitmap(), roaring.NewBitmap()) + assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint) + a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true) + // keys are sorted and all three slices now have equal length, so the only + // remaining source of failure is the containers' own Validate + assert.Error(t, a.validate()) +} + +func TestBitMapValidation(t *testing.T) { + 
bm := NewBitmap() + bm.AddRange(0, 100) + bm.AddRange(306, 406) + bm.AddRange(102, 202) + bm.AddRange(204, 304) + assert.NoError(t, bm.Validate()) + + randomEntries := make([]uint64, 0, 1000) + for i := 0; i < 1000; i++ { + randomEntries = append(randomEntries, rand.Uint64()) + } + + bm.AddMany(randomEntries) + assert.NoError(t, bm.Validate()) + + randomEntries = make([]uint64, 0, 1000) + for i := 0; i < 1000; i++ { + randomEntries = append(randomEntries, uint64(i)) + } + bm.AddMany(randomEntries) + assert.NoError(t, bm.Validate()) +} diff --git a/roaring64/roaringarray64.go b/roaring64/roaringarray64.go index 26aabd72..474f788a 100644 --- a/roaring64/roaringarray64.go +++ b/roaring64/roaringarray64.go @@ -1,6 +1,8 @@ package roaring64 import ( + "errors" + "github.com/RoaringBitmap/roaring" ) @@ -11,6 +13,12 @@ type roaringArray64 struct { copyOnWrite bool } +var ( + ErrEmptyKeys = errors.New("keys were empty") // validate: the array has no keys + ErrKeySortOrder = errors.New("keys were out of order") // validate: keys are not strictly increasing + ErrCardinalityConstraint = errors.New("size of arrays was not coherent") // validate: keys, containers and needCopyOnWrite differ in length +) + // runOptimize compresses the element containers to minimize space consumed. // Q: how does this interact with copyOnWrite and needCopyOnWrite? 
// A: since we aren't changing the logical content, just the representation, @@ -140,7 +148,6 @@ func (ra *roaringArray64) clear() { } func (ra *roaringArray64) clone() *roaringArray64 { - sa := roaringArray64{} sa.copyOnWrite = ra.copyOnWrite @@ -401,3 +408,44 @@ func (ra *roaringArray64) serializedSizeInBytes() uint64 { } return answer } + +// checkKeysSorted reports whether keys is strictly increasing, i.e. sorted +// with no duplicates. Arrays with fewer than two keys are trivially sorted. +func (ra *roaringArray64) checkKeysSorted() bool { + for i := 1; i < len(ra.keys); i++ { + if ra.keys[i-1] >= ra.keys[i] { + return false + } + } + return true +} + +// validate checks the structural integrity of the array: keys must be +// non-empty and strictly increasing, keys, containers and needCopyOnWrite must +// all have the same length, and every contained roaring.Bitmap must pass its +// own Validate. The first violation found is returned; nil means none was found. +func (ra *roaringArray64) validate() error { + if len(ra.keys) == 0 { + return ErrEmptyKeys + } + + if !ra.checkKeysSorted() { + return ErrKeySortOrder + } + + if len(ra.keys) != len(ra.containers) { + return ErrCardinalityConstraint + } + + if len(ra.keys) != len(ra.needCopyOnWrite) { + return ErrCardinalityConstraint + } + + for _, c := range ra.containers { + if err := c.Validate(); err != nil { + return err + } + } + + return nil +} diff --git a/roaring64/serialization_test.go b/roaring64/serialization_test.go index b77dbc8b..31d0e514 100644 --- a/roaring64/serialization_test.go +++ b/roaring64/serialization_test.go @@ -78,7 +78,7 @@ func TestSerializationBasic037(t *testing.T) { func TestSerializationToFile038(t *testing.T) { rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000) fname := "myfile.bin" - fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660) + fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o660) if err != nil { fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n") return @@ -233,7 +233,6 @@ func benchmarkUnserializeFunc(b *testing.B, name string, f func(*Bitmap, 
[]byte) } _, err := rb.WriteTo(buf) - if err != nil { b.Fatalf("Unexpected error occurs: %v", err) } @@ -284,7 +283,7 @@ func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) { } name := filepath.Join(tempDir, "r32") - if err := ioutil.WriteFile(name, bs, 0600); err != nil { + if err := ioutil.WriteFile(name, bs, 0o600); err != nil { t.Fatal(err) } file, err := os.Open(name)