Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start roaring64 validation #431

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions roaring64/roaring64.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ import (
"github.com/RoaringBitmap/roaring/v2/internal"
)

const serialCookieNoRunContainer = 12346 // only arrays and bitmaps
const serialCookie = 12347 // runs, arrays, and bitmaps
// Cookie values; the inline note on each constant lists the container kinds
// it stands for.
// NOTE(review): presumably these mark the serialized layout — their use is not
// visible in this section, confirm against the (de)serialization code.
const (
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
serialCookie = 12347 // runs, arrays, and bitmaps
)

// Bitmap represents a compressed bitmap where you can add integers.
type Bitmap struct {
Expand All @@ -25,7 +27,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
return base64.StdEncoding.EncodeToString(buf.Bytes()), err

}

// FromBase64 deserializes a bitmap from Base64
Expand All @@ -52,7 +53,6 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
// implementations (Java, Go, C++) and it has a specification :
// https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {

var n int64
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(rb.highlowcontainer.size()))
Expand Down Expand Up @@ -1243,6 +1243,10 @@ func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
return rb.highlowcontainer.serializedSizeInBytes()
}

// Validate checks the internal consistency of the bitmap. It delegates to the
// validate method of the bitmap's highlowcontainer and returns that result
// unchanged; a nil error means no problem was reported.
func (rb *Bitmap) Validate() error {
return rb.highlowcontainer.validate()
}

// Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into
// a 64-bit roaring bitmap. No copy is made.
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap {
Expand Down
77 changes: 72 additions & 5 deletions roaring64/roaring64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func TestRangeRemovalFromContent(t *testing.T) {
bm.RemoveRange(0, 30000)
c := bm.GetCardinality()

assert.EqualValues(t, 00, c)
assert.EqualValues(t, 0o0, c)
}

func TestFlipOnEmpty(t *testing.T) {
Expand Down Expand Up @@ -624,7 +624,6 @@ func TestBitmap(t *testing.T) {

assert.Equal(t, len(arrayres), len(arrayand))
assert.True(t, ok)

})

t.Run("Test AND 4", func(t *testing.T) {
Expand Down Expand Up @@ -1401,6 +1400,7 @@ func TestBitmap(t *testing.T) {
assert.True(t, valide)
})
}

func TestXORtest4(t *testing.T) {
t.Run("XORtest 4", func(t *testing.T) {
rb := NewBitmap()
Expand Down Expand Up @@ -1895,9 +1895,9 @@ func TestSerialization(t *testing.T) {
//assert.Nil(t, err)
//assert.True(t, bufBmp.Equals(bmp))

//var base64 string
//base64, err = bufBmp.ToBase64()
//assert.Nil(t, err)
// var base64 string
// base64, err = bufBmp.ToBase64()
// assert.Nil(t, err)

//base64Bmp := New()
//_, err = base64Bmp.FromBase64(base64)
Expand Down Expand Up @@ -1988,3 +1988,70 @@ func Test32As64(t *testing.T) {
assert.True(t, r32asr64.Equals(r64))
assert.True(t, r64.Equals(r32asr64))
}

// TestRoaringArray64Validation drives roaringArray64.validate through its
// structural checks in the order they are performed: no keys, unsorted keys,
// mismatched slice lengths, and finally validation of the containers.
func TestRoaringArray64Validation(t *testing.T) {
	a := roaringArray64{}

	assert.ErrorIs(t, a.validate(), ErrEmptyKeys)

	a.keys = append(a.keys, uint32(3), uint32(1))
	assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
	a.clear()

	// Sorted keys without matching containers, and then without matching
	// needCopyOnWrite flags, must both be reported as incoherent sizes.
	a.keys = append(a.keys, uint32(1), uint32(3), uint32(10))
	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
	a.containers = append(a.containers, roaring.NewBitmap(), roaring.NewBitmap(), roaring.NewBitmap())
	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
	a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)

	// All three slices now have the same length, so validate reaches the
	// containers themselves. Only a non-nil error is asserted: the trailing
	// string is the failure message, not a pattern matched against the error.
	assert.Error(t, a.validate(), "validate should report an error from the containers")
}

// TestBitMapValidation checks that Validate reports no error for a bitmap
// filled through AddRange and AddMany.
func TestBitMapValidation(t *testing.T) {
	bm := NewBitmap()

	// The ranges are inserted in non-ascending order.
	bm.AddRange(0, 100)
	bm.AddRange(306, 406)
	bm.AddRange(102, 202)
	bm.AddRange(204, 304)
	assert.NoError(t, bm.Validate())

	// 1000 random 64-bit values.
	const count = 1000
	random := make([]uint64, count)
	for idx := range random {
		random[idx] = rand.Uint64()
	}
	bm.AddMany(random)
	assert.NoError(t, bm.Validate())

	// The values 0..999 in sequence.
	sequential := make([]uint64, count)
	for idx := range sequential {
		sequential[idx] = uint64(idx)
	}
	bm.AddMany(sequential)
	assert.NoError(t, bm.Validate())
}

// TestRoaringArray64SortOrder exercises checkKeysSorted on both outcomes:
// arrays it must accept (empty, single key, populated bitmap) and arrays it
// must reject (descending keys, repeated keys).
func TestRoaringArray64SortOrder(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		a := roaringArray64{}
		assert.True(t, a.checkKeysSorted())
	})

	t.Run("Unsorted", func(t *testing.T) {
		a := roaringArray64{keys: []uint32{3, 1}}
		assert.False(t, a.checkKeysSorted())
	})

	// Equal neighbouring keys are rejected as well: the order must be
	// strictly increasing.
	t.Run("Duplicate", func(t *testing.T) {
		a := roaringArray64{keys: []uint32{1, 1}}
		assert.False(t, a.checkKeysSorted())
	})

	t.Run("Cardinality 1", func(t *testing.T) {
		bm := NewBitmap()
		bm.Add(65)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})

	t.Run("Many Entries", func(t *testing.T) {
		bm := NewBitmap()
		bm.AddRange(1, 129)
		bm.AddRange(511, 2049)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})
}
55 changes: 54 additions & 1 deletion roaring64/roaringarray64.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package roaring64

import (
"errors"

"github.com/RoaringBitmap/roaring/v2"
)

Expand All @@ -11,6 +13,12 @@ type roaringArray64 struct {
copyOnWrite bool
}

// Sentinel errors returned by roaringArray64.validate.
var (
// ErrEmptyKeys is returned when the array holds no keys.
ErrEmptyKeys = errors.New("keys were empty")
// ErrKeySortOrder is returned when the keys are not in strictly increasing order.
ErrKeySortOrder = errors.New("keys were out of order")
// ErrCardinalityConstraint is returned when the keys, containers and
// needCopyOnWrite slices do not all have the same length.
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)

// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
Expand Down Expand Up @@ -140,7 +148,6 @@ func (ra *roaringArray64) clear() {
}

func (ra *roaringArray64) clone() *roaringArray64 {

sa := roaringArray64{}
sa.copyOnWrite = ra.copyOnWrite

Expand Down Expand Up @@ -401,3 +408,49 @@ func (ra *roaringArray64) serializedSizeInBytes() uint64 {
}
return answer
}

func (ra *roaringArray64) checkKeysSorted() bool {
bearrito marked this conversation as resolved.
Show resolved Hide resolved
if len(ra.keys) == 0 || len(ra.keys) == 1 {
return true
}
previous := ra.keys[0]
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
next := ra.keys[nextIdx]
if previous >= next {
return false
}
previous = next

}
return true
}

// validate checks the referential integrity
// ensures len(keys) == len(containers), recurses and checks each container type
func (ra *roaringArray64) validate() error {
bearrito marked this conversation as resolved.
Show resolved Hide resolved
if len(ra.keys) == 0 {
return ErrEmptyKeys
}

if !ra.checkKeysSorted() {
return ErrKeySortOrder
}

if len(ra.keys) != len(ra.containers) {
return ErrCardinalityConstraint
}

if len(ra.keys) != len(ra.needCopyOnWrite) {
return ErrCardinalityConstraint
}

for _, maps := range ra.containers {

err := maps.Validate()
if err != nil {
return err
}
}

return nil
}
5 changes: 2 additions & 3 deletions roaring64/serialization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func TestSerializationBasic037(t *testing.T) {
func TestSerializationToFile038(t *testing.T) {
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
fname := "myfile.bin"
fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o660)
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
Expand Down Expand Up @@ -233,7 +233,6 @@ func benchmarkUnserializeFunc(b *testing.B, name string, f func(*Bitmap, []byte)
}

_, err := rb.WriteTo(buf)

if err != nil {
b.Fatalf("Unexpected error occurs: %v", err)
}
Expand Down Expand Up @@ -284,7 +283,7 @@ func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) {
}

name := filepath.Join(tempDir, "r32")
if err := ioutil.WriteFile(name, bs, 0600); err != nil {
if err := ioutil.WriteFile(name, bs, 0o600); err != nil {
t.Fatal(err)
}
file, err := os.Open(name)
Expand Down
Loading