From 51cafdeed4cbc4932d8494367c8ba6555fb388f9 Mon Sep 17 00:00:00 2001
From: Felipe Oliveira Carvalho
Date: Tue, 10 Oct 2023 18:43:35 -0300
Subject: [PATCH] GH-35344: [Go][Format] Implementation of the LIST_VIEW and LARGE_LIST_VIEW array formats (#37468)

### Rationale for this change

Go implementation of #35345.

### What changes are included in this PR?

- [x] Add `LIST_VIEW` and `LARGE_LIST_VIEW` to datatype.go
- [x] Add `ListView` and `LargeListView` to list.go
- [x] Add `ListViewType` and `LargeListViewType` to datatype_nested.go
- [x] Add list-view builders
- [x] Implement list-view comparison in compare.go
- [x] String conversion in both directions
- [x] Validation of list-view arrays
- [x] Generation of random list-view arrays
- [x] Concatenation of list-view arrays in concat.go
- [x] JSON serialization/deserialization
- [x] Add data used for tests in `arrdata.go`
- [x] Add Flatbuffer changes
- [x] Add IPC support

### Are these changes tested?

Yes. Existing tests are changed to also cover the list-view variations, and new tests focused solely on the list-view format are added.

### Are there any user-facing changes?

New structs and functions introduced.
* Closes: #35344 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Matt Topol --- go/arrow/array/array.go | 2 + go/arrow/array/builder.go | 6 + go/arrow/array/compare.go | 50 + go/arrow/array/concat.go | 171 +++ go/arrow/array/concat_test.go | 21 + go/arrow/array/list.go | 1080 ++++++++++++++++- go/arrow/array/list_test.go | 645 ++++++++-- go/arrow/array/map.go | 4 + go/arrow/datatype.go | 15 +- go/arrow/datatype_nested.go | 147 +++ go/arrow/internal/arrdata/arrdata.go | 82 ++ go/arrow/internal/arrjson/arrjson.go | 132 +- go/arrow/internal/arrjson/arrjson_test.go | 236 +++- .../internal/testing/gen/random_array_gen.go | 151 +++ go/arrow/ipc/file_reader.go | 17 + go/arrow/ipc/metadata.go | 26 + go/arrow/ipc/writer.go | 137 ++- go/arrow/ipc/writer_test.go | 6 +- go/arrow/type_string.go | 8 +- 19 files changed, 2809 insertions(+), 127 deletions(-) diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go index 418f67034583d..1ee04c7aa2bcc 100644 --- a/go/arrow/array/array.go +++ b/go/arrow/array/array.go @@ -176,6 +176,8 @@ func init() { arrow.LARGE_LIST: func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) }, arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) }, arrow.RUN_END_ENCODED: func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) }, + arrow.LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewListViewData(data) }, + arrow.LARGE_LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewLargeListViewData(data) }, // invalid data types to fill out array to size 2^6 - 1 63: invalidDataType, diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go index 58d4a0f4b8895..2f15ac965e07c 100644 --- a/go/arrow/array/builder.go +++ b/go/arrow/array/builder.go @@ -342,6 +342,12 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { case arrow.MAP: typ := dtype.(*arrow.MapType) return NewMapBuilderWithType(mem, typ) + case 
arrow.LIST_VIEW: + typ := dtype.(*arrow.ListViewType) + return NewListViewBuilderWithField(mem, typ.ElemField()) + case arrow.LARGE_LIST_VIEW: + typ := dtype.(*arrow.LargeListViewType) + return NewLargeListViewBuilderWithField(mem, typ.ElemField()) case arrow.EXTENSION: typ := dtype.(arrow.ExtensionType) bldr := NewExtensionBuilder(mem, typ) diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go index 7dca60688d490..e70716bee91a7 100644 --- a/go/arrow/array/compare.go +++ b/go/arrow/array/compare.go @@ -292,6 +292,12 @@ func Equal(left, right arrow.Array) bool { case *LargeList: r := right.(*LargeList) return arrayEqualLargeList(l, r) + case *ListView: + r := right.(*ListView) + return arrayEqualListView(l, r) + case *LargeListView: + r := right.(*LargeListView) + return arrayEqualLargeListView(l, r) case *FixedSizeList: r := right.(*FixedSizeList) return arrayEqualFixedSizeList(l, r) @@ -536,6 +542,12 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { case *LargeList: r := right.(*LargeList) return arrayApproxEqualLargeList(l, r, opt) + case *ListView: + r := right.(*ListView) + return arrayApproxEqualListView(l, r, opt) + case *LargeListView: + r := right.(*LargeListView) + return arrayApproxEqualLargeListView(l, r, opt) case *FixedSizeList: r := right.(*FixedSizeList) return arrayApproxEqualFixedSizeList(l, r, opt) @@ -682,6 +694,44 @@ func arrayApproxEqualLargeList(left, right *LargeList, opt equalOption) bool { return true } +func arrayApproxEqualListView(left, right *ListView, opt equalOption) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + o := func() bool { + l := left.newListValue(i) + defer l.Release() + r := right.newListValue(i) + defer r.Release() + return arrayApproxEqual(l, r, opt) + }() + if !o { + return false + } + } + return true +} + +func arrayApproxEqualLargeListView(left, right *LargeListView, opt equalOption) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { 
+ continue + } + o := func() bool { + l := left.newListValue(i) + defer l.Release() + r := right.newListValue(i) + defer r.Release() + return arrayApproxEqual(l, r, opt) + }() + if !o { + return false + } + } + return true +} + func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption) bool { for i := 0; i < left.Len(); i++ { if left.IsNull(i) { diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go index f00a36fec1171..9d815023c4b76 100644 --- a/go/arrow/array/concat.go +++ b/go/arrow/array/concat.go @@ -21,6 +21,7 @@ import ( "fmt" "math" "math/bits" + "unsafe" "github.com/apache/arrow/go/v14/arrow" "github.com/apache/arrow/go/v14/arrow/bitutil" @@ -355,6 +356,164 @@ func concatOffsets(buffers []*memory.Buffer, byteWidth int, mem memory.Allocator } } +func sumArraySizes(data []arrow.ArrayData) int { + outSize := 0 + for _, arr := range data { + outSize += arr.Len() + } + return outSize +} + +func getListViewBufferValues[T int32 | int64](data arrow.ArrayData, i int) []T { + bytes := data.Buffers()[i].Bytes() + base := (*T)(unsafe.Pointer(&bytes[0])) + ret := unsafe.Slice(base, data.Offset()+data.Len()) + return ret[data.Offset():] +} + +func putListViewOffsets32(in arrow.ArrayData, displacement int32, out *memory.Buffer, outOff int) { + debug.Assert(in.DataType().ID() == arrow.LIST_VIEW, "putListViewOffsets32: expected LIST_VIEW data") + inOff, inLen := in.Offset(), in.Len() + if inLen == 0 { + return + } + bitmap := in.Buffers()[0] + srcOffsets := getListViewBufferValues[int32](in, 1) + srcSizes := getListViewBufferValues[int32](in, 2) + isValidAndNonEmpty := func(i int) bool { + return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 + } + + dstOffsets := arrow.Int32Traits.CastFromBytes(out.Bytes()) + for i, offset := range srcOffsets { + if isValidAndNonEmpty(i) { + // This is guaranteed by RangeOfValuesUsed returning the smallest offset + // of valid and non-empty list-views. 
+ debug.Assert(offset+displacement >= 0, "putListViewOffsets32: offset underflow while concatenating arrays") + dstOffsets[outOff+i] = offset + displacement + } else { + dstOffsets[outOff+i] = 0 + } + } +} + +func putListViewOffsets64(in arrow.ArrayData, displacement int64, out *memory.Buffer, outOff int) { + debug.Assert(in.DataType().ID() == arrow.LARGE_LIST_VIEW, "putListViewOffsets64: expected LARGE_LIST_VIEW data") + inOff, inLen := in.Offset(), in.Len() + if inLen == 0 { + return + } + bitmap := in.Buffers()[0] + srcOffsets := getListViewBufferValues[int64](in, 1) + srcSizes := getListViewBufferValues[int64](in, 2) + isValidAndNonEmpty := func(i int) bool { + return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 + } + + dstOffsets := arrow.Int64Traits.CastFromBytes(out.Bytes()) + for i, offset := range srcOffsets { + if isValidAndNonEmpty(i) { + // This is guaranteed by RangeOfValuesUsed returning the smallest offset + // of valid and non-empty list-views. + debug.Assert(offset+displacement >= 0, "putListViewOffsets64: offset underflow while concatenating arrays") + dstOffsets[outOff+i] = offset + displacement + } else { + dstOffsets[outOff+i] = 0 + } + } +} + +// Concatenate buffers holding list-view offsets into a single buffer of offsets +// +// valueRanges contains the relevant ranges of values in the child array actually +// referenced to by the views. Most commonly, these ranges will start from 0, +// but when that is not the case, we need to adjust the displacement of offsets. +// The concatenated child array does not contain values from the beginning +// if they are not referenced to by any view. 
+func concatListViewOffsets(data []arrow.ArrayData, byteWidth int, valueRanges []rng, mem memory.Allocator) (*memory.Buffer, error) { + outSize := sumArraySizes(data) + if byteWidth == 4 && outSize > math.MaxInt32 { + return nil, fmt.Errorf("%w: offset overflow while concatenating arrays", arrow.ErrInvalid) + } + out := memory.NewResizableBuffer(mem) + out.Resize(byteWidth * outSize) + + numChildValues, elementsLength := 0, 0 + for i, arr := range data { + displacement := numChildValues - valueRanges[i].offset + if byteWidth == 4 { + putListViewOffsets32(arr, int32(displacement), out, elementsLength) + } else { + putListViewOffsets64(arr, int64(displacement), out, elementsLength) + } + elementsLength += arr.Len() + numChildValues += valueRanges[i].len + } + debug.Assert(elementsLength == outSize, "implementation error") + + return out, nil +} + +func zeroNullListViewSizes[T int32 | int64](data arrow.ArrayData) { + if data.Len() == 0 || data.Buffers()[0] == nil { + return + } + validity := data.Buffers()[0].Bytes() + sizes := getListViewBufferValues[T](data, 2) + + for i := 0; i < data.Len(); i++ { + if !bitutil.BitIsSet(validity, data.Offset()+i) { + sizes[i] = 0 + } + } +} + +func concatListView(data []arrow.ArrayData, offsetType arrow.FixedWidthDataType, out *Data, mem memory.Allocator) (err error) { + // Calculate the ranges of values that each list-view array uses + valueRanges := make([]rng, len(data)) + for i, input := range data { + offset, len := rangeOfValuesUsed(input) + valueRanges[i].offset = offset + valueRanges[i].len = len + } + + // Gather the children ranges of each input array + childData := gatherChildrenRanges(data, 0, valueRanges) + for _, c := range childData { + defer c.Release() + } + + // Concatenate the values + values, err := concat(childData, mem) + if err != nil { + return err + } + + // Concatenate the offsets + offsetBuffer, err := concatListViewOffsets(data, offsetType.Bytes(), valueRanges, mem) + if err != nil { + return err + } + + 
// Concatenate the sizes + sizeBuffers := gatherBuffersFixedWidthType(data, 2, offsetType) + sizeBuffer := concatBuffers(sizeBuffers, mem) + + out.childData = []arrow.ArrayData{values} + out.buffers[1] = offsetBuffer + out.buffers[2] = sizeBuffer + + // To make sure the sizes don't reference values that are not in the new + // concatenated values array, we zero the sizes of null list-view values. + if offsetType.ID() == arrow.INT32 { + zeroNullListViewSizes[int32](out) + } else { + zeroNullListViewSizes[int64](out) + } + + return nil +} + // concat is the implementation for actually performing the concatenation of the arrow.ArrayData // objects that we can call internally for nested types. func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, err error) { @@ -483,6 +642,18 @@ func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, if err != nil { return nil, err } + case *arrow.ListViewType: + offsetType := arrow.PrimitiveTypes.Int32.(arrow.FixedWidthDataType) + err := concatListView(data, offsetType, out, mem) + if err != nil { + return nil, err + } + case *arrow.LargeListViewType: + offsetType := arrow.PrimitiveTypes.Int64.(arrow.FixedWidthDataType) + err := concatListView(data, offsetType, out, mem) + if err != nil { + return nil, err + } case *arrow.FixedSizeListType: childData := gatherChildrenMultiplier(data, 0, int(dt.Len())) for _, c := range childData { diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go index cc4d29cf42460..c80844f05bacd 100644 --- a/go/arrow/array/concat_test.go +++ b/go/arrow/array/concat_test.go @@ -78,6 +78,8 @@ func TestConcatenate(t *testing.T) { {arrow.BinaryTypes.LargeString}, {arrow.ListOf(arrow.PrimitiveTypes.Int8)}, {arrow.LargeListOf(arrow.PrimitiveTypes.Int8)}, + {arrow.ListViewOf(arrow.PrimitiveTypes.Int8)}, + {arrow.LargeListViewOf(arrow.PrimitiveTypes.Int8)}, {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, {arrow.StructOf()}, 
{arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, @@ -200,6 +202,16 @@ func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Arra } } return bldr.NewArray() + case arrow.LIST_VIEW: + arr := cts.rng.ListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) + err := arr.ValidateFull() + cts.NoError(err) + return arr + case arrow.LARGE_LIST_VIEW: + arr := cts.rng.LargeListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) + err := arr.ValidateFull() + cts.NoError(err) + return arr case arrow.FIXED_SIZE_LIST: const listsize = 3 valuesSize := size * listsize @@ -317,11 +329,20 @@ func (cts *ConcatTestSuite) TestCheckConcat() { slices := cts.slices(arr, offsets) for _, s := range slices { + if s.DataType().ID() == arrow.LIST_VIEW { + err := s.(*array.ListView).ValidateFull() + cts.NoError(err) + } defer s.Release() } actual, err := array.Concatenate(slices, cts.mem) cts.NoError(err) + if arr.DataType().ID() == arrow.LIST_VIEW { + lv := actual.(*array.ListView) + err := lv.ValidateFull() + cts.NoError(err) + } defer actual.Release() cts.Truef(array.Equal(expected, actual), "expected: %s\ngot: %s\n", expected, actual) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 36035dd2f01a8..d8d8b8c76165a 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -19,6 +19,7 @@ package array import ( "bytes" "fmt" + "math" "strings" "sync/atomic" @@ -35,6 +36,10 @@ type ListLike interface { ValueOffsets(i int) (start, end int64) } +type VarLenListLike interface { + ListLike +} + // List represents an immutable sequence of array values. 
type List struct { array @@ -68,7 +73,7 @@ func (a *List) String() string { if i > 0 { o.WriteString(" ") } - if !a.IsValid(i) { + if a.IsNull(i) { o.WriteString(NullValueStr) continue } @@ -86,6 +91,7 @@ func (a *List) newListValue(i int) arrow.Array { } func (a *List) setData(data *Data) { + debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers") a.array.setData(data) vals := data.buffers[1] if vals != nil { @@ -199,7 +205,7 @@ func (a *LargeList) String() string { if i > 0 { o.WriteString(" ") } - if !a.IsValid(i) { + if a.IsNull(i) { o.WriteString(NullValueStr) continue } @@ -217,6 +223,7 @@ func (a *LargeList) newListValue(i int) arrow.Array { } func (a *LargeList) setData(data *Data) { + debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers") a.array.setData(data) vals := data.buffers[1] if vals != nil { @@ -314,6 +321,11 @@ type ListLikeBuilder interface { Append(bool) } +type VarLenListLikeBuilder interface { + ListLikeBuilder + AppendWithSize(bool, int) +} + type ListBuilder struct { baseListBuilder } @@ -422,6 +434,10 @@ func (b *baseListBuilder) Append(v bool) { b.appendNextOffset() } +func (b *baseListBuilder) AppendWithSize(v bool, _ int) { + b.Append(v) +} + func (b *baseListBuilder) AppendNull() { b.Reserve(1) b.unsafeAppendBoolToBitmap(false) @@ -618,19 +634,1055 @@ func (b *baseListBuilder) UnmarshalJSON(data []byte) error { return b.Unmarshal(dec) } +// ListView represents an immutable sequence of array values defined by an +// offset into a child array and a length. 
+type ListView struct { + array + values arrow.Array + offsets []int32 + sizes []int32 +} + +var _ VarLenListLike = (*ListView)(nil) + +func NewListViewData(data arrow.ArrayData) *ListView { + a := &ListView{} + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +func (a *ListView) ListValues() arrow.Array { return a.values } + +func (a *ListView) ValueStr(i int) string { + if !a.IsValid(i) { + return NullValueStr + } + return string(a.GetOneForMarshal(i).(json.RawMessage)) +} + +func (a *ListView) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + o.WriteString(" ") + } + if a.IsNull(i) { + o.WriteString(NullValueStr) + continue + } + sub := a.newListValue(i) + fmt.Fprintf(o, "%v", sub) + sub.Release() + } + o.WriteString("]") + return o.String() +} + +func (a *ListView) newListValue(i int) arrow.Array { + beg, end := a.ValueOffsets(i) + return NewSlice(a.values, beg, end) +} + +func (a *ListView) setData(data *Data) { + debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers") + a.array.setData(data) + offsets := data.buffers[1] + if offsets != nil { + a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes()) + } + sizes := data.buffers[2] + if sizes != nil { + a.sizes = arrow.Int32Traits.CastFromBytes(sizes.Bytes()) + } + a.values = MakeFromData(data.childData[0]) +} + +func (a *ListView) GetOneForMarshal(i int) interface{} { + if a.IsNull(i) { + return nil + } + + slice := a.newListValue(i) + defer slice.Release() + v, err := json.Marshal(slice) + if err != nil { + panic(err) + } + return json.RawMessage(v) +} + +func (a *ListView) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + + buf.WriteByte('[') + for i := 0; i < a.Len(); i++ { + if i != 0 { + buf.WriteByte(',') + } + if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { + return nil, err + } + } + buf.WriteByte(']') + return buf.Bytes(), nil +} + +func 
arrayEqualListView(left, right *ListView) bool { + for i := 0; i < left.Len(); i++ { + if left.IsNull(i) { + continue + } + o := func() bool { + l := left.newListValue(i) + defer l.Release() + r := right.newListValue(i) + defer r.Release() + return Equal(l, r) + }() + if !o { + return false + } + } + return true +} + +// Len returns the number of elements in the array. +func (a *ListView) Len() int { return a.array.Len() } + +func (a *ListView) Offsets() []int32 { return a.offsets } + +func (a *ListView) Sizes() []int32 { return a.sizes } + +func (a *ListView) Retain() { + a.array.Retain() + a.values.Retain() +} + +func (a *ListView) Release() { + a.array.Release() + a.values.Release() +} + +func (a *ListView) ValueOffsets(i int) (start, end int64) { + debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") + j := i + a.array.data.offset + size := int64(a.sizes[j]) + // If size is 0, skip accessing offsets. + if size == 0 { + start, end = 0, 0 + return + } + start = int64(a.offsets[j]) + end = start + size + return +} + +// LargeListView represents an immutable sequence of array values defined by an +// offset into a child array and a length. +type LargeListView struct { + array + values arrow.Array + offsets []int64 + sizes []int64 +} + +var _ VarLenListLike = (*LargeListView)(nil) + +// NewLargeListViewData returns a new LargeListView array value, from data. 
+func NewLargeListViewData(data arrow.ArrayData) *LargeListView { + a := new(LargeListView) + a.refCount = 1 + a.setData(data.(*Data)) + return a +} + +func (a *LargeListView) ListValues() arrow.Array { return a.values } + +func (a *LargeListView) ValueStr(i int) string { + if !a.IsValid(i) { + return NullValueStr + } + return string(a.GetOneForMarshal(i).(json.RawMessage)) +} + +func (a *LargeListView) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + o.WriteString(" ") + } + if a.IsNull(i) { + o.WriteString(NullValueStr) + continue + } + sub := a.newListValue(i) + fmt.Fprintf(o, "%v", sub) + sub.Release() + } + o.WriteString("]") + return o.String() +} + +func (a *LargeListView) newListValue(i int) arrow.Array { + beg, end := a.ValueOffsets(i) + return NewSlice(a.values, beg, end) +} + +func (a *LargeListView) setData(data *Data) { + debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers") + a.array.setData(data) + offsets := data.buffers[1] + if offsets != nil { + a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) + } + sizes := data.buffers[2] + if sizes != nil { + a.sizes = arrow.Int64Traits.CastFromBytes(sizes.Bytes()) + } + a.values = MakeFromData(data.childData[0]) +} + +func (a *LargeListView) GetOneForMarshal(i int) interface{} { + if a.IsNull(i) { + return nil + } + + slice := a.newListValue(i) + defer slice.Release() + v, err := json.Marshal(slice) + if err != nil { + panic(err) + } + return json.RawMessage(v) +} + +func (a *LargeListView) MarshalJSON() ([]byte, error) { + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + + buf.WriteByte('[') + for i := 0; i < a.Len(); i++ { + if i != 0 { + buf.WriteByte(',') + } + if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { + return nil, err + } + } + buf.WriteByte(']') + return buf.Bytes(), nil +} + +func arrayEqualLargeListView(left, right *LargeListView) bool { + for i := 0; i < left.Len(); i++ { + if 
left.IsNull(i) { + continue + } + o := func() bool { + l := left.newListValue(i) + defer l.Release() + r := right.newListValue(i) + defer r.Release() + return Equal(l, r) + }() + if !o { + return false + } + } + return true +} + +// Len returns the number of elements in the array. +func (a *LargeListView) Len() int { return a.array.Len() } + +func (a *LargeListView) Offsets() []int64 { return a.offsets } + +func (a *LargeListView) Sizes() []int64 { return a.sizes } + +func (a *LargeListView) ValueOffsets(i int) (start, end int64) { + debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") + j := i + a.array.data.offset + size := a.sizes[j] + // If size is 0, skip accessing offsets. + if size == 0 { + return 0, 0 + } + start = a.offsets[j] + end = start + size + return +} + +func (a *LargeListView) Retain() { + a.array.Retain() + a.values.Retain() +} + +func (a *LargeListView) Release() { + a.array.Release() + a.values.Release() +} + +// Acessors for offsets and sizes to make ListView and LargeListView validation generic. +type offsetsAndSizes interface { + offsetAt(slot int64) int64 + sizeAt(slot int64) int64 +} + +var _ offsetsAndSizes = (*ListView)(nil) +var _ offsetsAndSizes = (*LargeListView)(nil) + +func (a *ListView) offsetAt(slot int64) int64 { return int64(a.offsets[int64(a.data.offset)+slot]) } + +func (a *ListView) sizeAt(slot int64) int64 { return int64(a.sizes[int64(a.data.offset)+slot]) } + +func (a *LargeListView) offsetAt(slot int64) int64 { return a.offsets[int64(a.data.offset)+slot] } + +func (a *LargeListView) sizeAt(slot int64) int64 { return a.sizes[int64(a.data.offset)+slot] } + +func outOfBoundsListViewOffset(l offsetsAndSizes, slot int64, offsetLimit int64) error { + offset := l.offsetAt(slot) + return fmt.Errorf("%w: Offset invariant failure: offset for slot %d out of bounds. 
Expected %d to be at least 0 and less than %d", arrow.ErrInvalid, slot, offset, offsetLimit) +} + +func outOfBoundsListViewSize(l offsetsAndSizes, slot int64, offsetLimit int64) error { + size := l.sizeAt(slot) + if size < 0 { + return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d < 0", arrow.ErrInvalid, slot, size) + } + offset := l.offsetAt(slot) + return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d + %d > %d", arrow.ErrInvalid, slot, offset, size, offsetLimit) +} + +// Pre-condition: Basic validation has already been performed +func (a *array) fullyValidateOffsetsAndSizes(l offsetsAndSizes, offsetLimit int64) error { + for slot := int64(0); slot < int64(a.Len()); slot += 1 { + size := l.sizeAt(slot) + if size > 0 { + offset := l.offsetAt(slot) + if offset < 0 || offset > offsetLimit { + return outOfBoundsListViewOffset(l, slot, offsetLimit) + } + if size > offsetLimit-int64(offset) { + return outOfBoundsListViewSize(l, slot, offsetLimit) + } + } else if size < 0 { + return outOfBoundsListViewSize(l, slot, offsetLimit) + } + } + + return nil +} + +func (a *array) validateOffsetsAndMaybeSizes(l offsetsAndSizes, offsetByteWidth int, isListView bool, offsetLimit int64, fullValidation bool) error { + nonEmpty := a.Len() > 0 + if a.data.buffers[1] == nil { + // For length 0, an empty offsets buffer is accepted (ARROW-544). 
+ if nonEmpty { + return fmt.Errorf("non-empty array but offsets are null") + } + return nil + } + if isListView && a.data.buffers[2] == nil { + if nonEmpty { + return fmt.Errorf("non-empty array but sizes are null") + } + return nil + } + + var requiredOffsets int + if nonEmpty { + requiredOffsets = a.Len() + a.Offset() + if !isListView { + requiredOffsets += 1 + } + } else { + requiredOffsets = 0 + } + offsetsByteSize := a.data.buffers[1].Len() + if offsetsByteSize/offsetByteWidth < requiredOffsets { + return fmt.Errorf("offsets buffer size (bytes): %d isn't large enough for length: %d and offset: %d", + offsetsByteSize, a.Len(), a.Offset()) + } + if isListView { + requiredSizes := a.Len() + a.Offset() + sizesBytesSize := a.data.buffers[2].Len() + if sizesBytesSize/offsetByteWidth < requiredSizes { + return fmt.Errorf("sizes buffer size (bytes): %d isn't large enough for length: %d and offset: %d", + sizesBytesSize, a.Len(), a.Offset()) + } + } + + if fullValidation && requiredOffsets > 0 { + if isListView { + return a.fullyValidateOffsetsAndSizes(l, offsetLimit) + } + // TODO: implement validation of List and LargeList + // return fullyValidateOffsets(offset_limit) + return nil + } + return nil +} + +func (a *ListView) validate(fullValidation bool) error { + values := a.array.data.childData[0] + offsetLimit := values.Len() + return a.array.validateOffsetsAndMaybeSizes(a, 4, true, int64(offsetLimit), fullValidation) +} + +func (a *ListView) Validate() error { + return a.validate(false) +} + +func (a *ListView) ValidateFull() error { + return a.validate(true) +} + +func (a *LargeListView) validate(fullValidation bool) error { + values := a.array.data.childData[0] + offsetLimit := values.Len() + return a.array.validateOffsetsAndMaybeSizes(a, 8, true, int64(offsetLimit), fullValidation) +} + +func (a *LargeListView) Validate() error { + return a.validate(false) +} + +func (a *LargeListView) ValidateFull() error { + return a.validate(true) +} + +type 
baseListViewBuilder struct { + builder + + values Builder // value builder for the list-view's elements. + offsets Builder + sizes Builder + + // actual list-view type + dt arrow.DataType + appendOffsetVal func(int) + appendSizeVal func(int) +} + +type ListViewBuilder struct { + baseListViewBuilder +} + +type LargeListViewBuilder struct { + baseListViewBuilder +} + +// NewListViewBuilder returns a builder, using the provided memory allocator. +// The created list-view builder will create a list whose elements will be +// of type etype. +func NewListViewBuilder(mem memory.Allocator, etype arrow.DataType) *ListViewBuilder { + offsetBldr := NewInt32Builder(mem) + sizeBldr := NewInt32Builder(mem) + return &ListViewBuilder{ + baseListViewBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, etype), + offsets: offsetBldr, + sizes: sizeBldr, + dt: arrow.ListViewOf(etype), + appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, + appendSizeVal: func(s int) { sizeBldr.Append(int32(s)) }, + }, + } +} + +// NewListViewBuilderWithField takes a field to use for the child rather than just +// a datatype to allow for more customization. 
+func NewListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *ListViewBuilder { + offsetBldr := NewInt32Builder(mem) + sizeBldr := NewInt32Builder(mem) + return &ListViewBuilder{ + baseListViewBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, field.Type), + offsets: offsetBldr, + sizes: sizeBldr, + dt: arrow.ListViewOfField(field), + appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, + appendSizeVal: func(s int) { sizeBldr.Append(int32(s)) }, + }, + } +} + +func (b *baseListViewBuilder) Type() arrow.DataType { + switch dt := b.dt.(type) { + case *arrow.ListViewType: + f := dt.ElemField() + f.Type = b.values.Type() + return arrow.ListViewOfField(f) + case *arrow.LargeListViewType: + f := dt.ElemField() + f.Type = b.values.Type() + return arrow.LargeListViewOfField(f) + } + return nil +} + +// NewLargeListViewBuilder returns a builder, using the provided memory allocator. +// The created list-view builder will create a list whose elements will be of type etype. 
+func NewLargeListViewBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListViewBuilder { + offsetBldr := NewInt64Builder(mem) + sizeBldr := NewInt64Builder(mem) + return &LargeListViewBuilder{ + baseListViewBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, etype), + offsets: offsetBldr, + sizes: sizeBldr, + dt: arrow.LargeListViewOf(etype), + appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, + appendSizeVal: func(s int) { sizeBldr.Append(int64(s)) }, + }, + } +} + +// NewLargeListViewBuilderWithField takes a field rather than just an element type +// to allow for more customization of the final type of the LargeListView Array +func NewLargeListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListViewBuilder { + offsetBldr := NewInt64Builder(mem) + sizeBldr := NewInt64Builder(mem) + return &LargeListViewBuilder{ + baseListViewBuilder{ + builder: builder{refCount: 1, mem: mem}, + values: NewBuilder(mem, field.Type), + offsets: offsetBldr, + sizes: sizeBldr, + dt: arrow.LargeListViewOfField(field), + appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, + appendSizeVal: func(o int) { sizeBldr.Append(int64(o)) }, + }, + } +} + +// Release decreases the reference count by 1. +// When the reference count goes to zero, the memory is freed. 
+func (b *baseListViewBuilder) Release() { + debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") + + if atomic.AddInt64(&b.refCount, -1) == 0 { + if b.nullBitmap != nil { + b.nullBitmap.Release() + b.nullBitmap = nil + } + b.values.Release() + b.offsets.Release() + b.sizes.Release() + } +} + +func (b *baseListViewBuilder) AppendDimensions(offset int, listSize int) { + b.Reserve(1) + b.unsafeAppendBoolToBitmap(true) + b.appendOffsetVal(offset) + b.appendSizeVal(listSize) +} + +func (b *baseListViewBuilder) Append(v bool) { + debug.Assert(false, "baseListViewBuilder.Append should never be called -- use AppendWithSize instead") +} + +func (b *baseListViewBuilder) AppendWithSize(v bool, listSize int) { + debug.Assert(v || listSize == 0, "invalid list-view should have size 0") + b.Reserve(1) + b.unsafeAppendBoolToBitmap(v) + b.appendOffsetVal(b.values.Len()) + b.appendSizeVal(listSize) +} + +func (b *baseListViewBuilder) AppendNull() { + b.AppendWithSize(false, 0) +} + +func (b *baseListViewBuilder) AppendNulls(n int) { + for i := 0; i < n; i++ { + b.AppendNull() + } +} + +func (b *baseListViewBuilder) AppendEmptyValue() { + b.AppendWithSize(true, 0) +} + +func (b *baseListViewBuilder) AppendEmptyValues(n int) { + for i := 0; i < n; i++ { + b.AppendEmptyValue() + } +} + +func (b *ListViewBuilder) AppendValuesWithSizes(offsets []int32, sizes []int32, valid []bool) { + b.Reserve(len(valid)) + b.offsets.(*Int32Builder).AppendValues(offsets, nil) + b.sizes.(*Int32Builder).AppendValues(sizes, nil) + b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) +} + +func (b *LargeListViewBuilder) AppendValuesWithSizes(offsets []int64, sizes []int64, valid []bool) { + b.Reserve(len(valid)) + b.offsets.(*Int64Builder).AppendValues(offsets, nil) + b.sizes.(*Int64Builder).AppendValues(sizes, nil) + b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) +} + +func (b *baseListViewBuilder) unsafeAppendBoolToBitmap(isValid bool) { + if isValid { + 
bitutil.SetBit(b.nullBitmap.Bytes(), b.length) + } else { + b.nulls++ + } + b.length++ +} + +func (b *baseListViewBuilder) init(capacity int) { + b.builder.init(capacity) + b.offsets.init(capacity) + b.sizes.init(capacity) +} + +// Reserve ensures there is enough space for appending n elements +// by checking the capacity and calling Resize if necessary. +func (b *baseListViewBuilder) Reserve(n int) { + b.builder.reserve(n, b.resizeHelper) + b.offsets.Reserve(n) + b.sizes.Reserve(n) +} + +// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), +// additional memory will be allocated. If n is smaller, the allocated memory may reduced. +func (b *baseListViewBuilder) Resize(n int) { + b.resizeHelper(n) + b.offsets.Resize(n) + b.sizes.Resize(n) +} + +func (b *baseListViewBuilder) resizeHelper(n int) { + if n < minBuilderCapacity { + n = minBuilderCapacity + } + + if b.capacity == 0 { + b.init(n) + } else { + b.builder.resize(n, b.builder.init) + } +} + +func (b *baseListViewBuilder) ValueBuilder() Builder { + return b.values +} + +// NewArray creates a ListView array from the memory buffers used by the builder and +// resets the ListViewBuilder so it can be used to build a new array. +func (b *ListViewBuilder) NewArray() arrow.Array { + return b.NewListViewArray() +} + +// NewArray creates a LargeListView array from the memory buffers used by the builder +// and resets the LargeListViewBuilder so it can be used to build a new array. +func (b *LargeListViewBuilder) NewArray() arrow.Array { + return b.NewLargeListViewArray() +} + +// NewListViewArray creates a ListView array from the memory buffers used by the builder +// and resets the ListViewBuilder so it can be used to build a new array. 
+func (b *ListViewBuilder) NewListViewArray() (a *ListView) { + data := b.newData() + a = NewListViewData(data) + data.Release() + return +} + +// NewLargeListViewArray creates a ListView array from the memory buffers used by the +// builder and resets the LargeListViewBuilder so it can be used to build a new array. +func (b *LargeListViewBuilder) NewLargeListViewArray() (a *LargeListView) { + data := b.newData() + a = NewLargeListViewData(data) + data.Release() + return +} + +func (b *baseListViewBuilder) newData() (data *Data) { + values := b.values.NewArray() + defer values.Release() + + var offsets *memory.Buffer + if b.offsets != nil { + arr := b.offsets.NewArray() + defer arr.Release() + offsets = arr.Data().Buffers()[1] + } + + var sizes *memory.Buffer + if b.sizes != nil { + arr := b.sizes.NewArray() + defer arr.Release() + sizes = arr.Data().Buffers()[1] + } + + data = NewData( + b.Type(), b.length, + []*memory.Buffer{ + b.nullBitmap, + offsets, + sizes, + }, + []arrow.ArrayData{values.Data()}, + b.nulls, + 0, + ) + b.reset() + + return +} + +func (b *baseListViewBuilder) AppendValueFromString(s string) error { + if s == NullValueStr { + b.AppendNull() + return nil + } + + return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s))) +} + +func (b *baseListViewBuilder) UnmarshalOne(dec *json.Decoder) error { + t, err := dec.Token() + if err != nil { + return err + } + + switch t { + case json.Delim('['): + offset := b.values.Len() + // 0 is a placeholder size as we don't know the actual size yet + b.AppendWithSize(true, 0) + if err := b.values.Unmarshal(dec); err != nil { + return err + } + // consume ']' + _, err := dec.Token() + // replace the last size with the actual size + switch b.sizes.(type) { + case *Int32Builder: + b.sizes.(*Int32Builder).rawData[b.sizes.Len()-1] = int32(b.values.Len() - offset) + case *Int64Builder: + b.sizes.(*Int64Builder).rawData[b.sizes.Len()-1] = int64(b.values.Len() - offset) + } + return err + case nil: + b.AppendNull() + 
default: + return &json.UnmarshalTypeError{ + Value: fmt.Sprint(t), + Struct: b.dt.String(), + } + } + + return nil +} + +func (b *baseListViewBuilder) Unmarshal(dec *json.Decoder) error { + for dec.More() { + if err := b.UnmarshalOne(dec); err != nil { + return err + } + } + return nil +} + +func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { + dec := json.NewDecoder(bytes.NewReader(data)) + t, err := dec.Token() + if err != nil { + return err + } + + if delim, ok := t.(json.Delim); !ok || delim != '[' { + return fmt.Errorf("list-view builder must unpack from json array, found %s", delim) + } + + return b.Unmarshal(dec) +} + +// Pre-conditions: +// +// input.DataType() is ListViewType +// input.Len() > 0 && input.NullN() != input.Len() +func minListViewOffset32(input arrow.ArrayData) int32 { + var bitmap []byte + if input.Buffers()[0] != nil { + bitmap = input.Buffers()[0].Bytes() + } + offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] + sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] + + isNull := func(i int) bool { + return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) + } + + // It's very likely that the first non-null non-empty list-view starts at + // offset 0 of the child array. + i := 0 + for i < input.Len() && (isNull(i) || sizes[i] == 0) { + i += 1 + } + if i >= input.Len() { + return 0 + } + minOffset := offsets[i] + if minOffset == 0 { + // early exit: offset 0 found already + return 0 + } + + // Slow path: scan the buffers entirely. + i += 1 + for ; i < input.Len(); i += 1 { + if isNull(i) { + continue + } + offset := offsets[i] + if offset < minOffset && sizes[i] > 0 { + minOffset = offset + } + } + return minOffset +} + +// Find the maximum offset+size in a LIST_VIEW array. 
+// +// Pre-conditions: +// +// input.DataType() is ListViewType +// input.Len() > 0 && input.NullN() != input.Len() +func maxListViewOffset32(input arrow.ArrayData) int { + inputOffset := input.Offset() + var bitmap []byte + if input.Buffers()[0] != nil { + bitmap = input.Buffers()[0].Bytes() + } + offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] + sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] + + isNull := func(i int) bool { + return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) + } + + i := input.Len() - 1 // safe because input.Len() > 0 + for i != 0 && (isNull(i) || sizes[i] == 0) { + i -= 1 + } + offset := offsets[i] + size := sizes[i] + if i == 0 { + if isNull(i) || sizes[i] == 0 { + return 0 + } else { + return int(offset + size) + } + } + + values := input.Children()[0] + maxEnd := int(offsets[i] + sizes[i]) + if maxEnd == values.Len() { + // Early-exit: maximum possible view-end found already. + return maxEnd + } + + // Slow path: scan the buffers entirely. + for ; i >= 0; i -= 1 { + offset := offsets[i] + size := sizes[i] + if size > 0 && !isNull(i) { + if int(offset+size) > maxEnd { + maxEnd = int(offset + size) + if maxEnd == values.Len() { + return maxEnd + } + } + } + } + return maxEnd +} + +// Pre-conditions: +// +// input.DataType() is LargeListViewType +// input.Len() > 0 && input.NullN() != input.Len() +func minLargeListViewOffset64(input arrow.ArrayData) int64 { + var bitmap []byte + if input.Buffers()[0] != nil { + bitmap = input.Buffers()[0].Bytes() + } + offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] + sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] + + isNull := func(i int) bool { + return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) + } + + // It's very likely that the first non-null non-empty list-view starts at + // offset 0 of the child array. 
+ i := 0 + for i < input.Len() && (isNull(i) || sizes[i] == 0) { + i += 1 + } + if i >= input.Len() { + return 0 + } + minOffset := offsets[i] + if minOffset == 0 { + // early exit: offset 0 found already + return 0 + } + + // Slow path: scan the buffers entirely. + i += 1 + for ; i < input.Len(); i += 1 { + if isNull(i) { + continue + } + offset := offsets[i] + if offset < minOffset && sizes[i] > 0 { + minOffset = offset + } + } + return minOffset +} + +// Find the maximum offset+size in a LARGE_LIST_VIEW array. +// +// Pre-conditions: +// +// input.DataType() is LargeListViewType +// input.Len() > 0 && input.NullN() != input.Len() +func maxLargeListViewOffset64(input arrow.ArrayData) int64 { + inputOffset := input.Offset() + var bitmap []byte + if input.Buffers()[0] != nil { + bitmap = input.Buffers()[0].Bytes() + } + offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] + sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] + + isNull := func(i int) bool { + return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) + } + + // It's very likely that the first non-null non-empty list-view starts at + // offset zero, so we check that first and potentially early-return a 0. + i := input.Len() - 1 // safe because input.Len() > 0 + for i != 0 && (isNull(i) || sizes[i] == 0) { + i -= 1 + } + offset := offsets[i] + size := sizes[i] + if i == 0 { + if isNull(i) || sizes[i] == 0 { + return 0 + } else { + return offset + size + } + } + + if offset > math.MaxInt64-size { + // Early-exit: 64-bit overflow detected. This is not possible on a + // valid list-view, but we return the maximum possible value to + // avoid undefined behavior. + return math.MaxInt64 + } + values := input.Children()[0] + maxEnd := offsets[i] + sizes[i] + if maxEnd == int64(values.Len()) { + // Early-exit: maximum possible view-end found already. + return maxEnd + } + + // Slow path: scan the buffers entirely. 
+ for ; i >= 0; i -= 1 { + offset := offsets[i] + size := sizes[i] + if size > 0 && !isNull(i) { + if offset+size > maxEnd { + if offset > math.MaxInt64-size { + // 64-bit overflow detected. This is not possible on a valid list-view, + // but we saturate maxEnd to the maximum possible value to avoid + // undefined behavior. + return math.MaxInt64 + } + maxEnd = offset + size + if maxEnd == int64(values.Len()) { + return maxEnd + } + } + } + } + return maxEnd +} + +func rangeOfValuesUsed(input arrow.ArrayData) (int, int) { + if input.Len() == 0 || input.NullN() == input.Len() { + return 0, 0 + } + var minOffset, maxEnd int + switch input.DataType().(type) { + case *arrow.ListViewType: + minOffset = int(minListViewOffset32(input)) + maxEnd = maxListViewOffset32(input) + case *arrow.LargeListViewType: + minOffset = int(minLargeListViewOffset64(input)) + maxEnd = int(maxLargeListViewOffset64(input)) + case *arrow.ListType: + offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] + minOffset = int(offsets[0]) + maxEnd = int(offsets[len(offsets)-1]) + case *arrow.LargeListType: + offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] + minOffset = int(offsets[0]) + maxEnd = int(offsets[len(offsets)-1]) + case *arrow.MapType: + offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] + minOffset = int(offsets[0]) + maxEnd = int(offsets[len(offsets)-1]) + } + return minOffset, maxEnd - minOffset +} + +// Returns the smallest contiguous range of values of the child array that are +// referenced by all the list values in the input array. 
+func RangeOfValuesUsed(input VarLenListLike) (int, int) { + return rangeOfValuesUsed(input.Data()) +} + var ( _ arrow.Array = (*List)(nil) _ arrow.Array = (*LargeList)(nil) - _ Builder = (*ListBuilder)(nil) - _ Builder = (*LargeListBuilder)(nil) - - _ ListLike = (*List)(nil) - _ ListLike = (*LargeList)(nil) - _ ListLike = (*FixedSizeList)(nil) - _ ListLike = (*Map)(nil) - - _ ListLikeBuilder = (*ListBuilder)(nil) - _ ListLikeBuilder = (*LargeListBuilder)(nil) - _ ListLikeBuilder = (*FixedSizeListBuilder)(nil) - _ ListLikeBuilder = (*MapBuilder)(nil) + _ arrow.Array = (*ListView)(nil) + _ arrow.Array = (*LargeListView)(nil) + + _ Builder = (*ListBuilder)(nil) + _ Builder = (*LargeListBuilder)(nil) + _ Builder = (*ListViewBuilder)(nil) + _ Builder = (*LargeListViewBuilder)(nil) + + _ VarLenListLike = (*List)(nil) + _ VarLenListLike = (*LargeList)(nil) + _ VarLenListLike = (*Map)(nil) + _ VarLenListLike = (*ListView)(nil) + _ VarLenListLike = (*LargeListView)(nil) + _ ListLike = (*FixedSizeList)(nil) + + _ VarLenListLikeBuilder = (*ListBuilder)(nil) + _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) + _ VarLenListLikeBuilder = (*ListBuilder)(nil) + _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) + _ VarLenListLikeBuilder = (*MapBuilder)(nil) + _ ListLikeBuilder = (*FixedSizeListBuilder)(nil) ) diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go index 9f193fe19aabd..bf3555b3f6603 100644 --- a/go/arrow/array/list_test.go +++ b/go/arrow/array/list_test.go @@ -30,12 +30,15 @@ func TestListArray(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} + sizes interface{} dt arrow.DataType }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, - {arrow.LARGE_LIST, []int64{0, 3, 
3, 3, 7}, arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, + {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, + {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { @@ -49,7 +52,7 @@ func TestListArray(t *testing.T) { isValid = []bool{true, false, true, true} ) - lb := array.NewBuilder(pool, tt.dt).(array.ListLikeBuilder) + lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() for i := 0; i < 10; i++ { @@ -58,7 +61,7 @@ func TestListArray(t *testing.T) { pos := 0 for i, length := range lengths { - lb.Append(isValid[i]) + lb.AppendWithSize(isValid[i], length) for j := 0; j < length; j++ { vb.Append(vs[pos]) pos++ @@ -88,18 +91,32 @@ func TestListArray(t *testing.T) { } } - var got interface{} + var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST: arr := arr.(*array.List) - got = arr.Offsets() + gotOffsets = arr.Offsets() case arrow.LARGE_LIST: arr := arr.(*array.LargeList) - got = arr.Offsets() + gotOffsets = arr.Offsets() + case arrow.LIST_VIEW: + arr := arr.(*array.ListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + case arrow.LARGE_LIST_VIEW: + arr := arr.(*array.LargeListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() } - if !reflect.DeepEqual(got, tt.offsets) { - t.Fatalf("got=%v, want=%v", got, tt.offsets) + if 
!reflect.DeepEqual(gotOffsets, tt.offsets) { + t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) + } + + if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { + if !reflect.DeepEqual(gotSizes, tt.sizes) { + t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) + } } varr := arr.ListValues().(*array.Int32) @@ -109,13 +126,110 @@ func TestListArray(t *testing.T) { } }) } +} + +// Like the list-view tests in TestListArray, but with out-of-order offsets. +func TestListViewArray(t *testing.T) { + tests := []struct { + typeID arrow.Type + offsets interface{} + sizes interface{} + dt arrow.DataType + }{ + {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, + } + + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + var ( + vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} + lengths = []int{3, 0, 0, 4} + isValid = []bool{true, false, true, true} + ) + + lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) + defer lb.Release() + + for i := 0; i < 10; i++ { + switch lvb := lb.(type) { + case *array.ListViewBuilder: + lvb.AppendDimensions(5, 3) + lb.AppendNull() + lvb.AppendDimensions(0, 0) + lvb.AppendDimensions(1, 4) + case *array.LargeListViewBuilder: + lvb.AppendDimensions(5, 3) + lb.AppendNull() + lvb.AppendDimensions(0, 0) + lvb.AppendDimensions(1, 4) + } + + vb := lb.ValueBuilder().(*array.Int32Builder) + vb.Reserve(len(vs)) + vb.AppendValues(vs, []bool{false, true, true, true, true, true, true, true}) + + arr := lb.NewArray().(array.ListLike) + defer arr.Release() + + arr.Retain() + arr.Release() + + if got, want := arr.DataType().ID(), tt.typeID; got != want { + t.Fatalf("got=%v, want=%v", got, want) + } + + if got, want := arr.Len(), len(isValid); got != want { + 
t.Fatalf("got=%d, want=%d", got, want) + } + + for i := range lengths { + if got, want := arr.IsValid(i), isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + if got, want := arr.IsNull(i), !isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + } + + var gotOffsets, gotSizes interface{} + switch tt.typeID { + case arrow.LIST_VIEW: + arr := arr.(*array.ListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + case arrow.LARGE_LIST_VIEW: + arr := arr.(*array.LargeListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + } + + if !reflect.DeepEqual(gotOffsets, tt.offsets) { + t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) + } + + if !reflect.DeepEqual(gotSizes, tt.sizes) { + t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) + } + + varr := arr.ListValues().(*array.Int32) + if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + } + }) + } } func TestListArrayEmpty(t *testing.T) { typ := []arrow.DataType{ arrow.ListOf(arrow.PrimitiveTypes.Int32), arrow.LargeListOf(arrow.PrimitiveTypes.Int32), + arrow.ListViewOf(arrow.PrimitiveTypes.Int32), + arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32), } for _, dt := range typ { @@ -138,10 +252,13 @@ func TestListArrayBulkAppend(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} + sizes interface{} dt arrow.DataType }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, 
arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { @@ -155,7 +272,7 @@ func TestListArrayBulkAppend(t *testing.T) { isValid = []bool{true, false, true, true} ) - lb := array.NewBuilder(pool, tt.dt).(array.ListLikeBuilder) + lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) @@ -165,12 +282,16 @@ func TestListArrayBulkAppend(t *testing.T) { lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) case arrow.LARGE_LIST: lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) + case arrow.LIST_VIEW: + lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) + case arrow.LARGE_LIST_VIEW: + lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) } for _, v := range vs { vb.Append(v) } - arr := lb.NewArray().(array.ListLike) + arr := lb.NewArray().(array.VarLenListLike) defer arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { @@ -190,18 +311,115 @@ func TestListArrayBulkAppend(t *testing.T) { } } - var got interface{} + var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST: arr := arr.(*array.List) - got = arr.Offsets() + gotOffsets = arr.Offsets() case arrow.LARGE_LIST: arr := arr.(*array.LargeList) - got = arr.Offsets() + gotOffsets = arr.Offsets() + case arrow.LIST_VIEW: + arr := arr.(*array.ListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + case arrow.LARGE_LIST_VIEW: + arr := arr.(*array.LargeListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + } + + if !reflect.DeepEqual(gotOffsets, tt.offsets) { + t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) + } + if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { + if !reflect.DeepEqual(gotSizes, tt.sizes) { + t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) + } + } + + varr := 
arr.ListValues().(*array.Int32) + if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + }) + } +} + +func TestListViewArrayBulkAppend(t *testing.T) { + tests := []struct { + typeID arrow.Type + offsets interface{} + sizes interface{} + dt arrow.DataType + }{ + {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, + } + + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + var ( + vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} + lengths = []int{3, 0, 0, 4} + isValid = []bool{true, false, true, true} + ) + + lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) + defer lb.Release() + vb := lb.ValueBuilder().(*array.Int32Builder) + vb.Reserve(len(vs)) + + switch tt.typeID { + case arrow.LIST_VIEW: + lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) + case arrow.LARGE_LIST_VIEW: + lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) + } + for _, v := range vs { + vb.Append(v) + } + + arr := lb.NewArray().(array.VarLenListLike) + defer arr.Release() + + if got, want := arr.DataType().ID(), tt.typeID; got != want { + t.Fatalf("got=%v, want=%v", got, want) + } + + if got, want := arr.Len(), len(isValid); got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + for i := range lengths { + if got, want := arr.IsValid(i), isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + if got, want := arr.IsNull(i), !isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + } + + var gotOffsets, gotSizes interface{} + switch tt.typeID { + case arrow.LIST_VIEW: + 
arr := arr.(*array.ListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + case arrow.LARGE_LIST_VIEW: + arr := arr.(*array.LargeListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() } - if !reflect.DeepEqual(got, tt.offsets) { - t.Fatalf("got=%v, want=%v", got, tt.offsets) + if !reflect.DeepEqual(gotOffsets, tt.offsets) { + t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) + } + if !reflect.DeepEqual(gotSizes, tt.sizes) { + t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } varr := arr.ListValues().(*array.Int32) @@ -216,10 +434,13 @@ func TestListArraySlice(t *testing.T) { tests := []struct { typeID arrow.Type offsets interface{} + sizes interface{} dt arrow.DataType }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LIST_VIEW, []int32{0, 3, 3, 3, 7}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3, 7}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, } for _, tt := range tests { @@ -233,7 +454,7 @@ func TestListArraySlice(t *testing.T) { isValid = []bool{true, false, true, true} ) - lb := array.NewBuilder(pool, tt.dt).(array.ListLikeBuilder) + lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) defer lb.Release() vb := lb.ValueBuilder().(*array.Int32Builder) vb.Reserve(len(vs)) @@ -243,12 +464,16 @@ func TestListArraySlice(t *testing.T) { lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) case arrow.LARGE_LIST: lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) + case arrow.LIST_VIEW: + lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) + 
case arrow.LARGE_LIST_VIEW: + lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) } for _, v := range vs { vb.Append(v) } - arr := lb.NewArray().(array.ListLike) + arr := lb.NewArray().(array.VarLenListLike) defer arr.Release() if got, want := arr.DataType().ID(), tt.typeID; got != want { @@ -268,18 +493,129 @@ func TestListArraySlice(t *testing.T) { } } - var got interface{} + var gotOffsets, gotSizes interface{} switch tt.typeID { case arrow.LIST: arr := arr.(*array.List) - got = arr.Offsets() + gotOffsets = arr.Offsets() case arrow.LARGE_LIST: arr := arr.(*array.LargeList) - got = arr.Offsets() + gotOffsets = arr.Offsets() + case arrow.LIST_VIEW: + arr := arr.(*array.ListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + case arrow.LARGE_LIST_VIEW: + arr := arr.(*array.LargeListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + } + + if !reflect.DeepEqual(gotOffsets, tt.offsets) { + t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) + } + + if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { + if !reflect.DeepEqual(gotSizes, tt.sizes) { + t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) + } + } + + varr := arr.ListValues().(*array.Int32) + if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { + t.Fatalf("got=%v, want=%v", got, want) + } + + if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) + + sub := array.NewSlice(arr, 1, 4).(array.ListLike) + defer sub.Release() + + if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want { + t.Fatalf("got=%q, want=%q", got, want) + } + }) + } +} + +func TestLisViewtArraySlice(t *testing.T) { + tests := []struct { + typeID arrow.Type + offsets interface{} + sizes interface{} + dt arrow.DataType + }{ + {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, 
arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, + {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, + } + + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + var ( + vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} + lengths = []int{3, 0, 0, 4} + isValid = []bool{true, false, true, true} + ) + + lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) + defer lb.Release() + vb := lb.ValueBuilder().(*array.Int32Builder) + vb.Reserve(len(vs)) + + switch tt.typeID { + case arrow.LIST_VIEW: + lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) + case arrow.LARGE_LIST_VIEW: + lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) + } + for _, v := range vs { + vb.Append(v) + } + + arr := lb.NewArray().(array.VarLenListLike) + defer arr.Release() + + if got, want := arr.DataType().ID(), tt.typeID; got != want { + t.Fatalf("got=%v, want=%v", got, want) + } + + if got, want := arr.Len(), len(isValid); got != want { + t.Fatalf("got=%d, want=%d", got, want) } - if !reflect.DeepEqual(got, tt.offsets) { - t.Fatalf("got=%v, want=%v", got, tt.offsets) + for i := range lengths { + if got, want := arr.IsValid(i), isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + if got, want := arr.IsNull(i), !isValid[i]; got != want { + t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) + } + } + + var gotOffsets, gotSizes interface{} + switch tt.typeID { + case arrow.LIST_VIEW: + arr := arr.(*array.ListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + case arrow.LARGE_LIST_VIEW: + arr := arr.(*array.LargeListView) + gotOffsets = arr.Offsets() + gotSizes = arr.Sizes() + } + + if !reflect.DeepEqual(gotOffsets, tt.offsets) { + t.Fatalf("got=%v, want=%v", gotOffsets, 
tt.offsets) + } + + if !reflect.DeepEqual(gotSizes, tt.sizes) { + t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) } varr := arr.ListValues().(*array.Int32) @@ -302,86 +638,227 @@ func TestListArraySlice(t *testing.T) { } } -func TestListStringRoundTrip(t *testing.T) { +func TestVarLenListLikeStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) - b := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer b.Release() - vb := b.ValueBuilder().(*array.Int32Builder) + builders := []array.VarLenListLikeBuilder{ + array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), + } - var values = [][]int32{ - {0, 1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6, 7}, - {2, 3, 4, 5, 6, 7, 8}, - {3, 4, 5, 6, 7, 8, 9}, + builders1 := []array.VarLenListLikeBuilder{ + array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), } - for _, value := range values { - b.AppendNull() - b.Append(true) - for _, el := range value { - vb.Append(el) - vb.AppendNull() + + for i, b := range builders { + defer b.Release() + + vb := b.ValueBuilder().(*array.Int32Builder) + + var values = [][]int32{ + {0, 1, 2, 3, 4, 5, 6}, + {1, 2, 3, 4, 5, 6, 7}, + {2, 3, 4, 5, 6, 7, 8}, + {3, 4, 5, 6, 7, 8, 9}, + } + for _, value := range values { + b.AppendNull() + b.AppendWithSize(true, 2*len(value)) + for _, el := range value { + vb.Append(el) + vb.AppendNull() + } + b.AppendWithSize(false, 0) } - b.Append(false) - } - arr := b.NewArray().(*array.List) - defer arr.Release() + arr := b.NewArray() + defer arr.Release() - // 2. 
create array via AppendValueFromString - b1 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer b1.Release() + // 2. create array via AppendValueFromString + b1 := builders1[i] + defer b1.Release() - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } + for i := 0; i < arr.Len(); i++ { + assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) + } - arr1 := b1.NewArray().(*array.List) - defer arr1.Release() + arr1 := b1.NewArray() + defer arr1.Release() - assert.True(t, array.Equal(arr, arr1)) + assert.True(t, array.Equal(arr, arr1)) + } } -func TestLargeListStringRoundTrip(t *testing.T) { +// Test the string roun-trip for a list-view containing out-of-order offsets. +func TestListViewStringRoundTrip(t *testing.T) { // 1. create array mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) defer mem.AssertSize(t, 0) - b := array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer b.Release() - vb := b.ValueBuilder().(*array.Int32Builder) + builders := []array.VarLenListLikeBuilder{ + array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), + } - var values = [][]int32{ - {0, 1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6, 7}, - {2, 3, 4, 5, 6, 7, 8}, - {3, 4, 5, 6, 7, 8, 9}, + builders1 := []array.VarLenListLikeBuilder{ + array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), + array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), } - for _, value := range values { - b.AppendNull() - b.Append(true) - for _, el := range value { - vb.Append(el) - vb.AppendNull() + + for i, b := range builders { + defer b.Release() + + switch lvb := b.(type) { + case *array.ListViewBuilder: + lvb.AppendDimensions(5, 3) + b.AppendNull() + lvb.AppendDimensions(0, 0) + lvb.AppendDimensions(1, 4) + case *array.LargeListViewBuilder: + lvb.AppendDimensions(5, 3) + b.AppendNull() + lvb.AppendDimensions(0, 0) + lvb.AppendDimensions(1, 4) } - 
b.Append(false) - } - arr := b.NewArray().(*array.LargeList) - defer arr.Release() + vb := b.ValueBuilder().(*array.Int32Builder) + + vs := []int32{-1, 3, 4, 5, 6, 0, 1, 2} + isValid := []bool{false, true, true, true, true, true, true, true} + vb.Reserve(len(vs)) + vb.AppendValues(vs, isValid) + + arr := b.NewArray() + defer arr.Release() + + // 2. create array via AppendValueFromString + b1 := builders1[i] + defer b1.Release() + + for i := 0; i < arr.Len(); i++ { + assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) + } - // 2. create array via AppendValueFromString - b1 := array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer b1.Release() + arr1 := b1.NewArray() + defer arr1.Release() - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) + assert.True(t, array.Equal(arr, arr1)) } +} + +func TestRangeOfValuesUsed(t *testing.T) { + tests := []struct { + typeID arrow.Type + dt arrow.DataType + }{ + {arrow.LIST, arrow.ListOf(arrow.PrimitiveTypes.Int16)}, + {arrow.LARGE_LIST, arrow.LargeListOf(arrow.PrimitiveTypes.Int16)}, + {arrow.LIST_VIEW, arrow.ListViewOf(arrow.PrimitiveTypes.Int16)}, + {arrow.LARGE_LIST_VIEW, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int16)}, + } + for _, tt := range tests { + t.Run(tt.typeID.String(), func(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + isListView := tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW + + bldr := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) + defer bldr.Release() - arr1 := b1.NewArray().(*array.LargeList) - defer arr1.Release() + var arr array.VarLenListLike - assert.True(t, array.Equal(arr, arr1)) + // Empty array + arr = bldr.NewArray().(array.VarLenListLike) + defer arr.Release() + offset, len := array.RangeOfValuesUsed(arr) + assert.Equal(t, 0, offset) + assert.Equal(t, 0, len) + + // List-like array with only nulls + bldr.AppendNulls(3) + arr = 
bldr.NewArray().(array.VarLenListLike) + defer arr.Release() + offset, len = array.RangeOfValuesUsed(arr) + assert.Equal(t, 0, offset) + assert.Equal(t, 0, len) + + // Array with nulls and non-nulls (starting at a non-zero offset) + vb := bldr.ValueBuilder().(*array.Int16Builder) + vb.Append(-2) + vb.Append(-1) + bldr.AppendWithSize(false, 0) + bldr.AppendWithSize(true, 2) + vb.Append(0) + vb.Append(1) + bldr.AppendWithSize(true, 3) + vb.Append(2) + vb.Append(3) + vb.Append(4) + if isListView { + vb.Append(10) + vb.Append(11) + } + arr = bldr.NewArray().(array.VarLenListLike) + defer arr.Release() + offset, len = array.RangeOfValuesUsed(arr) + assert.Equal(t, 2, offset) + assert.Equal(t, 5, len) + + // Overlapping list-views + // [null, [0, 1, 2, 3, 4, 5], [1, 2], null, [4], null, null] + vb = bldr.ValueBuilder().(*array.Int16Builder) + vb.Append(-2) + vb.Append(-1) + bldr.AppendWithSize(false, 0) + if isListView { + bldr.AppendWithSize(true, 6) + vb.Append(0) + bldr.AppendWithSize(true, 2) + vb.Append(1) + vb.Append(2) + vb.Append(3) + bldr.AppendWithSize(false, 0) + bldr.AppendWithSize(true, 1) + vb.Append(4) + vb.Append(5) + // -- used range ends here -- + vb.Append(10) + vb.Append(11) + } else { + bldr.AppendWithSize(true, 6) + vb.Append(0) + vb.Append(1) + vb.Append(2) + vb.Append(3) + vb.Append(4) + vb.Append(5) + bldr.AppendWithSize(true, 2) + vb.Append(1) + vb.Append(2) + bldr.AppendWithSize(false, 0) + bldr.AppendWithSize(true, 1) + vb.Append(4) + } + bldr.AppendNulls(2) + arr = bldr.NewArray().(array.VarLenListLike) + defer arr.Release() + + // Check the range + offset, len = array.RangeOfValuesUsed(arr) + assert.Equal(t, 2, offset) + if isListView { + assert.Equal(t, 6, len) + } else { + assert.Equal(t, 9, len) + } + }) + } } diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go index 4fe860f26ef61..9945a90ce495e 100644 --- a/go/arrow/array/map.go +++ b/go/arrow/array/map.go @@ -222,6 +222,10 @@ func (b *MapBuilder) Append(v bool) { 
b.listBuilder.Append(v) } +func (b *MapBuilder) AppendWithSize(v bool, _ int) { + b.Append(v) +} + // AppendNull adds a null map entry to the array. func (b *MapBuilder) AppendNull() { b.Append(false) diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go index d784c2bfe0767..f0fb24ec873c5 100644 --- a/go/arrow/datatype.go +++ b/go/arrow/datatype.go @@ -152,6 +152,19 @@ const ( RUN_END_ENCODED + // String (UTF8) view type with 4-byte prefix and inline + // small string optimizations + STRING_VIEW + + // Bytes view with 4-byte prefix and inline small byte arrays optimization + BINARY_VIEW + + // LIST_VIEW is a list of some logical data type represented with offsets and sizes + LIST_VIEW + + // like LIST_VIEW but with 64-bit offsets + LARGE_LIST_VIEW + // Alias to ensure we do not break any consumers DECIMAL = DECIMAL128 ) @@ -384,7 +397,7 @@ func IsListLike(t Type) bool { // IsNested returns true for List, LargeList, FixedSizeList, Map, Struct, and Unions func IsNested(t Type) bool { switch t { - case LIST, LARGE_LIST, FIXED_SIZE_LIST, MAP, STRUCT, SPARSE_UNION, DENSE_UNION: + case LIST, LARGE_LIST, FIXED_SIZE_LIST, MAP, LIST_VIEW, LARGE_LIST_VIEW, STRUCT, SPARSE_UNION, DENSE_UNION: return true } return false diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go index 50777929c00a6..4ae4880334620 100644 --- a/go/arrow/datatype_nested.go +++ b/go/arrow/datatype_nested.go @@ -39,6 +39,10 @@ type ( Elem() DataType ElemField() Field } + + VarLenListLikeType interface { + ListLikeType + } ) // ListType describes a nested type in which each array slot contains @@ -242,6 +246,142 @@ func (*FixedSizeListType) Layout() DataTypeLayout { return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap()}} } +type ListViewType struct { + elem Field +} + +func ListViewOfField(f Field) *ListViewType { + if f.Type == nil { + panic("arrow: nil DataType") + } + return &ListViewType{elem: f} +} + +// ListViewOf returns the list-view type with element type t. 
+// For example, if t represents int32, ListViewOf(t) represents []int32. +// +// ListViewOf panics if t is nil or invalid. NullableElem defaults to true +func ListViewOf(t DataType) *ListViewType { + if t == nil { + panic("arrow: nil DataType") + } + return &ListViewType{elem: Field{Name: "item", Type: t, Nullable: true}} +} + +// ListViewOfNonNullable is like ListViewOf but NullableElem defaults to false, indicating +// that the child type should be marked as non-nullable. +func ListViewOfNonNullable(t DataType) *ListViewType { + if t == nil { + panic("arrow: nil DataType") + } + return &ListViewType{elem: Field{Name: "item", Type: t, Nullable: false}} +} + +func (*ListViewType) ID() Type { return LIST_VIEW } +func (*ListViewType) Name() string { return "list_view" } + +func (t *ListViewType) String() string { + if t.elem.Nullable { + return fmt.Sprintf("list_view<%s: %s, nullable>", t.elem.Name, t.elem.Type) + } + return fmt.Sprintf("list_view<%s: %s>", t.elem.Name, t.elem.Type) +} + +func (t *ListViewType) Fingerprint() string { + child := t.elem.Type.Fingerprint() + if len(child) > 0 { + return typeFingerprint(t) + "{" + child + "}" + } + return "" +} + +func (t *ListViewType) SetElemMetadata(md Metadata) { t.elem.Metadata = md } + +func (t *ListViewType) SetElemNullable(n bool) { t.elem.Nullable = n } + +// Elem returns the ListViewType's element type. 
+func (t *ListViewType) Elem() DataType { return t.elem.Type } + +func (t *ListViewType) ElemField() Field { + return t.elem +} + +func (t *ListViewType) Fields() []Field { return []Field{t.ElemField()} } + +func (*ListViewType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes), SpecFixedWidth(Int32SizeBytes)}} +} + +func (*ListViewType) OffsetTypeTraits() OffsetTraits { return Int32Traits } + +type LargeListViewType struct { + elem Field +} + +func LargeListViewOfField(f Field) *LargeListViewType { + if f.Type == nil { + panic("arrow: nil DataType") + } + return &LargeListViewType{elem: f} +} + +// LargeListViewOf returns the list-view type with element type t. +// For example, if t represents int32, LargeListViewOf(t) represents []int32. +// +// LargeListViewOf panics if t is nil or invalid. NullableElem defaults to true +func LargeListViewOf(t DataType) *LargeListViewType { + if t == nil { + panic("arrow: nil DataType") + } + return &LargeListViewType{elem: Field{Name: "item", Type: t, Nullable: true}} +} + +// LargeListViewOfNonNullable is like LargeListViewOf but NullableElem defaults +// to false, indicating that the child type should be marked as non-nullable. 
+func LargeListViewOfNonNullable(t DataType) *LargeListViewType { + if t == nil { + panic("arrow: nil DataType") + } + return &LargeListViewType{elem: Field{Name: "item", Type: t, Nullable: false}} +} + +func (*LargeListViewType) ID() Type { return LARGE_LIST_VIEW } +func (*LargeListViewType) Name() string { return "large_list_view" } + +func (t *LargeListViewType) String() string { + if t.elem.Nullable { + return fmt.Sprintf("large_list_view<%s: %s, nullable>", t.elem.Name, t.elem.Type) + } + return fmt.Sprintf("large_list_view<%s: %s>", t.elem.Name, t.elem.Type) +} + +func (t *LargeListViewType) Fingerprint() string { + child := t.elem.Type.Fingerprint() + if len(child) > 0 { + return typeFingerprint(t) + "{" + child + "}" + } + return "" +} + +func (t *LargeListViewType) SetElemMetadata(md Metadata) { t.elem.Metadata = md } + +func (t *LargeListViewType) SetElemNullable(n bool) { t.elem.Nullable = n } + +// Elem returns the LargeListViewType's element type. +func (t *LargeListViewType) Elem() DataType { return t.elem.Type } + +func (t *LargeListViewType) ElemField() Field { + return t.elem +} + +func (t *LargeListViewType) Fields() []Field { return []Field{t.ElemField()} } + +func (*LargeListViewType) Layout() DataTypeLayout { + return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int64SizeBytes), SpecFixedWidth(Int64SizeBytes)}} +} + +func (*LargeListViewType) OffsetTypeTraits() OffsetTraits { return Int64Traits } + // StructType describes a nested type parameterized by an ordered sequence // of relative types, called its fields. 
type StructType struct { @@ -827,4 +967,11 @@ var ( _ ListLikeType = (*LargeListType)(nil) _ ListLikeType = (*FixedSizeListType)(nil) _ ListLikeType = (*MapType)(nil) + + _ VarLenListLikeType = (*ListType)(nil) + _ VarLenListLikeType = (*LargeListType)(nil) + _ VarLenListLikeType = (*ListViewType)(nil) + _ VarLenListLikeType = (*LargeListViewType)(nil) + _ VarLenListLikeType = (*FixedSizeListType)(nil) + _ VarLenListLikeType = (*MapType)(nil) ) diff --git a/go/arrow/internal/arrdata/arrdata.go b/go/arrow/internal/arrdata/arrdata.go index 3b592cf3992aa..0851bff0fe0da 100644 --- a/go/arrow/internal/arrdata/arrdata.go +++ b/go/arrow/internal/arrdata/arrdata.go @@ -41,6 +41,7 @@ func init() { Records["primitives"] = makePrimitiveRecords() Records["structs"] = makeStructsRecords() Records["lists"] = makeListsRecords() + Records["list_views"] = makeListViewsRecords() Records["strings"] = makeStringsRecords() Records["fixed_size_lists"] = makeFixedSizeListsRecords() Records["fixed_width_types"] = makeFixedWidthTypesRecords() @@ -321,6 +322,63 @@ func makeListsRecords() []arrow.Record { return recs } +func makeListViewsRecords() []arrow.Record { + mem := memory.NewGoAllocator() + dtype := arrow.ListViewOf(arrow.PrimitiveTypes.Int32) + schema := arrow.NewSchema([]arrow.Field{ + {Name: "list_view_nullable", Type: dtype, Nullable: true}, + }, nil) + + mask := []bool{true, false, false, true, true} + + chunks := [][]arrow.Array{ + { + listViewOf(mem, []arrow.Array{ + arrayOf(mem, []int32{1, 2, 3, 4, 5}, mask), + arrayOf(mem, []int32{11, 12, 13, 14, 15}, mask), + arrayOf(mem, []int32{21, 22, 23, 24, 25}, mask), + }, nil), + }, + { + listViewOf(mem, []arrow.Array{ + arrayOf(mem, []int32{-1, -2, -3, -4, -5}, mask), + arrayOf(mem, []int32{-11, -12, -13, -14, -15}, mask), + arrayOf(mem, []int32{-21, -22, -23, -24, -25}, mask), + }, nil), + }, + { + listViewOf(mem, []arrow.Array{ + arrayOf(mem, []int32{-1, -2, -3, -4, -5}, mask), + arrayOf(mem, []int32{}, []bool{}), + arrayOf(mem, 
[]int32{-21, -22, -23, -24, -25}, mask), + }, []bool{true, false, true}), + }, + { + func() arrow.Array { + bldr := array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32) + defer bldr.Release() + + return bldr.NewListViewArray() + }(), + }, + } + + defer func() { + for _, chunk := range chunks { + for _, col := range chunk { + col.Release() + } + } + }() + + recs := make([]arrow.Record, len(chunks)) + for i, chunk := range chunks { + recs[i] = array.NewRecord(schema, chunk, -1) + } + + return recs +} + func makeFixedSizeListsRecords() []arrow.Record { mem := memory.NewGoAllocator() const N = 3 @@ -1439,6 +1497,30 @@ func listOf(mem memory.Allocator, values []arrow.Array, valids []bool) *array.Li return bldr.NewListArray() } +func listViewOf(mem memory.Allocator, values []arrow.Array, valids []bool) *array.ListView { + if mem == nil { + mem = memory.NewGoAllocator() + } + + bldr := array.NewListViewBuilder(mem, values[0].DataType()) + defer bldr.Release() + + valid := func(i int) bool { + return valids[i] + } + + if valids == nil { + valid = func(i int) bool { return true } + } + + for i, value := range values { + bldr.AppendWithSize(valid(i), value.Len()) + buildArray(bldr.ValueBuilder(), value) + } + + return bldr.NewListViewArray() +} + func fixedSizeListOf(mem memory.Allocator, n int32, values []arrow.Array, valids []bool) *array.FixedSizeList { if mem == nil { mem = memory.NewGoAllocator() diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index fa4438276f186..ad87b73fc4ddb 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -208,6 +208,10 @@ func typeToJSON(arrowType arrow.DataType) (json.RawMessage, error) { typ = nameJSON{"list"} case *arrow.LargeListType: typ = nameJSON{"largelist"} + case *arrow.ListViewType: + typ = nameJSON{"listview"} + case *arrow.LargeListViewType: + typ = nameJSON{"largelistview"} case *arrow.MapType: typ = mapJSON{Name: "map", KeysSorted: 
dt.KeysSorted} case *arrow.StructType: @@ -400,6 +404,20 @@ func typeFromJSON(typ json.RawMessage, children []FieldWrapper) (arrowType arrow Metadata: children[0].arrowMeta, Nullable: children[0].Nullable, }) + case "listview": + arrowType = arrow.ListViewOfField(arrow.Field{ + Name: children[0].Name, + Type: children[0].arrowType, + Metadata: children[0].arrowMeta, + Nullable: children[0].Nullable, + }) + case "largelistview": + arrowType = arrow.LargeListViewOfField(arrow.Field{ + Name: children[0].Name, + Type: children[0].arrowType, + Metadata: children[0].arrowMeta, + Nullable: children[0].Nullable, + }) case "map": t := mapJSON{} if err = json.Unmarshal(typ, &t); err != nil { @@ -798,6 +816,7 @@ type Array struct { Data []interface{} `json:"DATA,omitempty"` TypeID []arrow.UnionTypeCode `json:"TYPE_ID,omitempty"` Offset interface{} `json:"OFFSET,omitempty"` + Size interface{} `json:"SIZE,omitempty"` Children []Array `json:"children,omitempty"` } @@ -806,7 +825,8 @@ func (a *Array) MarshalJSON() ([]byte, error) { aux := struct { *Alias OutOffset interface{} `json:"OFFSET,omitempty"` - }{Alias: (*Alias)(a), OutOffset: a.Offset} + OutSize interface{} `json:"SIZE,omitempty"` + }{Alias: (*Alias)(a), OutOffset: a.Offset, OutSize: a.Size} return json.Marshal(aux) } @@ -815,6 +835,7 @@ func (a *Array) UnmarshalJSON(b []byte) (err error) { aux := &struct { *Alias RawOffset json.RawMessage `json:"OFFSET,omitempty"` + RawSize json.RawMessage `json:"SIZE,omitempty"` }{Alias: (*Alias)(a)} dec := json.NewDecoder(bytes.NewReader(b)) @@ -824,6 +845,7 @@ func (a *Array) UnmarshalJSON(b []byte) (err error) { return } + // Offsets if len(aux.RawOffset) == 0 { return } @@ -855,6 +877,38 @@ func (a *Array) UnmarshalJSON(b []byte) (err error) { a.Offset = out } + if len(aux.RawSize) == 0 { + return + } + + // Sizes + var rawSizes []interface{} + if err = json.Unmarshal(aux.RawSize, &rawSizes); err != nil { + return + } + + if len(rawSizes) == 0 { + return + } + + switch 
rawSizes[0].(type) { + case string: + out := make([]int64, len(rawSizes)) + for i, o := range rawSizes { + out[i], err = strconv.ParseInt(o.(string), 10, 64) + if err != nil { + return + } + } + a.Size = out + case float64: + out := make([]int32, len(rawSizes)) + for i, o := range rawSizes { + out[i] = int32(o.(float64)) + } + a.Size = out + } + return nil } @@ -1050,6 +1104,44 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(arr.Offset.([]int64)))}, []arrow.ArrayData{elems}, nulls, 0) + case *arrow.ListViewType: + valids := validsFromJSON(arr.Valids) + elems := arrayFromJSON(mem, dt.Elem(), arr.Children[0]) + defer elems.Release() + + bitmap := validsToBitmap(valids, mem) + defer bitmap.Release() + + nulls := arr.Count - bitutil.CountSetBits(bitmap.Bytes(), 0, arr.Count) + var offsets, sizes *memory.Buffer + if arr.Count == 0 { + emptyBuffer := memory.NewBufferBytes(nil) + offsets, sizes = emptyBuffer, emptyBuffer + } else { + offsets = memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(arr.Offset.([]int32))) + sizes = memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(arr.Size.([]int32))) + } + return array.NewData(dt, arr.Count, []*memory.Buffer{bitmap, offsets, sizes}, []arrow.ArrayData{elems}, nulls, 0) + + case *arrow.LargeListViewType: + valids := validsFromJSON(arr.Valids) + elems := arrayFromJSON(mem, dt.Elem(), arr.Children[0]) + defer elems.Release() + + bitmap := validsToBitmap(valids, mem) + defer bitmap.Release() + + nulls := arr.Count - bitutil.CountSetBits(bitmap.Bytes(), 0, arr.Count) + var offsets, sizes *memory.Buffer + if arr.Count == 0 { + emptyBuffer := memory.NewBufferBytes(nil) + offsets, sizes = emptyBuffer, emptyBuffer + } else { + offsets = memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(arr.Offset.([]int64))) + sizes = memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(arr.Size.([]int64))) + } + return array.NewData(dt, arr.Count, 
[]*memory.Buffer{bitmap, offsets, sizes}, []arrow.ArrayData{elems}, nulls, 0) + case *arrow.FixedSizeListType: valids := validsFromJSON(arr.Valids) elems := arrayFromJSON(mem, dt.Elem(), arr.Children[0]) @@ -1422,6 +1514,44 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { }, } + case *array.ListView: + o := Array{ + Name: field.Name, + Count: arr.Len(), + Valids: validsToJSON(arr), + Offset: arr.Offsets(), + Size: arr.Sizes(), + Children: []Array{ + arrayToJSON(arrow.Field{Name: "item", Type: arr.DataType().(*arrow.ListViewType).Elem()}, arr.ListValues()), + }, + } + if arr.Len() == 0 { + o.Offset, o.Size = []int32{}, []int32{} + } + return o + + case *array.LargeListView: + offsets := arr.Offsets() + strOffsets := make([]string, len(offsets)) + for i, o := range offsets { + strOffsets[i] = strconv.FormatInt(o, 10) + } + sizes := arr.Sizes() + strSizes := make([]string, len(sizes)) + for i, s := range sizes { + strSizes[i] = strconv.FormatInt(s, 10) + } + return Array{ + Name: field.Name, + Count: arr.Len(), + Valids: validsToJSON(arr), + Offset: strOffsets, + Size: strSizes, + Children: []Array{ + arrayToJSON(arrow.Field{Name: "item", Type: arr.DataType().(*arrow.LargeListViewType).Elem()}, arr.ListValues()), + }, + } + case *array.Map: o := Array{ Name: field.Name, diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index 882dc9a0d860a..ee85d431805ab 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -34,6 +34,7 @@ func TestReadWrite(t *testing.T) { wantJSONs["primitives"] = makePrimitiveWantJSONs() wantJSONs["structs"] = makeStructsWantJSONs() wantJSONs["lists"] = makeListsWantJSONs() + wantJSONs["list_views"] = makeListViewsWantJSONs() wantJSONs["strings"] = makeStringsWantJSONs() wantJSONs["fixed_size_lists"] = makeFixedSizeListsWantJSONs() wantJSONs["fixed_width_types"] = makeFixedWidthTypesWantJSONs() @@ -1366,7 +1367,7 @@ func 
makeListsWantJSONs() string { 1, 1, 1 - ], + ], "children": [ { "name": "item", @@ -1558,6 +1559,237 @@ func makeListsWantJSONs() string { }` } +func makeListViewsWantJSONs() string { + return `{ + "schema": { + "fields": [ + { + "name": "list_view_nullable", + "type": { + "name": "listview" + }, + "nullable": true, + "children": [ + { + "name": "item", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + } + ] + } + ] + }, + "batches": [ + { + "count": 3, + "columns": [ + { + "name": "list_view_nullable", + "count": 3, + "VALIDITY": [ + 1, + 1, + 1 + ], + "children": [ + { + "name": "item", + "count": 15, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 1 + ], + "DATA": [ + 1, + 0, + 0, + 4, + 5, + 11, + 0, + 0, + 14, + 15, + 21, + 0, + 0, + 24, + 25 + ] + } + ], + "OFFSET": [ + 0, + 5, + 10 + ], + "SIZE": [ + 5, + 5, + 5 + ] + } + ] + }, + { + "count": 3, + "columns": [ + { + "name": "list_view_nullable", + "count": 3, + "VALIDITY": [ + 1, + 1, + 1 + ], + "children": [ + { + "name": "item", + "count": 15, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 1 + ], + "DATA": [ + -1, + 0, + 0, + -4, + -5, + -11, + 0, + 0, + -14, + -15, + -21, + 0, + 0, + -24, + -25 + ] + } + ], + "OFFSET": [ + 0, + 5, + 10 + ], + "SIZE": [ + 5, + 5, + 5 + ] + } + ] + }, + { + "count": 3, + "columns": [ + { + "name": "list_view_nullable", + "count": 3, + "VALIDITY": [ + 1, + 0, + 1 + ], + "children": [ + { + "name": "item", + "count": 10, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 1, + 1 + ], + "DATA": [ + -1, + 0, + 0, + -4, + -5, + -21, + 0, + 0, + -24, + -25 + ] + } + ], + "OFFSET": [ + 0, + 5, + 5 + ], + "SIZE": [ + 5, + 0, + 5 + ] + } + ] + }, + { + "count": 0, + "columns": [ + { + "name": "list_view_nullable", + "count": 0, + "children": [ + { + "name": "item", + "count": 0 + } + ], + "OFFSET": [ + ], + "SIZE": [ + ] + } + ] + } + ] 
+}` +} + func makeFixedSizeListsWantJSONs() string { return `{ "schema": { @@ -3575,7 +3807,7 @@ func makeMapsWantJSONs() string { "VALIDITY": [ 1, 0 - ], + ], "children": [ { "name": "entries", diff --git a/go/arrow/internal/testing/gen/random_array_gen.go b/go/arrow/internal/testing/gen/random_array_gen.go index ab9e015163730..41f2578209a7f 100644 --- a/go/arrow/internal/testing/gen/random_array_gen.go +++ b/go/arrow/internal/testing/gen/random_array_gen.go @@ -22,6 +22,7 @@ import ( "github.com/apache/arrow/go/v14/arrow" "github.com/apache/arrow/go/v14/arrow/array" "github.com/apache/arrow/go/v14/arrow/bitutil" + "github.com/apache/arrow/go/v14/arrow/internal/debug" "github.com/apache/arrow/go/v14/arrow/memory" "golang.org/x/exp/rand" "gonum.org/v1/gonum/stat/distuv" @@ -376,6 +377,156 @@ func (r *RandomArrayGenerator) Numeric(dt arrow.Type, size int64, min, max int64 panic("invalid type for random numeric array") } +// Generate an array of random offsets based on a given sizes array for +// list-view arrays. +// +// Pre-condition: every non-null sizes[i] <= valuesLength. 
+func viewOffsetsFromLengthsArray32( + seed uint64, avgLength int32, valuesLength int32, + sizesArray *array.Int32, forceEmptyNulls bool, + zeroUndefinedOffsets bool) *memory.Buffer { + sizes := sizesArray.Int32Values() + offsets := make([]int32, sizesArray.Len()) + + offsetDeltaRand := rand.New(rand.NewSource(seed)) + sampleOffset := func(offsetBase int32) int32 { + delta := int32(offsetDeltaRand.Int63n(2*int64(avgLength)) - int64(avgLength)) + offset := offsetBase + delta + if offset < 0 { + return 0 + } + return offset + } + offsetBase := int32(0) + for i := 0; i < sizesArray.Len(); i += 1 { + isNull := sizesArray.IsNull(i) + if forceEmptyNulls && isNull { + sizes[i] = 0 + } + if zeroUndefinedOffsets && (isNull || sizes[i] == 0) { + offsets[i] = 0 + } else { + offset := sampleOffset(offsetBase) + if offset > valuesLength-sizes[i] { + offset = valuesLength - sizes[i] + } + offsets[i] = offset + } + offsetBase += avgLength + } + + return memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) +} + +// Generate an array of random offsets based on a given sizes array for +// large list-view arrays. +// +// Pre-condition: every non-null sizes[i] <= valuesLength. 
+func viewOffsetsFromLengthsArray64( + seed uint64, avgLength int64, valuesLength int64, + sizesArray *array.Int64, forceEmptyNulls bool, + zeroUndefinedOffsets bool) *memory.Buffer { + sizes := sizesArray.Int64Values() + offsets := make([]int64, sizesArray.Len()) + + offsetDeltaRand := rand.New(rand.NewSource(seed)) + sampleOffset := func(offsetBase int64) int64 { + delta := int64(offsetDeltaRand.Int63n(2*avgLength) - avgLength) + offset := offsetBase + delta + if offset < 0 { + return 0 + } + return offset + } + offsetBase := int64(0) + for i := 0; i < sizesArray.Len(); i += 1 { + isNull := sizesArray.IsNull(i) + if forceEmptyNulls && isNull { + sizes[i] = 0 + } + if zeroUndefinedOffsets && (isNull || sizes[i] == 0) { + offsets[i] = 0 + } else { + offset := sampleOffset(offsetBase) + if offset > valuesLength-sizes[i] { + offset = valuesLength - sizes[i] + } + offsets[i] = offset + } + offsetBase += avgLength + } + + return memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(offsets)) +} + +// Generate random data for ListView or LargeListView arrays. +func (r *RandomArrayGenerator) genListViewData(dt arrow.VarLenListLikeType, length int64, + minLength, maxLength int, nullprob float64, + forceEmptyNulls bool, zeroUndefinedOffsets bool) arrow.ArrayData { + offsetByteWidth := dt.Layout().Buffers[1].ByteWidth + var lengths arrow.Array + if offsetByteWidth == 4 { + lengths = r.Int32(length, int32(minLength), int32(maxLength), nullprob) + } else { + lengths = r.Int64(length, int64(minLength), int64(maxLength), nullprob) + } + defer lengths.Release() + + // List-views don't have to be disjoint, so let's make the valuesLength a + // multiple of the average list-view size. To make sure every list view + // into the values array can fit, it should be at least maxLength. 
+ avgLength := minLength + (maxLength-minLength)/2 + valuesLength := int64(avgLength) * (length - int64(lengths.NullN())) + if valuesLength < int64(maxLength) { + valuesLength = int64(maxLength) + } + debug.Assert(offsetByteWidth == 8 || valuesLength < math.MaxInt32, + "valuesLength must be less than math.MaxInt32") + + values := r.ArrayOf(dt.Elem().ID(), int64(valuesLength), 0.0) + defer values.Release() + + var offsets *memory.Buffer + if offsetByteWidth == 4 { + lengths32 := lengths.(*array.Int32) + offsets = viewOffsetsFromLengthsArray32(r.seed, int32(avgLength), int32(valuesLength), lengths32, + forceEmptyNulls, zeroUndefinedOffsets) + } else { + lengths64 := lengths.(*array.Int64) + offsets = viewOffsetsFromLengthsArray64(r.seed, int64(avgLength), int64(valuesLength), lengths64, + forceEmptyNulls, zeroUndefinedOffsets) + } + defer offsets.Release() + + buffers := []*memory.Buffer{ + memory.NewBufferBytes(lengths.NullBitmapBytes()), + offsets, + memory.NewBufferBytes(lengths.Data().Buffers()[1].Bytes()), + } + childData := []arrow.ArrayData{values.Data()} + return array.NewData(dt, int(length), buffers, childData, int(lengths.NullN()), 0) +} + +func (r *RandomArrayGenerator) ListView(dt arrow.VarLenListLikeType, length int64, + minLength, maxLength int32, nullprob float64) *array.ListView { + forceEmptyNulls := false + zeroUndefineOffsets := false + data := r.genListViewData(dt, length, int(minLength), int(maxLength), nullprob, + forceEmptyNulls, zeroUndefineOffsets) + defer data.Release() + return array.NewListViewData(data) +} + +func (r *RandomArrayGenerator) LargeListView(dt arrow.VarLenListLikeType, length int64, + minLength, maxLength int64, nullprob float64) *array.LargeListView { + forceEmptyNulls := false + zeroUndefineOffsets := false + data := r.genListViewData(dt, length, int(minLength), int(maxLength), nullprob, + forceEmptyNulls, zeroUndefineOffsets) + defer data.Release() + return array.NewLargeListViewData(data) +} + func (r 
*RandomArrayGenerator) ArrayOf(dt arrow.Type, size int64, nullprob float64) arrow.Array { switch dt { case arrow.BOOL: diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index 7d799149c2a69..10cb2cae764e6 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -485,6 +485,12 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { case *arrow.LargeListType: return ctx.loadList(dt) + case *arrow.ListViewType: + return ctx.loadListView(dt) + + case *arrow.LargeListViewType: + return ctx.loadListView(dt) + case *arrow.FixedSizeListType: return ctx.loadFixedSizeList(dt) @@ -606,6 +612,17 @@ func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { return array.NewData(dt, int(field.Length()), buffers, []arrow.ArrayData{sub}, int(field.NullCount()), 0) } +func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.ArrayData { + field, buffers := ctx.loadCommon(dt.ID(), 3) + buffers = append(buffers, ctx.buffer(), ctx.buffer()) + defer releaseBuffers(buffers) + + sub := ctx.loadChild(dt.Elem()) + defer sub.Release() + + return array.NewData(dt, int(field.Length()), buffers, []arrow.ArrayData{sub}, int(field.NullCount()), 0) +} + func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 1) defer releaseBuffers(buffers) diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go index 5c5e41833aea1..9bab47d6fa0cd 100644 --- a/go/arrow/ipc/metadata.go +++ b/go/arrow/ipc/metadata.go @@ -386,6 +386,18 @@ func (fv *fieldVisitor) visit(field arrow.Field) { flatbuf.LargeListStart(fv.b) fv.offset = flatbuf.LargeListEnd(fv.b) + case *arrow.ListViewType: + fv.dtype = flatbuf.TypeListView + fv.kids = append(fv.kids, fieldToFB(fv.b, fv.pos.Child(0), dt.ElemField(), fv.memo)) + flatbuf.ListViewStart(fv.b) + fv.offset = flatbuf.ListViewEnd(fv.b) + + case *arrow.LargeListViewType: + fv.dtype = 
flatbuf.TypeLargeListView + fv.kids = append(fv.kids, fieldToFB(fv.b, fv.pos.Child(0), dt.ElemField(), fv.memo)) + flatbuf.LargeListViewStart(fv.b) + fv.offset = flatbuf.LargeListViewEnd(fv.b) + case *arrow.FixedSizeListType: fv.dtype = flatbuf.TypeFixedSizeList fv.kids = append(fv.kids, fieldToFB(fv.b, fv.pos.Child(0), dt.ElemField(), fv.memo)) @@ -718,6 +730,20 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr dt := arrow.LargeListOfField(children[0]) return dt, nil + case flatbuf.TypeListView: + if len(children) != 1 { + return nil, fmt.Errorf("arrow/ipc: ListView must have exactly 1 child field (got=%d)", len(children)) + } + dt := arrow.ListViewOfField(children[0]) + return dt, nil + + case flatbuf.TypeLargeListView: + if len(children) != 1 { + return nil, fmt.Errorf("arrow/ipc: LargeListView must have exactly 1 child field (got=%d)", len(children)) + } + dt := arrow.LargeListViewOfField(children[0]) + return dt, nil + case flatbuf.TypeFixedSizeList: var dt flatbuf.FixedSizeList dt.Init(data.Bytes, data.Pos) diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index 7866ec2b41011..a97f47ef4aa43 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -577,10 +577,7 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.StringType, *arrow.LargeStringType: arr := arr.(array.BinaryLike) - voffsets, err := w.getZeroBasedValueOffsets(arr) - if err != nil { - return fmt.Errorf("could not retrieve zero-based value offsets from %T: %w", arr, err) - } + voffsets := w.getZeroBasedValueOffsets(arr) data := arr.Data() values := data.Buffers()[2] @@ -687,10 +684,7 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { w.depth++ case *arrow.MapType, *arrow.ListType, *arrow.LargeListType: arr := arr.(array.ListLike) - voffsets, err := w.getZeroBasedValueOffsets(arr) - if err != nil { - return fmt.Errorf("could not retrieve zero-based value 
offsets for array %T: %w", arr, err) - } + voffsets := w.getZeroBasedValueOffsets(arr) p.body = append(p.body, voffsets) w.depth-- @@ -716,7 +710,52 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { values = array.NewSlice(values, values_offset, values_end) mustRelease = true } - err = w.visit(p, values) + err := w.visit(p, values) + + if err != nil { + return fmt.Errorf("could not visit list element for array %T: %w", arr, err) + } + w.depth++ + + case *arrow.ListViewType, *arrow.LargeListViewType: + data := arr.Data() + arr := arr.(array.VarLenListLike) + offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() + rngOff, rngLen := array.RangeOfValuesUsed(arr) + voffsets := w.getValueOffsetsAtBaseValue(arr, rngOff) + p.body = append(p.body, voffsets) + + vsizes := data.Buffers()[2] + if vsizes != nil { + if data.Offset() != 0 || vsizes.Len() > offsetTraits.BytesRequired(arr.Len()) { + beg := offsetTraits.BytesRequired(data.Offset()) + end := beg + offsetTraits.BytesRequired(data.Len()) + vsizes = memory.NewBufferBytes(vsizes.Bytes()[beg:end]) + } else { + vsizes.Retain() + } + } + p.body = append(p.body, vsizes) + + w.depth-- + var ( + values = arr.ListValues() + mustRelease = false + values_offset = int64(rngOff) + values_end = int64(rngOff + rngLen) + ) + defer func() { + if mustRelease { + values.Release() + } + }() + + if arr.Len() > 0 && values_end < int64(values.Len()) { + // must also slice the values + values = array.NewSlice(values, values_offset, values_end) + mustRelease = true + } + err := w.visit(p, values) if err != nil { return fmt.Errorf("could not visit list element for array %T: %w", arr, err) @@ -764,19 +803,25 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { return nil } -func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) (*memory.Buffer, error) { +func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer { data := arr.Data() voffsets := 
data.Buffers()[1] offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() offsetBytesNeeded := offsetTraits.BytesRequired(data.Len() + 1) - if data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() { - // if we have a non-zero offset, then the value offsets do not start at - // zero. we must a) create a new offsets array with shifted offsets and - // b) slice the values array accordingly - // - // or if there are more value offsets than values (the array has been sliced) - // we need to trim off the trailing offsets + if voffsets == nil || voffsets.Len() == 0 { + return nil + } + + // if we have a non-zero offset, then the value offsets do not start at + // zero. we must a) create a new offsets array with shifted offsets and + // b) slice the values array accordingly + // + // or if there are more value offsets than values (the array has been sliced) + // we need to trim off the trailing offsets + needsTruncateAndShift := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() + + if needsTruncateAndShift { shiftedOffsets := memory.NewResizableBuffer(w.mem) shiftedOffsets.Resize(offsetBytesNeeded) @@ -805,11 +850,65 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) (*memory.Buffe } else { voffsets.Retain() } + + return voffsets +} + +// Truncates the offsets if needed and shifts the values if minOffset > 0. +// The offsets returned are corrected assuming the child values are truncated +// and now start at minOffset. +// +// This function only works on offset buffers of ListViews and LargeListViews. +// TODO(felipecrv): Unify this with getZeroBasedValueOffsets. 
+func (w *recordEncoder) getValueOffsetsAtBaseValue(arr arrow.Array, minOffset int) *memory.Buffer { + data := arr.Data() + voffsets := data.Buffers()[1] + offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() + offsetBytesNeeded := offsetTraits.BytesRequired(data.Len()) + if voffsets == nil || voffsets.Len() == 0 { - return nil, nil + return nil + } + + needsTruncate := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() + needsShift := minOffset > 0 + + if needsTruncate || needsShift { + shiftedOffsets := memory.NewResizableBuffer(w.mem) + shiftedOffsets.Resize(offsetBytesNeeded) + + switch arr.DataType().Layout().Buffers[1].ByteWidth { + case 8: + dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes()) + offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] + + if minOffset > 0 { + for i, o := range offsets { + dest[i] = o - int64(minOffset) + } + } else { + copy(dest, offsets) + } + default: + debug.Assert(arr.DataType().Layout().Buffers[1].ByteWidth == 4, "invalid offset bytewidth") + dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes()) + offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] + + if minOffset > 0 { + for i, o := range offsets { + dest[i] = o - int32(minOffset) + } + } else { + copy(dest, offsets) + } + } + + voffsets = shiftedOffsets + } else { + voffsets.Retain() } - return voffsets, nil + return voffsets } func (w *recordEncoder) rebaseDenseUnionValueOffsets(arr *array.DenseUnion, offsets, lengths []int32) *memory.Buffer { diff --git a/go/arrow/ipc/writer_test.go b/go/arrow/ipc/writer_test.go index 47aa29db91082..da461c3d52272 100644 --- a/go/arrow/ipc/writer_test.go +++ b/go/arrow/ipc/writer_test.go @@ -112,16 +112,14 @@ func TestGetZeroBasedValueOffsets(t *testing.T) { env := &recordEncoder{mem: alloc} - offsets, err := env.getZeroBasedValueOffsets(arr) - require.NoError(t, err) + offsets := 
env.getZeroBasedValueOffsets(arr) defer offsets.Release() assert.Equal(t, 44, offsets.Len(), "include all offsets if array is not sliced") sl := array.NewSlice(arr, 0, 4) defer sl.Release() - offsets, err = env.getZeroBasedValueOffsets(sl) - require.NoError(t, err) + offsets = env.getZeroBasedValueOffsets(sl) defer offsets.Release() assert.Equal(t, 20, offsets.Len(), "trim trailing offsets after slice") } diff --git a/go/arrow/type_string.go b/go/arrow/type_string.go index 41a407386357a..ee3ccb7ef9f0a 100644 --- a/go/arrow/type_string.go +++ b/go/arrow/type_string.go @@ -47,11 +47,15 @@ func _() { _ = x[LARGE_LIST-36] _ = x[INTERVAL_MONTH_DAY_NANO-37] _ = x[RUN_END_ENCODED-38] + _ = x[STRING_VIEW-39] + _ = x[BINARY_VIEW-40] + _ = x[LIST_VIEW-41] + _ = x[LARGE_LIST_VIEW-42] } -const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANORUN_END_ENCODED" +const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANORUN_END_ENCODEDSTRING_VIEWBINARY_VIEWLIST_VIEWLARGE_LIST_VIEW" -var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64, 71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207, 218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 335} +var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64, 71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207, 218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 335, 346, 357, 366, 381} func (i 
Type) String() string { if i < 0 || i >= Type(len(_Type_index)-1) {