Skip to content

Commit

Permalink
ARROW-7921: [Go] Add Reset method to various components and clean up …
Browse files Browse the repository at this point in the history
…comments.

The Reset method allows the data structures to be reused so they don't have to be allocated over and over again.

Closes #6430 from richardartoul/ra/merge-upstream and squashes the following commits:

5a0828187 <Richard Artoul> Add license to test file
d76be05ca <Richard Artoul> Add test for data reset
d102b1fff <Richard Artoul> Add tests
d3e6e6785 <Richard Artoul> cleanup comments
c8525aece <Richard Artoul> Add Reset method to int array (#5)
489ca2593 <Richard Artoul> Fix array.setData() to retain before release (#4)
88cd05ff8 <Richard Artoul> Add reset method to Data (#3)
6d1b2775c <Richard Artoul> Add Reset() method to String array (#2)
dca230383 <Richard Artoul> Add Reset method to buffer and cleanup comments (#1)

Lead-authored-by: Richard Artoul <[email protected]>
Co-authored-by: Richard Artoul <[email protected]>
Signed-off-by: Sebastien Binet <[email protected]>
  • Loading branch information
2 people authored and kou committed Aug 30, 2024
1 parent a95eb18 commit e324c33
Show file tree
Hide file tree
Showing 9 changed files with 358 additions and 26 deletions.
4 changes: 3 additions & 1 deletion arrow/array/array.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,13 @@ func (a *array) IsValid(i int) bool {
}

func (a *array) setData(data *Data) {
// Retain before releasing in case a.data is the same as data.
data.Retain()

if a.data != nil {
a.data.Release()
}

data.Retain()
if len(data.buffers) > 0 && data.buffers[0] != nil {
a.nullBitmapBytes = data.buffers[0].Bytes()
}
Expand Down
56 changes: 51 additions & 5 deletions arrow/array/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/apache/arrow/go/arrow/memory"
)

// A type which represents the memory and metadata for an Arrow array.
// Data represents the memory and metadata of an Arrow array.
type Data struct {
refCount int64
dtype arrow.DataType
Expand All @@ -35,6 +35,7 @@ type Data struct {
childData []*Data // TODO(sgc): managed by ListArray, StructArray and UnionArray types
}

// NewData creates a new Data.
func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []*Data, nulls, offset int) *Data {
for _, b := range buffers {
if b != nil {
Expand All @@ -59,6 +60,42 @@ func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childDa
}
}

// Reset reinitializes d in place with the given type, length, buffers, child data, null count
// and offset so that it can be re-used. The supplied buffers and child data are retained, and
// the ones d held before the call are released.
func (d *Data) Reset(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []*Data, nulls, offset int) {
	// Retain the incoming buffers before releasing the current ones: if the caller hands back
	// the buffers d already holds, releasing first could drop them prematurely.
	for _, incoming := range buffers {
		if incoming == nil {
			continue
		}
		incoming.Retain()
	}
	for _, current := range d.buffers {
		if current == nil {
			continue
		}
		current.Release()
	}
	d.buffers = buffers

	// Same retain-then-release ordering for the child data, for the same reason.
	for _, incoming := range childData {
		if incoming == nil {
			continue
		}
		incoming.Retain()
	}
	for _, current := range d.childData {
		if current == nil {
			continue
		}
		current.Release()
	}
	d.childData = childData

	d.dtype, d.length, d.nulls, d.offset = dtype, length, nulls, offset
}

// Retain increases the reference count by 1.
// Retain may be called simultaneously from multiple goroutines.
func (d *Data) Retain() {
Expand All @@ -85,10 +122,19 @@ func (d *Data) Release() {
}
}

func (d *Data) DataType() arrow.DataType { return d.dtype }
func (d *Data) NullN() int { return d.nulls }
func (d *Data) Len() int { return d.length }
func (d *Data) Offset() int { return d.offset }
// DataType reports the arrow.DataType stored in d.
func (d *Data) DataType() arrow.DataType {
	return d.dtype
}

// NullN reports the null count stored in d.
func (d *Data) NullN() int {
	return d.nulls
}

// Len reports the length stored in d.
func (d *Data) Len() int {
	return d.length
}

// Offset reports the offset stored in d.
func (d *Data) Offset() int {
	return d.offset
}

// Buffers exposes the buffer slice held by d. The slice is returned as-is, not copied.
func (d *Data) Buffers() []*memory.Buffer {
	return d.buffers
}

// NewSliceData returns a new slice that shares backing data with the input.
Expand Down
51 changes: 51 additions & 0 deletions arrow/array/data_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package array

import (
"testing"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/memory"
"github.com/stretchr/testify/assert"
)

// TestDataReset checks that Data.Reset replaces the buffers and metadata of an existing Data,
// and that resetting a Data with the buffers it already holds leaves it in the same state.
func TestDataReset(t *testing.T) {
	const numBuffers = 3

	initial := make([]*memory.Buffer, 0, numBuffers)
	replacement := make([]*memory.Buffer, 0, numBuffers)
	for len(initial) < numBuffers {
		initial = append(initial, memory.NewBufferBytes([]byte("some-bytes1")))
		replacement = append(replacement, memory.NewBufferBytes([]byte("some-bytes2")))
	}

	data := NewData(&arrow.StringType{}, 10, initial, nil, 0, 0)
	data.Reset(&arrow.Int64Type{}, 5, replacement, nil, 1, 2)

	// Two passes: the first validates the initial Reset, the second validates the state after
	// the self-reset performed at the end of the first pass.
	for pass := 0; pass < 2; pass++ {
		assert.Equal(t, replacement, data.Buffers())
		assert.Equal(t, &arrow.Int64Type{}, data.DataType())
		assert.Equal(t, 1, data.NullN())
		assert.Equal(t, 2, data.Offset())
		assert.Equal(t, 5, data.Len())

		// Reset the data with its own buffers; this relies on Reset retaining the new buffers
		// before it releases the old ones.
		data.Reset(&arrow.Int64Type{}, 5, data.Buffers(), nil, 1, 2)
	}
}
Loading

0 comments on commit e324c33

Please sign in to comment.