Skip to content

Commit

Permalink
ARROW-17691: [Go] Implement Take for Primitive Types (#14101)
Browse files Browse the repository at this point in the history
Authored-by: Matt Topol <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
zeroshade authored Sep 14, 2022
1 parent 29225ac commit 81f3945
Show file tree
Hide file tree
Showing 8 changed files with 618 additions and 12 deletions.
18 changes: 14 additions & 4 deletions go/arrow/compute/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,20 @@ type ctxExecKey struct{}

const DefaultMaxChunkSize = math.MaxInt64

// defaultExecCtx is the global default ExecCtx object, initialized with the
// default max chunk size, contiguous preallocations, and
// the default function registry.
var defaultExecCtx ExecCtx
var (
// defaultExecCtx is the global default ExecCtx object, initialized with
// the default max chunk size, contiguous preallocations, and
// the default function registry.
defaultExecCtx ExecCtx

// WithAllocator returns a new context with the provided allocator
// embedded into the context. It is a re-export of exec.WithAllocator.
WithAllocator = exec.WithAllocator
// GetAllocator retrieves the allocator from the context, or returns
// memory.DefaultAllocator if there was no allocator in the provided
// context. It is a re-export of exec.GetAllocator.
GetAllocator = exec.GetAllocator
)

func init() {
defaultExecCtx.ChunkSize = DefaultMaxChunkSize
Expand Down
14 changes: 9 additions & 5 deletions go/arrow/compute/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -484,17 +484,21 @@ const (
SelectionDropNulls = kernels.DropNulls
)

// FilterOptions is an alias for kernels.FilterOptions.
type FilterOptions = kernels.FilterOptions

// DefaultFilterOptions returns a pointer to a freshly allocated,
// zero-valued FilterOptions.
func DefaultFilterOptions() *FilterOptions {
	return new(FilterOptions)
}

// ArithmeticOptions holds the settings passed to arithmetic compute
// functions.
type ArithmeticOptions struct {
	// CheckOverflow is exposed under the "check_overflow" compute tag.
	// NOTE(review): presumably asks kernels to report overflow rather than
	// wrap silently; confirm against the arithmetic kernels.
	CheckOverflow bool `compute:"check_overflow"`
}

// TypeName reports the fixed name of this options type,
// "ArithmeticOptions".
func (ArithmeticOptions) TypeName() string {
	const name = "ArithmeticOptions"
	return name
}

// CastOptions is an alias for kernels.CastOptions.
type CastOptions = kernels.CastOptions
// Aliases exposing option types declared in the kernels package under
// this package's names.
type (
// CastOptions is an alias for kernels.CastOptions.
CastOptions = kernels.CastOptions
// FilterOptions is an alias for kernels.FilterOptions.
FilterOptions = kernels.FilterOptions
// TakeOptions is an alias for kernels.TakeOptions.
TakeOptions = kernels.TakeOptions
)

// DefaultFilterOptions returns a pointer to a freshly allocated,
// zero-valued FilterOptions.
func DefaultFilterOptions() *FilterOptions {
	return new(FilterOptions)
}

// DefaultTakeOptions returns a pointer to a new TakeOptions whose
// BoundsCheck field is set to true; every other field keeps its zero value.
func DefaultTakeOptions() *TakeOptions {
	opts := new(TakeOptions)
	opts.BoundsCheck = true
	return opts
}

func DefaultCastOptions(safe bool) *CastOptions {
if safe {
Expand Down
2 changes: 1 addition & 1 deletion go/arrow/compute/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ type MetaFunction struct {
// impl for dispatching with the expected arity.
//
// Will panic if impl is nil.
func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) Function {
func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction {
if impl == nil {
panic("arrow/compute: cannot construct MetaFunction with nil impl")
}
Expand Down
54 changes: 54 additions & 0 deletions go/arrow/compute/internal/kernels/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,57 @@ func (b *bufferBuilder[T]) appendSlice(values []T) {
// cap returns the capacity of b.data divided by the in-memory size of one
// T (measured via b.zero).
// NOTE(review): presumably b.data is a byte slice, making the result a
// capacity in elements of T; confirm against the bufferBuilder definition.
func (b *bufferBuilder[T]) cap() int {
	elemSize := int(unsafe.Sizeof(b.zero))
	return cap(b.data) / elemSize
}

// checkIndexBoundsImpl verifies that the indices stored in values lie in the
// half-open range [0, upperLimit). T is the Go integer type used to read the
// index buffer; the caller is expected to pick the T matching values.Type.
//
// Only positions covered by runs of set bits in buffer 0 of the span are
// inspected (presumably the validity bitmap, so null slots are skipped).
//
// It returns nil when every inspected index is in range. Otherwise it returns
// an error wrapping arrow.ErrIndex that reports the first offending index
// encountered.
func checkIndexBoundsImpl[T exec.IntTypes | exec.UintTypes](values *exec.ArraySpan, upperLimit uint64) error {
	// For unsigned index types, if upperLimit exceeds the largest value T can
	// represent then no index can be out of range, so skip the scan entirely.
	isSigned := !arrow.IsUnsignedInteger(values.Type.ID())
	if !isSigned && upperLimit > uint64(MaxOf[T]()) {
		return nil
	}

	valuesData := exec.GetSpanValues[T](values, 1)
	bitmap := values.Buffers[0].Buf
	isOutOfBounds := func(val T) bool {
		// The val >= 0 guard ensures a negative value is never converted to
		// uint64 before being compared with upperLimit.
		return (isSigned && val < 0) || (val >= 0 && uint64(val) >= upperLimit)
	}
	return bitutils.VisitSetBitRuns(bitmap, values.Offset, values.Len,
		func(pos, length int64) error {
			// Single pass: stop at the first out-of-range index. A separate
			// "any out of bounds?" pre-scan gains nothing in Go because ||
			// short-circuits, so it would only visit the run twice.
			end := pos + length
			for i := pos; i < end; i++ {
				if idx := valuesData[i]; isOutOfBounds(idx) {
					return fmt.Errorf("%w: %d out of bounds",
						arrow.ErrIndex, idx)
				}
			}
			return nil
		})
}

// checkIndexBounds bounds-checks the integer indices held in values against
// upperLimit by dispatching to the checkIndexBoundsImpl instantiation that
// matches the span's type ID.
//
// It returns whatever the selected checker returns, or an error wrapping
// arrow.ErrInvalid when the span's type is not one of the eight signed or
// unsigned integer types handled here.
func checkIndexBounds(values *exec.ArraySpan, upperLimit uint64) error {
	var check func(*exec.ArraySpan, uint64) error

	switch values.Type.ID() {
	case arrow.INT8:
		check = checkIndexBoundsImpl[int8]
	case arrow.UINT8:
		check = checkIndexBoundsImpl[uint8]
	case arrow.INT16:
		check = checkIndexBoundsImpl[int16]
	case arrow.UINT16:
		check = checkIndexBoundsImpl[uint16]
	case arrow.INT32:
		check = checkIndexBoundsImpl[int32]
	case arrow.UINT32:
		check = checkIndexBoundsImpl[uint32]
	case arrow.INT64:
		check = checkIndexBoundsImpl[int64]
	case arrow.UINT64:
		check = checkIndexBoundsImpl[uint64]
	default:
		return fmt.Errorf("%w: invalid index type for bounds checking", arrow.ErrInvalid)
	}

	return check(values, upperLimit)
}
Loading

0 comments on commit 81f3945

Please sign in to comment.