From b8eae609a3e27ee6a1748183d7cf4785dae55b40 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 2 Oct 2024 14:29:35 -0400 Subject: [PATCH 01/38] GH-41891: [C++] Clean up implicit fallthrough warnings (#41892) ### Rationale for this change Helps clean up warnings, and at least one of these looks like a subtle bug that may confuse developers ### What changes are included in this PR? Added break statements where case statements were previously falling through ### Are these changes tested? Builds cleanly ### Are there any user-facing changes? No * GitHub Issue: #41891 Authored-by: Will Ayd Signed-off-by: Will Ayd --- cpp/cmake_modules/SetupCxxFlags.cmake | 1 + cpp/src/arrow/c/bridge.cc | 1 + cpp/src/arrow/util/utf8_internal.h | 7 +++++++ cpp/src/gandiva/precompiled/hash.cc | 16 ++++++++++++++++ python/pyarrow/tests/test_cffi.py | 2 +- 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index e2e1c4412abd0..fd26dc7dd9c79 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -331,6 +331,7 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-conversion") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdate-time") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wimplicit-fallthrough") string(APPEND CXX_ONLY_FLAGS " -Wredundant-move") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 4f9095182f90c..0976a5cb615f4 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -2333,6 +2333,7 @@ class ArrayStreamReader { break; case ENOSYS: code = StatusCode::NotImplemented; + break; default: code = StatusCode::IOError; break; diff --git 
a/cpp/src/arrow/util/utf8_internal.h b/cpp/src/arrow/util/utf8_internal.h index 335e875f7ae20..eb62f349ac568 100644 --- a/cpp/src/arrow/util/utf8_internal.h +++ b/cpp/src/arrow/util/utf8_internal.h @@ -183,18 +183,25 @@ static inline bool ValidateUTF8Inline(const uint8_t* data, int64_t size) { switch (size) { case 7: state = internal::ValidateOneUTF8Byte(data[size - 7], state); + [[fallthrough]]; case 6: state = internal::ValidateOneUTF8Byte(data[size - 6], state); + [[fallthrough]]; case 5: state = internal::ValidateOneUTF8Byte(data[size - 5], state); + [[fallthrough]]; case 4: state = internal::ValidateOneUTF8Byte(data[size - 4], state); + [[fallthrough]]; case 3: state = internal::ValidateOneUTF8Byte(data[size - 3], state); + [[fallthrough]]; case 2: state = internal::ValidateOneUTF8Byte(data[size - 2], state); + [[fallthrough]]; case 1: state = internal::ValidateOneUTF8Byte(data[size - 1], state); + [[fallthrough]]; default: break; } diff --git a/cpp/src/gandiva/precompiled/hash.cc b/cpp/src/gandiva/precompiled/hash.cc index eacf3623087a0..e312c9e634306 100644 --- a/cpp/src/gandiva/precompiled/hash.cc +++ b/cpp/src/gandiva/precompiled/hash.cc @@ -223,36 +223,50 @@ static inline gdv_uint64 murmur3_64_buf(const gdv_uint8* key, gdv_int32 len, switch (len & 15) { case 15: k2 = static_cast(tail[14]) << 48; + [[fallthrough]]; case 14: k2 ^= static_cast(tail[13]) << 40; + [[fallthrough]]; case 13: k2 ^= static_cast(tail[12]) << 32; + [[fallthrough]]; case 12: k2 ^= static_cast(tail[11]) << 24; + [[fallthrough]]; case 11: k2 ^= static_cast(tail[10]) << 16; + [[fallthrough]]; case 10: k2 ^= static_cast(tail[9]) << 8; + [[fallthrough]]; case 9: k2 ^= static_cast(tail[8]); k2 *= c2; k2 = rotate_left(k2, 33); k2 *= c1; h2 ^= k2; + [[fallthrough]]; case 8: k1 ^= static_cast(tail[7]) << 56; + [[fallthrough]]; case 7: k1 ^= static_cast(tail[6]) << 48; + [[fallthrough]]; case 6: k1 ^= static_cast(tail[5]) << 40; + [[fallthrough]]; case 5: k1 ^= static_cast(tail[4]) << 32; + 
[[fallthrough]]; case 4: k1 ^= static_cast(tail[3]) << 24; + [[fallthrough]]; case 3: k1 ^= static_cast(tail[2]) << 16; + [[fallthrough]]; case 2: k1 ^= static_cast(tail[1]) << 8; + [[fallthrough]]; case 1: k1 ^= static_cast(tail[0]) << 0; k1 *= c1; @@ -308,8 +322,10 @@ static gdv_uint32 murmur3_32_buf(const gdv_uint8* key, gdv_int32 len, gdv_int32 switch (len & 3) { case 3: lk1 = (tail[2] & 0xff) << 16; + [[fallthrough]]; case 2: lk1 |= (tail[1] & 0xff) << 8; + [[fallthrough]]; case 1: lk1 |= (tail[0] & 0xff); lk1 *= c1; diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index e994a09f92ed2..84290a6b880ef 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -492,7 +492,7 @@ def gen(): original._export_to_c(ptr_stream) reader = pa.RecordBatchReader._import_from_c(ptr_stream) - with pytest.raises(OSError) as exc_info: + with pytest.raises(NotImplementedError) as exc_info: reader.read_next_batch() # inner *and* outer exception should be present From b754d5a4ef1c169771c17f88fca6558bfe6b8440 Mon Sep 17 00:00:00 2001 From: Sylvain Wallez Date: Thu, 3 Oct 2024 03:49:19 +0200 Subject: [PATCH 02/38] GH-30863: [JS] Use a singleton StructRow proxy handler (#44289) ### Rationale for this change Fixes #30863 by using a singleton proxy handler in `StructRow`'s constructor. Since the handler is stateless, there is no need to create a new instance for each row. ### What changes are included in this PR? Refactoring `StructRow`'s constructor to extract the proxy handler. ### Are these changes tested? No additional tests since this is an internal refactoring, but `yarn test` runs successfully. ### Are there any user-facing changes? No. 
* GitHub Issue: #30863 --- js/src/row/struct.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/js/src/row/struct.ts b/js/src/row/struct.ts index bc3869cb8d08f..074ec91fd64b3 100644 --- a/js/src/row/struct.ts +++ b/js/src/row/struct.ts @@ -39,7 +39,7 @@ export class StructRow { constructor(parent: Data>, rowIndex: number) { this[kParent] = parent; this[kRowIndex] = rowIndex; - return new Proxy(this, new StructRowProxyHandler()); + return new Proxy(this, structRowProxyHandler); } public toArray() { return Object.values(this.toJSON()); } @@ -157,3 +157,5 @@ class StructRowProxyHandler implements ProxyHandler Date: Thu, 3 Oct 2024 11:56:29 +0200 Subject: [PATCH 03/38] GH-43878: [Go][Release] Remove Go related codes from our release scripts (#44172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The Go implementation is moving to apache/arrow-go from go/ in apache/arrow. ### What changes are included in this PR? Remove go related release scripts from apache/arrow ### Are these changes tested? The source verification is tested via archery ### Are there any user-facing changes? Yes, Go will not be released as part of Apache Arrow anymore. 
* GitHub Issue: #43878 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- dev/release/01-prepare-test.rb | 14 --- dev/release/post-12-bump-versions-test.rb | 75 -------------- dev/release/post-13-go.sh | 34 ------- .../{post-14-msys2.sh => post-13-msys2.sh} | 0 ...ost-15-homebrew.sh => post-14-homebrew.sh} | 0 .../{post-16-vcpkg.sh => post-15-vcpkg.sh} | 0 .../{post-17-conan.sh => post-16-conan.sh} | 0 dev/release/utils-prepare.sh | 15 --- dev/release/verify-release-candidate.sh | 97 +------------------ docs/source/developers/release.rst | 22 ++--- docs/source/index.rst | 2 +- r/_pkgdown.yml | 2 +- 12 files changed, 9 insertions(+), 252 deletions(-) delete mode 100755 dev/release/post-13-go.sh rename dev/release/{post-14-msys2.sh => post-13-msys2.sh} (100%) rename dev/release/{post-15-homebrew.sh => post-14-homebrew.sh} (100%) rename dev/release/{post-16-vcpkg.sh => post-15-vcpkg.sh} (100%) rename dev/release/{post-17-conan.sh => post-16-conan.sh} (100%) diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index fec99ef058c5b..ca53b7f8fdee5 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -218,20 +218,6 @@ def test_version_pre_tag ] end expected_changes += [ - { - path: "go/arrow/doc.go", - hunks: [ - ["-const PkgVersion = \"#{@snapshot_version}\"", - "+const PkgVersion = \"#{@release_version}\""], - ], - }, - { - path: "go/parquet/writer_properties.go", - hunks: [ - ["-\tDefaultCreatedBy = \"parquet-go version #{@snapshot_version}\"", - "+\tDefaultCreatedBy = \"parquet-go version #{@release_version}\""], - ], - }, { path: "js/package.json", hunks: [ diff --git a/dev/release/post-12-bump-versions-test.rb b/dev/release/post-12-bump-versions-test.rb index f31e1a3122814..9af334c496fe6 100644 --- a/dev/release/post-12-bump-versions-test.rb +++ b/dev/release/post-12-bump-versions-test.rb @@ -210,15 +210,6 @@ def test_version_post_tag ["+ (#{@next_major_version}, 0),"], ], }, - { - path: 
"docs/source/index.rst", - hunks: [ - [ - "- Go ", - "+ Go ", - ], - ], - }, { path: "r/pkgdown/assets/versions.json", hunks: [ @@ -234,15 +225,6 @@ def test_version_post_tag ], ], }, - { - path: "r/_pkgdown.yml", - hunks: [ - [ - "- [Go](https://pkg.go.dev/github.com/apache/arrow/go/v#{@snapshot_major_version})
", - "+ [Go](https://pkg.go.dev/github.com/apache/arrow/go/v#{@next_major_version})
", - ], - ], - }, ] else expected_changes += [ @@ -260,63 +242,6 @@ def test_version_post_tag ] end - Dir.glob("go/**/{go.mod,*.go,*.go.*,README.md}") do |path| - if path == "go/arrow/doc.go" - expected_changes << { - path: path, - hunks: [ - [ - "-const PkgVersion = \"#{@snapshot_version}\"", - "+const PkgVersion = \"#{@next_snapshot_version}\"", - ], - ] - } - next - end - - import_path = "github.com/apache/arrow/go/v#{@snapshot_major_version}" - hunks = [] - if next_release_type == :major - lines = File.readlines(path, chomp: true) - target_lines = lines.each_with_index.select do |line, i| - line.include?(import_path) - end - next if target_lines.empty? - n_context_lines = 3 # The default of Git's diff.context - target_hunks = [[target_lines.first[0]]] - previous_i = target_lines.first[1] - target_lines[1..-1].each do |line, i| - if i - previous_i < n_context_lines - target_hunks.last << line - else - target_hunks << [line] - end - previous_i = i - end - target_hunks.each do |lines| - hunk = [] - lines.each do |line,| - hunk << "-#{line}" - end - lines.each do |line| - new_line = line.gsub("v#{@snapshot_major_version}") do - "v#{@next_major_version}" - end - hunk << "+#{new_line}" - end - hunks << hunk - end - end - if path == "go/parquet/writer_properties.go" - hunks << [ - "-\tDefaultCreatedBy = \"parquet-go version #{@snapshot_version}\"", - "+\tDefaultCreatedBy = \"parquet-go version #{@next_snapshot_version}\"", - ] - end - next if hunks.empty? - expected_changes << {hunks: hunks, path: path} - end - Dir.glob("java/**/pom.xml") do |path| version = "#{@snapshot_version}" lines = File.readlines(path, chomp: true) diff --git a/dev/release/post-13-go.sh b/dev/release/post-13-go.sh deleted file mode 100755 index 7c60348379564..0000000000000 --- a/dev/release/post-13-go.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -set -ue - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -if [ "$#" -ne 1 ]; then - echo "Usage: $0 " - exit -fi - -version=$1 -version_tag="apache-arrow-${version}" -go_arrow_tag="go/v${version}" - -git tag "${go_arrow_tag}" "${version_tag}" -git push apache "${go_arrow_tag}" diff --git a/dev/release/post-14-msys2.sh b/dev/release/post-13-msys2.sh similarity index 100% rename from dev/release/post-14-msys2.sh rename to dev/release/post-13-msys2.sh diff --git a/dev/release/post-15-homebrew.sh b/dev/release/post-14-homebrew.sh similarity index 100% rename from dev/release/post-15-homebrew.sh rename to dev/release/post-14-homebrew.sh diff --git a/dev/release/post-16-vcpkg.sh b/dev/release/post-15-vcpkg.sh similarity index 100% rename from dev/release/post-16-vcpkg.sh rename to dev/release/post-15-vcpkg.sh diff --git a/dev/release/post-17-conan.sh b/dev/release/post-16-conan.sh similarity index 100% rename from dev/release/post-17-conan.sh rename to dev/release/post-16-conan.sh diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index 6ba8b22a06e89..ecdd0a26dcb7a 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -179,21 +179,6 @@ update_versions() { git add */*/*/version.rb popd - pushd "${ARROW_DIR}/go" - find . 
"(" -name "*.go*" -o -name "go.mod" -o -name README.md ")" -exec sed -i.bak -E -e \ - "s|(github\\.com/apache/arrow/go)/v[0-9]+|\1/v${major_version}|g" {} \; - # update parquet writer version - sed -i.bak -E -e \ - "s/\"parquet-go version .+\"/\"parquet-go version ${version}\"/" \ - parquet/writer_properties.go - sed -i.bak -E -e \ - "s/const PkgVersion = \".*/const PkgVersion = \"${version}\"/" \ - arrow/doc.go - - find . -name "*.bak" -exec rm {} \; - git add . - popd - pushd "${ARROW_DIR}/docs/source" # godoc link must reference current version, will reference v0.0.0 (2018) otherwise sed -i.bak -E -e \ diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index c1419b30d8de7..4e5593525477e 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -389,55 +389,6 @@ install_csharp() { CSHARP_ALREADY_INSTALLED=1 } -install_go() { - # Install go - if [ "${GO_ALREADY_INSTALLED:-0}" -gt 0 ]; then - show_info "$(go version) already installed at $(which go)" - return 0 - fi - - if command -v go > /dev/null; then - show_info "Found $(go version) at $(command -v go)" - export GOPATH=${ARROW_TMPDIR}/gopath - mkdir -p $GOPATH - return 0 - fi - - local version=1.22.6 - show_info "Installing go version ${version}..." 
- - local arch="$(uname -m)" - if [ "$arch" == "x86_64" ]; then - arch=amd64 - elif [ "$arch" == "aarch64" ]; then - arch=arm64 - fi - - if [ "$(uname)" == "Darwin" ]; then - local os=darwin - else - local os=linux - fi - - local archive="go${version}.${os}-${arch}.tar.gz" - curl -sLO https://go.dev/dl/$archive - - ls -l - local prefix=${ARROW_TMPDIR}/go - mkdir -p $prefix - tar -xzf $archive -C $prefix - rm -f $archive - - export GOROOT=${prefix}/go - export GOPATH=${prefix}/gopath - export PATH=$GOROOT/bin:$GOPATH/bin:$PATH - - mkdir -p $GOPATH - show_info "$(go version) installed at $(which go)" - - GO_ALREADY_INSTALLED=1 -} - install_conda() { # Setup short-lived miniconda for Python and integration tests show_info "Ensuring that Conda is installed..." @@ -586,13 +537,6 @@ maybe_setup_virtualenv() { fi } -maybe_setup_go() { - show_info "Ensuring that Go is installed..." - if [ "${USE_CONDA}" -eq 0 ]; then - install_go - fi -} - maybe_setup_nodejs() { show_info "Ensuring that NodeJS is installed..." if [ "${USE_CONDA}" -eq 0 ]; then @@ -951,38 +895,6 @@ test_js() { popd } -test_go() { - show_header "Build and test Go libraries" - - maybe_setup_go - maybe_setup_conda compilers go=1.22 - - pushd go - go get -v ./... - if [ ${TEST_GO} -gt 0 ]; then - go test ./... - fi - go install -buildvcs=false ./... - if [ ${TEST_INTEGRATION_GO} -gt 0 ]; then - pushd arrow/internal/cdata_integration - case "$(uname)" in - Linux) - go_lib="arrow_go_integration.so" - ;; - Darwin) - go_lib="arrow_go_integration.dylib" - ;; - MINGW*) - go_lib="arrow_go_integration.dll" - ;; - esac - CGO_ENABLED=1 go build -buildvcs=false -tags cdata_integration,assert -buildmode=c-shared -o ${go_lib} . 
- popd - fi - go clean -modcache - popd -} - # Run integration tests test_integration() { show_header "Build and execute integration tests" @@ -1011,7 +923,6 @@ test_integration() { --with-cpp=${TEST_INTEGRATION_CPP} \ --with-java=${TEST_INTEGRATION_JAVA} \ --with-js=${TEST_INTEGRATION_JS} \ - --with-go=${TEST_INTEGRATION_GO} \ $INTEGRATION_TEST_ARGS } @@ -1090,9 +1001,6 @@ test_source_distribution() { pushd $ARROW_SOURCE_DIR - if [ ${BUILD_GO} -gt 0 ]; then - test_go - fi if [ ${TEST_CSHARP} -gt 0 ]; then test_csharp fi @@ -1289,22 +1197,19 @@ test_jars() { : ${TEST_RUBY:=${TEST_SOURCE}} : ${TEST_PYTHON:=${TEST_SOURCE}} : ${TEST_JS:=${TEST_SOURCE}} -: ${TEST_GO:=${TEST_SOURCE}} : ${TEST_INTEGRATION:=${TEST_SOURCE}} # For selective Integration testing, set TEST_DEFAULT=0 TEST_INTEGRATION_X=1 TEST_INTEGRATION_Y=1 : ${TEST_INTEGRATION_CPP:=${TEST_INTEGRATION}} : ${TEST_INTEGRATION_JAVA:=${TEST_INTEGRATION}} : ${TEST_INTEGRATION_JS:=${TEST_INTEGRATION}} -: ${TEST_INTEGRATION_GO:=${TEST_INTEGRATION}} # Automatically build/test if its activated by a dependent TEST_GLIB=$((${TEST_GLIB} + ${TEST_RUBY})) BUILD_CPP=$((${TEST_CPP} + ${TEST_GLIB} + ${TEST_PYTHON} + ${TEST_INTEGRATION_CPP})) BUILD_JAVA=$((${TEST_JAVA} + ${TEST_INTEGRATION_JAVA})) BUILD_JS=$((${TEST_JS} + ${TEST_INTEGRATION_JS})) -BUILD_GO=$((${TEST_GO} + ${TEST_INTEGRATION_GO})) -TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS} + ${TEST_INTEGRATION_GO})) +TEST_INTEGRATION=$((${TEST_INTEGRATION} + ${TEST_INTEGRATION_CPP} + ${TEST_INTEGRATION_JAVA} + ${TEST_INTEGRATION_JS})) # Execute tests in a conda environment : ${USE_CONDA:=0} diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index 0d9af1f543cac..55f3d5603ed05 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -429,8 +429,8 @@ Be sure to go through on the following checklist: git remote add 
git@github.com:/homebrew-core.git cd - - # dev/release/post-15-homebrew.sh 10.0.0 kou - dev/release/post-15-homebrew.sh X.Y.Z + # dev/release/post-14-homebrew.sh 10.0.0 kou + dev/release/post-14-homebrew.sh X.Y.Z This script pushes a ``apache-arrow-X.Y.Z`` branch to your ``Homebrew/homebrew-core`` fork. You need to create a pull request from the ``apache-arrow-X.Y.Z`` branch with ``apache-arrow, apache-arrow-glib: X.Y.Z`` title on your Web browser. @@ -455,8 +455,8 @@ Be sure to go through on the following checklist: git remote add upstream https://github.com/msys2/MINGW-packages.git cd - - # dev/release/post-14-msys2.sh 10.0.0 ../MINGW-packages - dev/release/post-14-msys2.sh X.Y.Z + # dev/release/post-13-msys2.sh 10.0.0 ../MINGW-packages + dev/release/post-13-msys2.sh X.Y.Z This script pushes a ``arrow-X.Y.Z`` branch to your ``msys2/MINGW-packages`` fork. You need to create a pull request from the ``arrow-X.Y.Z`` branch with ``arrow: Update to X.Y.Z`` title on your Web browser. @@ -602,8 +602,8 @@ Be sure to go through on the following checklist: git remote add upstream https://github.com/microsoft/vcpkg.git cd - - # dev/release/post-16-vcpkg.sh 10.0.0 ../vcpkg - dev/release/post-16-vcpkg.sh X.Y.Z + # dev/release/post-15-vcpkg.sh 10.0.0 ../vcpkg + dev/release/post-15-vcpkg.sh X.Y.Z This script pushes a ``arrow-X.Y.Z`` branch to your ``microsoft/vcpkg`` fork. You need to create a pull request from the ``arrow-X.Y.Z`` branch with ``[arrow] Update to X.Y.Z`` title on your Web browser. @@ -646,16 +646,6 @@ Be sure to go through on the following checklist: # dev/release/post-12-bump-versions.sh 10.0.0 11.0.0 dev/release/post-12-bump-versions.sh X.Y.Z NEXT_X.NEXT_Y.NEXT_Z -.. dropdown:: Update tags for Go modules - :animate: fade-in-slide-down - :class-title: sd-fs-5 - :class-container: sd-shadow-md - - .. code-block:: Bash - - # dev/release/post-13-go.sh 10.0.0 - dev/release/post-13-go.sh X.Y.Z - .. 
dropdown:: Update docs :animate: fade-in-slide-down :class-title: sd-fs-5 diff --git a/docs/source/index.rst b/docs/source/index.rst index 6f38ab668d883..3e678c78b6963 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -104,7 +104,7 @@ Implementations C/GLib C++ C# - Go + Go Java JavaScript Julia diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 10a233356b684..00bbafb55c5e3 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -76,7 +76,7 @@ home: [C GLib](https://arrow.apache.org/docs/c_glib)
[C++](https://arrow.apache.org/docs/cpp)
[C#](https://github.com/apache/arrow/blob/main/csharp/README.md)
- [Go](https://pkg.go.dev/github.com/apache/arrow/go/v18)
+ [Go](https://arrow.apache.org/go/)
[Java](https://arrow.apache.org/docs/java)
[JavaScript](https://arrow.apache.org/docs/js)
[Julia](https://github.com/apache/arrow-julia/blob/main/README.md)
From ac2a93d90f3b9af72163d609f339664a8dd697e8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Thu, 3 Oct 2024 19:32:01 -0500 Subject: [PATCH 04/38] GH-44297: [Integration][CI] Skip nanoarrow IPC integration tests for compressed/dictionary-encoded files (#44298) ### Rationale for this change There are a few remaining failures when testing nanoarrow against itself: https://github.com/apache/arrow-nanoarrow/pull/643 . Our IPC reader doesn't support dictionaries or compression, so we can't run those tests. ### What changes are included in this PR? Skips were added to the archery code that runs the tests. ### Are these changes tested? Yes (integration tests run on every commit) ### Are there any user-facing changes? No! * GitHub Issue: #44297 Authored-by: Dewey Dunnington Signed-off-by: Sutou Kouhei --- dev/archery/archery/integration/runner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index 97854b87b24bd..e72baaf878532 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -193,12 +193,16 @@ def _gold_tests(self, gold_dir): skip_testers.add("Rust") if prefix == '2.0.0-compression': skip_testers.add("JS") + # https://github.com/apache/arrow-nanoarrow/issues/621 + skip_testers.add("nanoarrow") # See https://github.com/apache/arrow/pull/9822 for how to # disable specific compression type tests. 
if prefix == '4.0.0-shareddict': skip_testers.add("C#") + # https://github.com/apache/arrow-nanoarrow/issues/622 + skip_testers.add("nanoarrow") quirks = set() if prefix in {'0.14.1', '0.17.1', From ad136d8988fa4560511b0d50cf8ee10f5c612024 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 4 Oct 2024 19:27:56 +0900 Subject: [PATCH 05/38] GH-44300: [Integration][Archery] Don't import unused testers (#44301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Some testers such as `JavaTester` may raise an exception on import when the target implementation isn't built. It stops the integration tests unexpectedly. ### What changes are included in this PR? Import testers only for enabled implementations. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #44300 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- dev/archery/archery/integration/runner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index e72baaf878532..e276738846371 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -31,13 +31,6 @@ from . 
import cdata from .scenario import Scenario from .tester import Tester, CDataExporter, CDataImporter -from .tester_cpp import CppTester -from .tester_go import GoTester -from .tester_rust import RustTester -from .tester_java import JavaTester -from .tester_js import JSTester -from .tester_csharp import CSharpTester -from .tester_nanoarrow import NanoarrowTester from .util import guid, printer from .util import SKIP_C_ARRAY, SKIP_C_SCHEMA, SKIP_FLIGHT, SKIP_IPC from ..utils.logger import group as group_raw @@ -603,24 +596,31 @@ def append_tester(implementation, tester): other_testers.append(tester) if with_cpp: + from .tester_cpp import CppTester append_tester("cpp", CppTester(**kwargs)) if with_java: + from .tester_java import JavaTester append_tester("java", JavaTester(**kwargs)) if with_js: + from .tester_js import JSTester append_tester("js", JSTester(**kwargs)) if with_csharp: + from .tester_csharp import CSharpTester append_tester("csharp", CSharpTester(**kwargs)) if with_go: + from .tester_go import GoTester append_tester("go", GoTester(**kwargs)) if with_nanoarrow: + from .tester_nanoarrow import NanoarrowTester append_tester("nanoarrow", NanoarrowTester(**kwargs)) if with_rust: + from .tester_rust import RustTester append_tester("rust", RustTester(**kwargs)) static_json_files = get_static_json_files() From 7cc07681189e71ef986653129a6804a81d32ce8f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 4 Oct 2024 17:54:20 +0200 Subject: [PATCH 06/38] MINOR: [JS] Bump rollup from 4.19.2 to 4.22.4 in /js (#44207) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [rollup](https://github.com/rollup/rollup) from 4.19.2 to 4.22.4.
Release notes

Sourced from rollup's releases.

v4.22.4

4.22.4

2024-09-21

Bug Fixes

  • Fix a vulnerability in generated code that affects IIFE, UMD and CJS bundles when run in a browser context (#5671)

Pull Requests

v4.22.3

4.22.3

2024-09-21

Bug Fixes

  • Ensure that mutations in modules without side effects are observed while properly handling transitive dependencies (#5669)

Pull Requests

v4.22.2

4.22.2

2024-09-20

Bug Fixes

  • Revert fix for side effect free modules until other issues are investigated (#5667)

Pull Requests

v4.22.1

4.22.1

2024-09-20

Bug Fixes

  • Revert #5644 "stable chunk hashes" while issues are being investigated

Pull Requests

... (truncated)

Changelog

Sourced from rollup's changelog.

4.22.4

2024-09-21

Bug Fixes

  • Fix a vulnerability in generated code that affects IIFE, UMD and CJS bundles when run in a browser context (#5671)

Pull Requests

4.22.3

2024-09-21

Bug Fixes

  • Ensure that mutations in modules without side effects are observed while properly handling transitive dependencies (#5669)

Pull Requests

4.22.2

2024-09-20

Bug Fixes

  • Revert fix for side effect free modules until other issues are investigated (#5667)

Pull Requests

4.22.1

2024-09-20

Bug Fixes

  • Revert #5644 "stable chunk hashes" while issues are being investigated

Pull Requests

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=rollup&package-manager=npm_and_yarn&previous-version=4.19.2&new-version=4.22.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself) You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/apache/arrow/network/alerts).
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Raúl Cumplido --- js/package.json | 2 +- js/yarn.lock | 198 ++++++++++++++++++++++++------------------------ 2 files changed, 100 insertions(+), 100 deletions(-) diff --git a/js/package.json b/js/package.json index d8a784b784d3c..643f049b6bfaf 100644 --- a/js/package.json +++ b/js/package.json @@ -102,7 +102,7 @@ "mkdirp": "3.0.1", "multistream": "4.1.0", "regenerator-runtime": "0.14.1", - "rollup": "4.19.2", + "rollup": "4.22.4", "rxjs": "7.8.1", "ts-jest": "29.1.4", "ts-node": "10.9.2", diff --git a/js/yarn.lock b/js/yarn.lock index e8223fba9aad2..e237d09469f4d 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1017,85 +1017,85 @@ estree-walker "^2.0.2" picomatch "^2.3.1" -"@rollup/rollup-android-arm-eabi@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.19.2.tgz#6b991cb44bf69e50163528ea85bed545330ba821" - integrity sha512-OHflWINKtoCFSpm/WmuQaWW4jeX+3Qt3XQDepkkiFTsoxFc5BpF3Z5aDxFZgBqRjO6ATP5+b1iilp4kGIZVWlA== - -"@rollup/rollup-android-arm64@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.19.2.tgz#5d3c8c2f9742d62ba258cc378bd2d4720f0c431c" - integrity sha512-k0OC/b14rNzMLDOE6QMBCjDRm3fQOHAL8Ldc9bxEWvMo4Ty9RY6rWmGetNTWhPo+/+FNd1lsQYRd0/1OSix36A== - -"@rollup/rollup-darwin-arm64@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.19.2.tgz#8eac8682a34a705bb6a57eb3e739fd6bbedfabed" - integrity sha512-IIARRgWCNWMTeQH+kr/gFTHJccKzwEaI0YSvtqkEBPj7AshElFq89TyreKNFAGh5frLfDCbodnq+Ye3dqGKPBw== - -"@rollup/rollup-darwin-x64@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.19.2.tgz#70a9953fc624bd7f645901f4250f6b5807ac7e92" - integrity 
sha512-52udDMFDv54BTAdnw+KXNF45QCvcJOcYGl3vQkp4vARyrcdI/cXH8VXTEv/8QWfd6Fru8QQuw1b2uNersXOL0g== - -"@rollup/rollup-linux-arm-gnueabihf@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.19.2.tgz#8f6c4ff4c4972413ff94345080380d4e3caa3c69" - integrity sha512-r+SI2t8srMPYZeoa1w0o/AfoVt9akI1ihgazGYPQGRilVAkuzMGiTtexNZkrPkQsyFrvqq/ni8f3zOnHw4hUbA== - -"@rollup/rollup-linux-arm-musleabihf@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.19.2.tgz#5d3c0fe5ea5ddf2feb511b3cb031df17eaa7e33d" - integrity sha512-+tYiL4QVjtI3KliKBGtUU7yhw0GMcJJuB9mLTCEauHEsqfk49gtUBXGtGP3h1LW8MbaTY6rSFIQV1XOBps1gBA== - -"@rollup/rollup-linux-arm64-gnu@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.19.2.tgz#b7f104388b2f5624d9f8adfff10ba59af8ab8ed1" - integrity sha512-OR5DcvZiYN75mXDNQQxlQPTv4D+uNCUsmSCSY2FolLf9W5I4DSoJyg7z9Ea3TjKfhPSGgMJiey1aWvlWuBzMtg== - -"@rollup/rollup-linux-arm64-musl@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.19.2.tgz#6d5ca6d3904309bec285ea5202d589cebb93dee4" - integrity sha512-Hw3jSfWdUSauEYFBSFIte6I8m6jOj+3vifLg8EU3lreWulAUpch4JBjDMtlKosrBzkr0kwKgL9iCfjA8L3geoA== - -"@rollup/rollup-linux-powerpc64le-gnu@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.19.2.tgz#4df9be1396ea9eb0ca99fd0f2e858008d7f063e3" - integrity sha512-rhjvoPBhBwVnJRq/+hi2Q3EMiVF538/o9dBuj9TVLclo9DuONqt5xfWSaE6MYiFKpo/lFPJ/iSI72rYWw5Hc7w== - -"@rollup/rollup-linux-riscv64-gnu@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.19.2.tgz#80d63c5562915a2f8616a04251fcaee0218112b0" - integrity 
sha512-EAz6vjPwHHs2qOCnpQkw4xs14XJq84I81sDRGPEjKPFVPBw7fwvtwhVjcZR6SLydCv8zNK8YGFblKWd/vRmP8g== - -"@rollup/rollup-linux-s390x-gnu@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.19.2.tgz#ef62e9bc5cc3b84fcfe96ec0a42d1989691217b3" - integrity sha512-IJSUX1xb8k/zN9j2I7B5Re6B0NNJDJ1+soezjNojhT8DEVeDNptq2jgycCOpRhyGj0+xBn7Cq+PK7Q+nd2hxLA== - -"@rollup/rollup-linux-x64-gnu@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.19.2.tgz#6a275282a0080fee98ddd9fda0de23c4c6bafd48" - integrity sha512-OgaToJ8jSxTpgGkZSkwKE+JQGihdcaqnyHEFOSAU45utQ+yLruE1dkonB2SDI8t375wOKgNn8pQvaWY9kPzxDQ== - -"@rollup/rollup-linux-x64-musl@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.19.2.tgz#64f0c704107e6b45b26dd8c2e1ff64246e4a1251" - integrity sha512-5V3mPpWkB066XZZBgSd1lwozBk7tmOkKtquyCJ6T4LN3mzKENXyBwWNQn8d0Ci81hvlBw5RoFgleVpL6aScLYg== - -"@rollup/rollup-win32-arm64-msvc@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.19.2.tgz#bada17b0c5017ff58d0feba401c43ff5a646c693" - integrity sha512-ayVstadfLeeXI9zUPiKRVT8qF55hm7hKa+0N1V6Vj+OTNFfKSoUxyZvzVvgtBxqSb5URQ8sK6fhwxr9/MLmxdA== - -"@rollup/rollup-win32-ia32-msvc@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.19.2.tgz#a716d862f6ac39d88bdb825e27f63aeb0387cd66" - integrity sha512-Mda7iG4fOLHNsPqjWSjANvNZYoW034yxgrndof0DwCy0D3FvTjeNo+HGE6oGWgvcLZNLlcp0hLEFcRs+UGsMLg== - -"@rollup/rollup-win32-x64-msvc@4.19.2": - version "4.19.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.19.2.tgz#d67206c5f2e4b2832ce360bbbde194e96d16dc51" - integrity 
sha512-DPi0ubYhSow/00YqmG1jWm3qt1F8aXziHc/UNy8bo9cpCacqhuWu+iSq/fp2SyEQK7iYTZ60fBU9cat3MXTjIQ== +"@rollup/rollup-android-arm-eabi@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.22.4.tgz#8b613b9725e8f9479d142970b106b6ae878610d5" + integrity sha512-Fxamp4aEZnfPOcGA8KSNEohV8hX7zVHOemC8jVBoBUHu5zpJK/Eu3uJwt6BMgy9fkvzxDaurgj96F/NiLukF2w== + +"@rollup/rollup-android-arm64@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.22.4.tgz#654ca1049189132ff602bfcf8df14c18da1f15fb" + integrity sha512-VXoK5UMrgECLYaMuGuVTOx5kcuap1Jm8g/M83RnCHBKOqvPPmROFJGQaZhGccnsFtfXQ3XYa4/jMCJvZnbJBdA== + +"@rollup/rollup-darwin-arm64@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.22.4.tgz#6d241d099d1518ef0c2205d96b3fa52e0fe1954b" + integrity sha512-xMM9ORBqu81jyMKCDP+SZDhnX2QEVQzTcC6G18KlTQEzWK8r/oNZtKuZaCcHhnsa6fEeOBionoyl5JsAbE/36Q== + +"@rollup/rollup-darwin-x64@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.22.4.tgz#42bd19d292a57ee11734c980c4650de26b457791" + integrity sha512-aJJyYKQwbHuhTUrjWjxEvGnNNBCnmpHDvrb8JFDbeSH3m2XdHcxDd3jthAzvmoI8w/kSjd2y0udT+4okADsZIw== + +"@rollup/rollup-linux-arm-gnueabihf@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.22.4.tgz#f23555ee3d8fe941c5c5fd458cd22b65eb1c2232" + integrity sha512-j63YtCIRAzbO+gC2L9dWXRh5BFetsv0j0va0Wi9epXDgU/XUi5dJKo4USTttVyK7fGw2nPWK0PbAvyliz50SCQ== + +"@rollup/rollup-linux-arm-musleabihf@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.22.4.tgz#f3bbd1ae2420f5539d40ac1fde2b38da67779baa" + integrity 
sha512-dJnWUgwWBX1YBRsuKKMOlXCzh2Wu1mlHzv20TpqEsfdZLb3WoJW2kIEsGwLkroYf24IrPAvOT/ZQ2OYMV6vlrg== + +"@rollup/rollup-linux-arm64-gnu@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.22.4.tgz#7abe900120113e08a1f90afb84c7c28774054d15" + integrity sha512-AdPRoNi3NKVLolCN/Sp4F4N1d98c4SBnHMKoLuiG6RXgoZ4sllseuGioszumnPGmPM2O7qaAX/IJdeDU8f26Aw== + +"@rollup/rollup-linux-arm64-musl@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.22.4.tgz#9e655285c8175cd44f57d6a1e8e5dedfbba1d820" + integrity sha512-Gl0AxBtDg8uoAn5CCqQDMqAx22Wx22pjDOjBdmG0VIWX3qUBHzYmOKh8KXHL4UpogfJ14G4wk16EQogF+v8hmA== + +"@rollup/rollup-linux-powerpc64le-gnu@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.22.4.tgz#9a79ae6c9e9d8fe83d49e2712ecf4302db5bef5e" + integrity sha512-3aVCK9xfWW1oGQpTsYJJPF6bfpWfhbRnhdlyhak2ZiyFLDaayz0EP5j9V1RVLAAxlmWKTDfS9wyRyY3hvhPoOg== + +"@rollup/rollup-linux-riscv64-gnu@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.22.4.tgz#67ac70eca4ace8e2942fabca95164e8874ab8128" + integrity sha512-ePYIir6VYnhgv2C5Xe9u+ico4t8sZWXschR6fMgoPUK31yQu7hTEJb7bCqivHECwIClJfKgE7zYsh1qTP3WHUA== + +"@rollup/rollup-linux-s390x-gnu@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.22.4.tgz#9f883a7440f51a22ed7f99e1d070bd84ea5005fc" + integrity sha512-GqFJ9wLlbB9daxhVlrTe61vJtEY99/xB3C8e4ULVsVfflcpmR6c8UZXjtkMA6FhNONhj2eA5Tk9uAVw5orEs4Q== + +"@rollup/rollup-linux-x64-gnu@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.22.4.tgz#70116ae6c577fe367f58559e2cffb5641a1dd9d0" + integrity 
sha512-87v0ol2sH9GE3cLQLNEy0K/R0pz1nvg76o8M5nhMR0+Q+BBGLnb35P0fVz4CQxHYXaAOhE8HhlkaZfsdUOlHwg== + +"@rollup/rollup-linux-x64-musl@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.22.4.tgz#f473f88219feb07b0b98b53a7923be716d1d182f" + integrity sha512-UV6FZMUgePDZrFjrNGIWzDo/vABebuXBhJEqrHxrGiU6HikPy0Z3LfdtciIttEUQfuDdCn8fqh7wiFJjCNwO+g== + +"@rollup/rollup-win32-arm64-msvc@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.22.4.tgz#4349482d17f5d1c58604d1c8900540d676f420e0" + integrity sha512-BjI+NVVEGAXjGWYHz/vv0pBqfGoUH0IGZ0cICTn7kB9PyjrATSkX+8WkguNjWoj2qSr1im/+tTGRaY+4/PdcQw== + +"@rollup/rollup-win32-ia32-msvc@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.22.4.tgz#a6fc39a15db618040ec3c2a24c1e26cb5f4d7422" + integrity sha512-SiWG/1TuUdPvYmzmYnmd3IEifzR61Tragkbx9D3+R8mzQqDBz8v+BvZNDlkiTtI9T15KYZhP0ehn3Dld4n9J5g== + +"@rollup/rollup-win32-x64-msvc@4.22.4": + version "4.22.4" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.22.4.tgz#3dd5d53e900df2a40841882c02e56f866c04d202" + integrity sha512-j8pPKp53/lq9lMXN57S8cFz0MynJk8OWNuUnXct/9KCpKU7DgU3bYMJhwWmcqC0UU29p8Lr0/7KEVcaM6bf47Q== "@rollup/stream@3.0.1": version "3.0.1" @@ -6179,29 +6179,29 @@ rimraf@^3.0.2: dependencies: glob "^7.1.3" -rollup@4.19.2: - version "4.19.2" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.19.2.tgz#4985cd2028965157e8d674a70e49f33aca9038eb" - integrity sha512-6/jgnN1svF9PjNYJ4ya3l+cqutg49vOZ4rVgsDKxdl+5gpGPnByFXWGyfH9YGx9i3nfBwSu1Iyu6vGwFFA0BdQ== +rollup@4.22.4: + version "4.22.4" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.22.4.tgz#4135a6446671cd2a2453e1ad42a45d5973ec3a0f" + integrity sha512-vD8HJ5raRcWOyymsR6Z3o6+RzfEPCnVLMFJ6vRslO1jt4LO6dUo5Qnpg7y4RkZFM2DMe3WUirkI5c16onjrc6A== 
dependencies: "@types/estree" "1.0.5" optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.19.2" - "@rollup/rollup-android-arm64" "4.19.2" - "@rollup/rollup-darwin-arm64" "4.19.2" - "@rollup/rollup-darwin-x64" "4.19.2" - "@rollup/rollup-linux-arm-gnueabihf" "4.19.2" - "@rollup/rollup-linux-arm-musleabihf" "4.19.2" - "@rollup/rollup-linux-arm64-gnu" "4.19.2" - "@rollup/rollup-linux-arm64-musl" "4.19.2" - "@rollup/rollup-linux-powerpc64le-gnu" "4.19.2" - "@rollup/rollup-linux-riscv64-gnu" "4.19.2" - "@rollup/rollup-linux-s390x-gnu" "4.19.2" - "@rollup/rollup-linux-x64-gnu" "4.19.2" - "@rollup/rollup-linux-x64-musl" "4.19.2" - "@rollup/rollup-win32-arm64-msvc" "4.19.2" - "@rollup/rollup-win32-ia32-msvc" "4.19.2" - "@rollup/rollup-win32-x64-msvc" "4.19.2" + "@rollup/rollup-android-arm-eabi" "4.22.4" + "@rollup/rollup-android-arm64" "4.22.4" + "@rollup/rollup-darwin-arm64" "4.22.4" + "@rollup/rollup-darwin-x64" "4.22.4" + "@rollup/rollup-linux-arm-gnueabihf" "4.22.4" + "@rollup/rollup-linux-arm-musleabihf" "4.22.4" + "@rollup/rollup-linux-arm64-gnu" "4.22.4" + "@rollup/rollup-linux-arm64-musl" "4.22.4" + "@rollup/rollup-linux-powerpc64le-gnu" "4.22.4" + "@rollup/rollup-linux-riscv64-gnu" "4.22.4" + "@rollup/rollup-linux-s390x-gnu" "4.22.4" + "@rollup/rollup-linux-x64-gnu" "4.22.4" + "@rollup/rollup-linux-x64-musl" "4.22.4" + "@rollup/rollup-win32-arm64-msvc" "4.22.4" + "@rollup/rollup-win32-ia32-msvc" "4.22.4" + "@rollup/rollup-win32-x64-msvc" "4.22.4" fsevents "~2.3.2" run-parallel@^1.1.9: From 7817e3c8fb624ee5479827a25497bd61cc96d63b Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Sat, 5 Oct 2024 05:31:46 +0800 Subject: [PATCH 07/38] MINOR: [Docs] Refine docs for some compute functions (#44305) ### Rationale for this change Some trivial inaccurate and missing part in compute functions. ### What changes are included in this PR? Refined the inaccurate and missing part. ### Are these changes tested? No need. ### Are there any user-facing changes? None. 
Authored-by: Ruoxi Sun Signed-off-by: Sutou Kouhei --- docs/source/cpp/compute.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 4131bbdf6f912..093b160d8e9a0 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1707,7 +1707,7 @@ These functions select and return a subset of their input. +---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ | array_take | Binary | Any | Integer | Input type 1 | :struct:`TakeOptions` | \(3) | +---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ -| drop_null | Unary | Any | - | Input type 1 | | \(1) | +| drop_null | Unary | Any | | Input type 1 | | \(1) | +---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ | filter | Binary | Any | Boolean | Input type 1 | :struct:`FilterOptions` | \(2) | +---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ @@ -1851,15 +1851,18 @@ Structural transforms index *n* and the type code at index *n* is 2. * The indices ``2`` and ``7`` are invalid. +Replace functions +~~~~~~~~~~~~~~~~~ + These functions create a copy of the first input with some elements replaced, based on the remaining inputs. 
+--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+ | Function name | Arity | Input type 1 | Input type 2 | Input type 3 | Output type | Notes | +==========================+============+=======================+==============+==============+==============+=======+ -| fill_null_backward | Unary | Fixed-width or binary | N/A | N/A | N/A | \(1) | +| fill_null_backward | Unary | Fixed-width or binary | | | Input type 1 | \(1) | +--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+ -| fill_null_forward | Unary | Fixed-width or binary | N/A | N/A | N/A | \(1) | +| fill_null_forward | Unary | Fixed-width or binary | | | Input type 1 | \(1) | +--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+ | replace_with_mask | Ternary | Fixed-width or binary | Boolean | Input type 1 | Input type 1 | \(2) | +--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+ @@ -1872,7 +1875,7 @@ replaced, based on the remaining inputs. Also see: :ref:`if_else `. Pairwise functions -~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~ Pairwise functions are unary vector functions that perform a binary operation on a pair of elements in the input array, typically on adjacent elements. The n-th output is computed by applying the binary operation to the n-th and (n-p)-th inputs, From 3fb7777a9a656e8bcc6f59396c4a5f6024a5f78e Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 4 Oct 2024 22:41:23 +0100 Subject: [PATCH 08/38] GH-44256: [C++][FS][Azure] Fix edgecase where GetFileInfo incorrectly returns NotFound on flat namespace and Azurite (#44302) ### Rationale for this change Fix a bug where `GetFileInfo` incorrectly returns `FileType::NotFound` on flat namespace and Azurite. ### What changes are included in this PR? 
Fix by detecting the exact edgecase and doing an extra listing operation to disambiguate. ### Are these changes tested? Yes, updated automated test ### Are there any user-facing changes? Only a bug fix. * GitHub Issue: #44256 Authored-by: Thomas Newton Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.cc | 21 +++++++++++++++++++++ cpp/src/arrow/filesystem/azurefs_test.cc | 14 ++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index a9f58c4e00c31..d9a69800bb87e 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1793,6 +1793,8 @@ class AzureFileSystem::Impl { // BlobPrefixes. A BlobPrefix always ends with kDelimiter ("/"), so we can // distinguish between a directory and a file by checking if we received a // prefix or a blob. + // This strategy allows us to implement GetFileInfo with just 1 blob storage + // operation in almost every case. if (!list_response.BlobPrefixes.empty()) { // Ensure the returned BlobPrefixes[0] string doesn't contain more characters than // the requested Prefix. For instance, if we request with Prefix="dir/abra" and @@ -1814,6 +1816,25 @@ class AzureFileSystem::Impl { info.set_mtime( std::chrono::system_clock::time_point{blob.Details.LastModified}); return info; + } else if (blob.Name[options.Prefix.Value().length()] < internal::kSep) { + // First list result did not indicate a directory and there is definitely no + // exactly matching blob. However, there may still be a directory that we + // initially missed because the first list result came before + // `options.Prefix + internal::kSep` lexicographically. 
+ // For example the flat namespace storage account has the following blobs: + // - container/dir.txt + // - container/dir/file.txt + // GetFileInfo(container/dir) should return FileType::Directory but in this + // edge case `blob = "dir.txt"`, so without further checks we would incorrectly + // return FileType::NotFound. + // Therefore we make an extra list operation with the trailing slash to confirm + // whether the path is a directory. + options.Prefix = internal::EnsureTrailingSlash(location.path); + auto list_with_trailing_slash_response = container_client.ListBlobs(options); + if (!list_with_trailing_slash_response.Blobs.empty()) { + info.set_type(FileType::Directory); + return info; + } } } info.set_type(FileType::NotFound); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 494c2f7e0c9e4..3697c3bcc319d 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -2065,6 +2065,20 @@ void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); + // . is immediately before "/" lexicographically, ensure that this doesn't + // cause unexpected issues. 
NOTE: It seems real Azure blob storage doesn't + allow blob names to end in `.` + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream( + data.ContainerPath("test-object-dir/some_other_dir.a"), + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, + fs()->OpenOutputStream(data.ContainerPath(kObjectName + ".a"), + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); + ASSERT_OK(output->Close()); + AssertFileInfo(fs(), data.ContainerPath(kObjectName), FileType::File); AssertFileInfo(fs(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); AssertFileInfo(fs(), data.ContainerPath("test-object-dir"), FileType::Directory); From 993a27c67cf95c6b6fe4b09aec62b00fe4c9988a Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sat, 5 Oct 2024 22:31:12 +0100 Subject: [PATCH 09/38] GH-44303: [C++][FS][Azure] Fix minor hierarchical namespace bugs (#44307) ### Rationale for this change There are a couple of minor bugs in the `AzureFileSystem` for hierarchical namespace accounts. These cause failures in `TestAzureHierarchicalNSGeneric.GetFileInfoSelectorWithRecursion` and `TestAzureHierarchicalNSGeneric.Empty` which do not run automatically in CI. ### What changes are included in this PR? - Fix incorrectly returning Not found on recursive get file info on container root. - Implement `selector.max_recursion` for hierarchical namespace. This is implemented completely artificially after `directory_client.ListPaths(/*recursive=*/true)`. - Enable a couple of features on the generic tests that were disabled but are actually supported. ### Are these changes tested? There are already failing tests for these but they don't run on CI because they require connecting to a real Azure blob storage account. I made sure to run all the tests locally including the ones that connect to real Azure storage, both flat and hierarchical and all the tests passed. ### Are there any user-facing changes? 
* GitHub Issue: #44303 Lead-authored-by: Thomas Newton Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.cc | 18 +++++++++++++++--- cpp/src/arrow/filesystem/azurefs_test.cc | 18 +++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index d9a69800bb87e..78f4ad1edd9a9 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1916,18 +1916,22 @@ class AzureFileSystem::Impl { /// \brief List the paths at the root of a filesystem or some dir in a filesystem. /// /// \pre adlfs_client is the client for the filesystem named like the first - /// segment of select.base_dir. + /// segment of select.base_dir. The filesystem is known to exist. Status GetFileInfoWithSelectorFromFileSystem( const DataLake::DataLakeFileSystemClient& adlfs_client, const Core::Context& context, Azure::Nullable page_size_hint, const FileSelector& select, FileInfoVector* acc_results) { ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); + // The filesystem a.k.a. the container is known to exist so if the path is empty then + // we have already found the base_location, so initialize found to true. 
+ bool found = base_location.path.empty(); + auto directory_client = adlfs_client.GetDirectoryClient(base_location.path); - bool found = false; DataLake::ListPathsOptions options; options.PageSizeHint = page_size_hint; + auto base_path_depth = internal::GetAbstractPathDepth(base_location.path); try { auto list_response = directory_client.ListPaths(select.recursive, options, context); for (; list_response.HasPage(); list_response.MoveToNextPage(context)) { @@ -1939,7 +1943,15 @@ class AzureFileSystem::Impl { if (path.Name == base_location.path && !path.IsDirectory) { return NotADir(base_location); } - acc_results->push_back(FileInfoFromPath(base_location.container, path)); + // Subtract 1 because with `max_recursion=0` we want to list the base path, + // which will produce results with depth 1 greater than the base path's depth. + // NOTE: `select.max_recursion` + anything will cause integer overflows because + // `select.max_recursion` defaults to `INT32_MAX`. Therefore, options to + // rewrite this condition in a more readable way are limited. 
+ if (internal::GetAbstractPathDepth(path.Name) - base_path_depth - 1 <= + select.max_recursion) { + acc_results->push_back(FileInfoFromPath(base_location.container, path)); + } } } } catch (const Storage::StorageException& exception) { diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 3697c3bcc319d..242c2c29505ac 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -364,9 +364,9 @@ class TestGeneric : public ::testing::Test, public GenericFileSystemTest { std::shared_ptr GetEmptyFileSystem() override { return fs_; } bool have_implicit_directories() const override { return true; } - bool allow_write_file_over_dir() const override { return true; } - bool allow_read_dir_as_file() const override { return true; } - bool allow_move_dir() const override { return false; } + bool allow_write_file_over_dir() const override { return false; } + bool allow_read_dir_as_file() const override { return false; } + bool allow_move_dir() const override { return true; } bool allow_move_file() const override { return true; } bool allow_append_to_file() const override { return true; } bool have_directory_mtimes() const override { return true; } @@ -404,7 +404,11 @@ class TestAzuriteGeneric : public TestGeneric { } protected: - // Azurite doesn't support moving files over containers. + // Azurite doesn't block writing files over directories. + bool allow_write_file_over_dir() const override { return true; } + // Azurite doesn't support moving directories. + bool allow_move_dir() const override { return false; } + // Azurite doesn't support moving files. bool allow_move_file() const override { return false; } // Azurite doesn't support directory mtime. bool have_directory_mtimes() const override { return false; } @@ -426,7 +430,11 @@ class TestAzureFlatNSGeneric : public TestGeneric { } protected: - // Flat namespace account doesn't support moving files over containers. 
+ // Flat namespace account doesn't block writing files over directories. + bool allow_write_file_over_dir() const override { return true; } + // Flat namespace account doesn't support moving directories. + bool allow_move_dir() const override { return false; } + // Flat namespace account doesn't support moving files. bool allow_move_file() const override { return false; } // Flat namespace account doesn't support directory mtime. bool have_directory_mtimes() const override { return false; } From 596c3cfc251d1d0905d1e25c50c991b8469edeee Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Sun, 6 Oct 2024 17:23:43 -0700 Subject: [PATCH 10/38] GH-44290: [Java][Flight] Add ActionType description getter (#44291) * GitHub Issue: #44290 Authored-by: Devin Smith Signed-off-by: David Li --- .../org/apache/arrow/flight/ActionType.java | 4 +++ .../apache/arrow/flight/TestActionType.java | 32 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java index 669bf00adbd92..46ef3ae5ca104 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ActionType.java @@ -45,6 +45,10 @@ public String getType() { return type; } + public String getDescription() { + return description; + } + /** Converts the POJO to the corresponding protocol buffer type. 
*/ Flight.ActionType toProtocol() { return Flight.ActionType.newBuilder().setType(type).setDescription(description).build(); diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java new file mode 100644 index 0000000000000..03a7835d55ce9 --- /dev/null +++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/TestActionType.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.flight; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +public class TestActionType { + @Test + void testActionType() { + final String type = "MyType"; + final String description = "My Description"; + final ActionType actionType = new ActionType(type, description); + assertEquals(type, actionType.getType()); + assertEquals(description, actionType.getDescription()); + } +} From 81bb362657896c668d9f9d7e6150ba60507238dc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:18:53 +0900 Subject: [PATCH 11/38] MINOR: [Java] Bump com.fasterxml.jackson:jackson-bom from 2.17.2 to 2.18.0 in /java (#44262) Bumps [com.fasterxml.jackson:jackson-bom](https://github.com/FasterXML/jackson-bom) from 2.17.2 to 2.18.0.
Commits
  • 7236550 [maven-release-plugin] prepare release jackson-bom-2.18.0
  • 58c2791 Prepare for 2.18.0 release
  • 3775318 Merge pull request #73 from FasterXML/dependabot/github_actions/github-action...
  • 540b7e7 Bump actions/setup-java from 4.2.1 to 4.2.2 in the github-actions group
  • 6cc8c64 Back to snapshot deps
  • 3c90e0e [maven-release-plugin] prepare for next development iteration
  • 7b9cf20 [maven-release-plugin] prepare release jackson-bom-2.18.0-rc1
  • f2d90ac Prepare for 2.18.0-rc1 release
  • e910a1e Update deps to 2.18.0-rc1-SNAPSHOT
  • b35d27c Merge branch '2.17' into 2.18
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.fasterxml.jackson:jackson-bom&package-manager=maven&previous-version=2.17.2&new-version=2.18.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 1d67476bfa709..a95c399192e67 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -99,7 +99,7 @@ under the License. 4.1.113.Final 1.65.0 3.25.4 - 2.17.2 + 2.18.0 3.4.0 24.3.25 1.12.0 From f5663dc32ee863d0f15970ad328f7c7aa39c6f43 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:20:10 +0900 Subject: [PATCH 12/38] MINOR: [Java] Bump dep.junit.jupiter.version from 5.11.0 to 5.11.1 in /java (#44261) Bumps `dep.junit.jupiter.version` from 5.11.0 to 5.11.1. Updates `org.junit.jupiter:junit-jupiter-engine` from 5.11.0 to 5.11.1
Release notes

Sourced from org.junit.jupiter:junit-jupiter-engine's releases.

JUnit 5.11.1 = Platform 1.11.1 + Jupiter 5.11.1 + Vintage 5.11.1

See Release Notes.

Full Changelog: https://github.com/junit-team/junit5/compare/r5.11.0...r5.11.1

Commits

Updates `org.junit.jupiter:junit-jupiter-api` from 5.11.0 to 5.11.1
Release notes

Sourced from org.junit.jupiter:junit-jupiter-api's releases.

JUnit 5.11.1 = Platform 1.11.1 + Jupiter 5.11.1 + Vintage 5.11.1

See Release Notes.

Full Changelog: https://github.com/junit-team/junit5/compare/r5.11.0...r5.11.1

Commits

Updates `org.junit.jupiter:junit-jupiter-params` from 5.11.0 to 5.11.1
Release notes

Sourced from org.junit.jupiter:junit-jupiter-params's releases.

JUnit 5.11.1 = Platform 1.11.1 + Jupiter 5.11.1 + Vintage 5.11.1

See Release Notes.

Full Changelog: https://github.com/junit-team/junit5/compare/r5.11.0...r5.11.1

Commits

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index a95c399192e67..e71ae2f6b98db 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -93,7 +93,7 @@ under the License. ${project.build.directory}/generated-sources 1.9.0 - 5.11.0 + 5.11.1 2.0.16 33.3.0-jre 4.1.113.Final From f8333cfa5fc56c105443ecb1a722bb318089a6d4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:28:10 +0900 Subject: [PATCH 13/38] MINOR: [Java] Bump com.google.guava:guava-bom from 33.3.0-jre to 33.3.1-jre in /java (#44264) Bumps [com.google.guava:guava-bom](https://github.com/google/guava) from 33.3.0-jre to 33.3.1-jre.
Release notes

Sourced from com.google.guava:guava-bom's releases.

33.3.1

Maven

<dependency>
  <groupId>com.google.guava</groupId>
  <artifactId>guava</artifactId>
  <version>33.3.1-jre</version>
  <!-- or, for Android: -->
  <version>33.3.1-android</version>
</dependency>

Jar files

Guava requires one runtime dependency, which you can download here:

Javadoc

JDiff

Changelog

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.google.guava:guava-bom&package-manager=maven&previous-version=33.3.0-jre&new-version=33.3.1-jre)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index e71ae2f6b98db..43a737d1c1bad 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -95,7 +95,7 @@ under the License. 1.9.0 5.11.1 2.0.16 - 33.3.0-jre + 33.3.1-jre 4.1.113.Final 1.65.0 3.25.4 From 36e0d11739f5c1cc0ec3023957c103574094ca85 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:29:59 +0900 Subject: [PATCH 14/38] MINOR: [Java] Bump com.google.api.grpc:proto-google-common-protos from 2.44.0 to 2.45.1 in /java (#44263) Bumps [com.google.api.grpc:proto-google-common-protos](https://github.com/googleapis/sdk-platform-java) from 2.44.0 to 2.45.1.
Release notes

Sourced from com.google.api.grpc:proto-google-common-protos's releases.

v2.45.0

2.45.0 (2024-09-09)

Features

  • add Batcher#close(timeout) and Batcher#cancelOutstanding (#3141) (b5a92e4)
  • add full RetrySettings sample code to Settings classes (#3056) (8fe3a2d)
  • add toString to futures returned by operations (#3140) (afecb8c)
  • bake gapic-generator-java into the hermetic build docker image (#3067) (a372e82)

Bug Fixes

  • gax: prevent truncation/overflow when converting time values (#3095) (699074e)

Dependencies

  • add opentelemetry exporter-metrics and shared-resoucemapping to shared dependencies (#3078) (fc8d80d)
  • update dependency certifi to v2024.8.30 (#3150) (c18b705)
  • update dependency com.google.api-client:google-api-client-bom to v2.7.0 (#3151) (5f43e43)
  • update dependency com.google.errorprone:error_prone_annotations to v2.31.0 (#3153) (3071509)
  • update dependency com.google.errorprone:error_prone_annotations to v2.31.0 (#3154) (335ee63)
  • update dependency com.google.guava:guava to v33.3.0-jre (#3119) (41174b0)
  • update dependency dev.cel:cel to v0.7.1 (#3155) (b1ddd16)
  • update dependency filelock to v3.16.0 (#3175) (6681113)
  • update dependency idna to v3.8 (#3156) (82f5326)
  • update dependency io.netty:netty-tcnative-boringssl-static to v2.0.66.final (#3148) (a7efaa8)
  • update dependency net.bytebuddy:byte-buddy to v1.15.1 (#3115) (0e06c5f)
  • update dependency org.apache.commons:commons-lang3 to v3.17.0 (#3157) (8d3b9fd)
  • update dependency org.checkerframework:checker-qual to v3.47.0 (#3166) (365674d)
  • update dependency org.yaml:snakeyaml to v2.3 (#3158) (e67ea9a)
  • update dependency platformdirs to v4.3.2 (#3176) (4f2f9e0)
  • update dependency virtualenv to v20.26.4 (#3177) (080e078)
  • update google api dependencies (#3118) (67342ea)
  • update google auth library dependencies to v1.25.0 (#3168) (715884a)
  • update google http client dependencies to v1.45.0 (#3159) (a3fe612)
  • update googleapis/java-cloud-bom digest to 6626f91 (#3147) (658e40e)
  • update junit5 monorepo to v5.11.0 (#3111) (6bf84c8)
  • update netty dependencies to v4.1.113.final (#3165) (9b5957d)
  • update opentelemetry-java monorepo to v1.42.0 (#3172) (413c44e)

Documentation

Changelog

Sourced from com.google.api.grpc:proto-google-common-protos's changelog.

Changelog

2.46.1 (2024-09-25)

Dependencies

2.46.0 (2024-09-23)

Features

  • expose property in GrpcTransportChannel if it uses direct path. (#3170) (9a432f7)
  • generate a GAPIC library from api definition (#3208) (b6b5d7b)
  • Metrics tracer addAttribute map overload (#3202) (1a988df)

Bug Fixes

  • generate pr description with repo level change (#3182) (edd2168)

Dependencies

  • update dependency com.google.errorprone:error_prone_annotations to v2.32.0 (#3192) (b280706)
  • update dependency com.google.errorprone:error_prone_annotations to v2.32.0 (#3193) (ed0cd17)
  • update dependency filelock to v3.16.1 (#3210) (703ac3d)
  • update dependency idna to v3.10 (#3201) (211c3ec)
  • update dependency org.threeten:threetenbp to v1.7.0 (#3205) (c88a722)
  • update dependency org.threeten:threetenbp to v1.7.0 (#3206) (3e9fbac)
  • update dependency platformdirs to v4.3.3 (#3200) (b62b05d)
  • update dependency platformdirs to v4.3.6 (#3209) (227ffa5)
  • update dependency urllib3 to v2.2.3 (#3194) (f69d511)
  • update dependency virtualenv to v20.26.5 (#3212) (d3ef97a)
  • update google api dependencies (#3183) (02eea8d)
  • update google auth library dependencies to v1.26.0 (#3216) (0b369e9)
  • update google auth library dependencies to v1.27.0 (#3221) (a3cb9e7)
  • update googleapis/java-cloud-bom digest to 06f632d (#3198) (49dcd35)
  • update googleapis/java-cloud-bom digest to e7d8909 (#3207) (de497ee)
  • update opentelemetry-java monorepo to v1.42.1 (#3189) (38117d8)

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.google.api.grpc:proto-google-common-protos&package-manager=maven&previous-version=2.44.0&new-version=2.45.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/flight/flight-core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index a9724289c2252..c2427272eaa89 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -134,7 +134,7 @@ under the License. com.google.api.grpc proto-google-common-protos - 2.44.0 + 2.45.1 test From ab95a4d25142ff5723117c9d3a1c6453a6640cf6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:30:41 +0900 Subject: [PATCH 15/38] MINOR: [Java] Bump org.cyclonedx:cyclonedx-maven-plugin from 2.8.1 to 2.8.2 in /java (#44267) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.cyclonedx:cyclonedx-maven-plugin](https://github.com/CycloneDX/cyclonedx-maven-plugin) from 2.8.1 to 2.8.2.
Release notes

Sourced from org.cyclonedx:cyclonedx-maven-plugin's releases.

2.8.2

🐛 Bug Fixes

📦 Dependency updates

Commits
  • a3f468f [maven-release-plugin] prepare release cyclonedx-maven-plugin-2.8.2
  • 85e3bd8 Bump plugin-tools.version from 3.13.1 to 3.15.0
  • 314424d Bump org.apache.maven.plugins:maven-project-info-reports-plugin
  • 41808a0 display configured classifier from #506
  • be1b73a Bump org.apache.commons:commons-lang3 from 3.14.0 to 3.17.0
  • bb865c4 Bump commons-codec:commons-codec from 1.17.0 to 1.17.1
  • cb74626 [maven-release-plugin] prepare for next development iteration
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.cyclonedx:cyclonedx-maven-plugin&package-manager=maven&previous-version=2.8.1&new-version=2.8.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 43a737d1c1bad..4fb81c722c116 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -530,7 +530,7 @@ under the License. org.cyclonedx cyclonedx-maven-plugin - 2.8.1 + 2.8.2 org.apache.drill.tools From e62fbaafd129931b1c217fcaa1b4c254087ab289 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 7 Oct 2024 11:13:49 +0200 Subject: [PATCH 16/38] GH-43879: [Go] Remove go related code (#44293) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Go code has been moved to its own repository at https://github.com/apache/arrow-go ### What changes are included in this PR? Removing the folder `go/` from the https://github.com/apache/arrow repository ### Are these changes tested? I'll validate on CI there's no failures ### Are there any user-facing changes? The Go code is no longer present on the Arrow repository. 
* GitHub Issue: #43879 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- go/LICENSE.txt | 1791 - go/README.md | 147 - go/arrow/.editorconfig | 21 - go/arrow/.gitignore | 35 - go/arrow/Gopkg.lock | 44 - go/arrow/Gopkg.toml | 23 - go/arrow/Makefile | 54 - go/arrow/_examples/helloworld/main.go | 52 - go/arrow/_tools/tmpl/main.go | 268 - go/arrow/_tools/tmpl/main_test.go | 73 - go/arrow/_tools/tools.go | 25 - go/arrow/array.go | 129 - go/arrow/array/array.go | 186 - go/arrow/array/array_test.go | 346 - go/arrow/array/binary.go | 453 - go/arrow/array/binary_test.go | 726 - go/arrow/array/binarybuilder.go | 704 - go/arrow/array/binarybuilder_test.go | 151 - go/arrow/array/boolean.go | 126 - go/arrow/array/boolean_test.go | 322 - go/arrow/array/booleanbuilder.go | 263 - go/arrow/array/booleanbuilder_test.go | 103 - go/arrow/array/bufferbuilder.go | 261 - go/arrow/array/bufferbuilder_byte.go | 30 - go/arrow/array/bufferbuilder_numeric.gen.go | 124 - .../array/bufferbuilder_numeric.gen.go.tmpl | 61 - go/arrow/array/bufferbuilder_numeric_test.go | 106 - go/arrow/array/builder.go | 374 - go/arrow/array/builder_test.go | 123 - go/arrow/array/compare.go | 854 - go/arrow/array/compare_test.go | 728 - go/arrow/array/concat.go | 933 - go/arrow/array/concat_test.go | 789 - go/arrow/array/data.go | 277 - go/arrow/array/data_test.go | 138 - go/arrow/array/decimal128.go | 368 - go/arrow/array/decimal128_test.go | 283 - go/arrow/array/decimal256.go | 368 - go/arrow/array/decimal256_test.go | 293 - go/arrow/array/decimal_test.go | 222 - go/arrow/array/dictionary.go | 1958 - go/arrow/array/dictionary_test.go | 1918 - go/arrow/array/diff.go | 315 - go/arrow/array/diff_test.go | 878 - go/arrow/array/doc.go | 20 - go/arrow/array/encoded.go | 520 - go/arrow/array/encoded_test.go | 459 - go/arrow/array/extension.go | 244 - go/arrow/array/extension_builder.go | 25 - go/arrow/array/extension_test.go | 86 - go/arrow/array/fixed_size_list.go | 385 - 
go/arrow/array/fixed_size_list_test.go | 257 - go/arrow/array/fixedsize_binary.go | 123 - go/arrow/array/fixedsize_binary_test.go | 189 - go/arrow/array/fixedsize_binarybuilder.go | 261 - .../array/fixedsize_binarybuilder_test.go | 107 - go/arrow/array/float16.go | 123 - go/arrow/array/float16_builder.go | 263 - go/arrow/array/float16_builder_test.go | 156 - go/arrow/array/interval.go | 953 - go/arrow/array/interval_test.go | 524 - go/arrow/array/json_reader.go | 205 - go/arrow/array/json_reader_test.go | 141 - go/arrow/array/list.go | 1574 - go/arrow/array/list_test.go | 864 - go/arrow/array/map.go | 361 - go/arrow/array/map_test.go | 254 - go/arrow/array/null.go | 218 - go/arrow/array/null_test.go | 110 - go/arrow/array/numeric.gen.go | 1452 - go/arrow/array/numeric.gen.go.tmpl | 192 - go/arrow/array/numeric_test.go | 779 - go/arrow/array/numericbuilder.gen.go | 3664 - go/arrow/array/numericbuilder.gen.go.tmpl | 447 - go/arrow/array/numericbuilder.gen_test.go | 3125 - .../array/numericbuilder.gen_test.go.tmpl | 299 - go/arrow/array/record.go | 411 - go/arrow/array/record_test.go | 787 - go/arrow/array/string.go | 718 - go/arrow/array/string_test.go | 794 - go/arrow/array/struct.go | 491 - go/arrow/array/struct_test.go | 532 - go/arrow/array/table.go | 421 - go/arrow/array/table_test.go | 833 - go/arrow/array/timestamp.go | 380 - go/arrow/array/timestamp_test.go | 300 - go/arrow/array/union.go | 1370 - go/arrow/array/union_test.go | 1117 - go/arrow/array/util.go | 523 - go/arrow/array/util_test.go | 545 - go/arrow/arrio/arrio.go | 92 - go/arrow/arrio/arrio_test.go | 197 - go/arrow/avro/avro2parquet/main.go | 119 - go/arrow/avro/loader.go | 85 - go/arrow/avro/reader.go | 337 - go/arrow/avro/reader_test.go | 364 - go/arrow/avro/reader_types.go | 875 - go/arrow/avro/schema.go | 423 - go/arrow/avro/schema_test.go | 362 - go/arrow/avro/testdata/arrayrecordmap.avro | Bin 582 -> 0 bytes .../avro/testdata/githubsamplecommits.avro | Bin 95131 -> 0 bytes 
go/arrow/bitutil/Makefile | 62 - go/arrow/bitutil/_lib/bitmap_ops.c | 46 - go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s | 410 - go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s | 530 - go/arrow/bitutil/bitmap_ops.go | 109 - go/arrow/bitutil/bitmap_ops_amd64.go | 41 - go/arrow/bitutil/bitmap_ops_arm64.go | 27 - go/arrow/bitutil/bitmap_ops_avx2_amd64.go | 52 - go/arrow/bitutil/bitmap_ops_avx2_amd64.s | 373 - go/arrow/bitutil/bitmap_ops_noasm.go | 27 - go/arrow/bitutil/bitmap_ops_ppc64le.go | 27 - go/arrow/bitutil/bitmap_ops_s390x.go | 27 - go/arrow/bitutil/bitmap_ops_sse4_amd64.go | 52 - go/arrow/bitutil/bitmap_ops_sse4_amd64.s | 501 - go/arrow/bitutil/bitmaps.go | 747 - go/arrow/bitutil/bitmaps_test.go | 580 - go/arrow/bitutil/bitutil.go | 186 - go/arrow/bitutil/bitutil_bytes.go | 37 - go/arrow/bitutil/bitutil_test.go | 320 - go/arrow/bitutil/endian_default.go | 34 - go/arrow/bitutil/endian_s390x.go | 32 - go/arrow/cdata/arrow/c/abi.h | 111 - go/arrow/cdata/arrow/c/helpers.h | 117 - go/arrow/cdata/cdata.go | 1028 - go/arrow/cdata/cdata_allocate.go | 57 - go/arrow/cdata/cdata_exports.go | 480 - go/arrow/cdata/cdata_fulltest.c | 494 - go/arrow/cdata/cdata_test.go | 1027 - go/arrow/cdata/cdata_test_framework.go | 451 - go/arrow/cdata/exports.go | 157 - go/arrow/cdata/import_allocator.go | 58 - go/arrow/cdata/interface.go | 284 - go/arrow/cdata/test/test_cimport.go | 178 - go/arrow/cdata/test/test_export_to_cgo.py | 230 - go/arrow/cdata/trampoline.c | 34 - go/arrow/cdata/utils.h | 45 - go/arrow/compare.go | 153 - go/arrow/compare_test.go | 397 - go/arrow/compute/arithmetic.go | 1229 - go/arrow/compute/arithmetic_test.go | 3504 - go/arrow/compute/cast.go | 587 - go/arrow/compute/cast_test.go | 2867 - go/arrow/compute/datum.go | 305 - go/arrow/compute/datumkind_string.go | 30 - go/arrow/compute/doc.go | 34 - go/arrow/compute/example_test.go | 91 - go/arrow/compute/exec.go | 199 - go/arrow/compute/exec/hash_util.go | 24 - go/arrow/compute/exec/kernel.go | 695 - 
go/arrow/compute/exec/kernel_test.go | 588 - go/arrow/compute/exec/span.go | 634 - go/arrow/compute/exec/span_offsets.go | 36 - go/arrow/compute/exec/span_test.go | 835 - go/arrow/compute/exec/utils.go | 276 - go/arrow/compute/exec/utils_test.go | 111 - go/arrow/compute/exec_internals_test.go | 585 - go/arrow/compute/exec_test.go | 379 - go/arrow/compute/executor.go | 1122 - go/arrow/compute/expression.go | 904 - go/arrow/compute/expression_test.go | 259 - go/arrow/compute/exprs/builders.go | 445 - go/arrow/compute/exprs/builders_test.go | 92 - go/arrow/compute/exprs/exec.go | 620 - go/arrow/compute/exprs/exec_internal_test.go | 114 - go/arrow/compute/exprs/exec_test.go | 461 - go/arrow/compute/exprs/extension_types.go | 149 - go/arrow/compute/exprs/field_refs.go | 254 - go/arrow/compute/exprs/types.go | 745 - go/arrow/compute/fieldref.go | 587 - go/arrow/compute/fieldref_hash.go | 39 - go/arrow/compute/fieldref_test.go | 316 - go/arrow/compute/funckind_string.go | 29 - go/arrow/compute/functions.go | 430 - go/arrow/compute/functions_test.go | 69 - go/arrow/compute/internal/kernels/Makefile | 110 - .../internal/kernels/_lib/base_arithmetic.cc | 484 - .../kernels/_lib/base_arithmetic_avx2_amd64.s | 34899 -------- .../kernels/_lib/base_arithmetic_sse4_amd64.s | 38194 --------- .../internal/kernels/_lib/cast_numeric.cc | 104 - .../kernels/_lib/cast_numeric_avx2_amd64.s | 11337 --- .../internal/kernels/_lib/cast_numeric_neon.s | 6088 -- .../kernels/_lib/cast_numeric_sse4_amd64.s | 8837 -- .../internal/kernels/_lib/constant_factor.c | 35 - .../kernels/_lib/constant_factor_avx2_amd64.s | 837 - .../kernels/_lib/constant_factor_sse4_amd64.s | 654 - .../kernels/_lib/scalar_comparison.cc | 241 - .../_lib/scalar_comparison_avx2_amd64.s | 67763 ---------------- .../_lib/scalar_comparison_sse4_amd64.s | 59819 -------------- .../compute/internal/kernels/_lib/types.h | 708 - .../kernels/_lib/vendored/safe-math.h | 1072 - .../internal/kernels/base_arithmetic.go | 897 - 
.../internal/kernels/base_arithmetic_amd64.go | 152 - .../kernels/base_arithmetic_avx2_amd64.go | 60 - .../kernels/base_arithmetic_avx2_amd64.s | 35529 -------- .../kernels/base_arithmetic_sse4_amd64.go | 60 - .../kernels/base_arithmetic_sse4_amd64.s | 39139 --------- .../kernels/basic_arithmetic_noasm.go | 33 - .../compute/internal/kernels/boolean_cast.go | 107 - go/arrow/compute/internal/kernels/cast.go | 116 - .../compute/internal/kernels/cast_numeric.go | 87 - .../internal/kernels/cast_numeric_amd64.go | 33 - .../kernels/cast_numeric_avx2_amd64.go | 32 - .../kernels/cast_numeric_avx2_amd64.s | 11545 --- .../kernels/cast_numeric_neon_arm64.go | 41 - .../kernels/cast_numeric_neon_arm64.s | 6087 -- .../kernels/cast_numeric_sse4_amd64.go | 32 - .../kernels/cast_numeric_sse4_amd64.s | 9045 --- .../compute/internal/kernels/cast_temporal.go | 463 - .../kernels/compareoperator_string.go | 30 - .../internal/kernels/constant_factor.go | 81 - .../internal/kernels/constant_factor_amd64.go | 57 - .../kernels/constant_factor_avx2_amd64.go | 77 - .../kernels/constant_factor_avx2_amd64.s | 781 - .../kernels/constant_factor_sse4_amd64.go | 77 - .../kernels/constant_factor_sse4_amd64.s | 597 - go/arrow/compute/internal/kernels/doc.go | 19 - go/arrow/compute/internal/kernels/helpers.go | 989 - .../compute/internal/kernels/numeric_cast.go | 866 - go/arrow/compute/internal/kernels/rounding.go | 809 - .../internal/kernels/roundmode_string.go | 34 - .../internal/kernels/scalar_arithmetic.go | 412 - .../internal/kernels/scalar_boolean.go | 334 - .../kernels/scalar_comparison_amd64.go | 109 - .../kernels/scalar_comparison_avx2_amd64.go | 109 - .../kernels/scalar_comparison_avx2_amd64.s | 67310 --------------- .../kernels/scalar_comparison_noasm.go | 25 - .../kernels/scalar_comparison_sse4_amd64.go | 109 - .../kernels/scalar_comparison_sse4_amd64.s | 58288 ------------- .../internal/kernels/scalar_comparisons.go | 701 - .../compute/internal/kernels/string_casts.go | 409 - 
go/arrow/compute/internal/kernels/types.go | 109 - .../compute/internal/kernels/vector_hash.go | 565 - .../internal/kernels/vector_run_end_encode.go | 957 - .../internal/kernels/vector_selection.go | 1789 - go/arrow/compute/registry.go | 209 - go/arrow/compute/registry_test.go | 182 - go/arrow/compute/scalar_bool.go | 133 - go/arrow/compute/scalar_bool_test.go | 154 - go/arrow/compute/scalar_compare.go | 137 - go/arrow/compute/scalar_compare_test.go | 1484 - go/arrow/compute/selection.go | 729 - go/arrow/compute/utils.go | 400 - go/arrow/compute/vector_hash.go | 59 - go/arrow/compute/vector_hash_test.go | 517 - go/arrow/compute/vector_run_end_test.go | 423 - go/arrow/compute/vector_run_ends.go | 90 - go/arrow/compute/vector_selection_test.go | 1652 - go/arrow/csv/common.go | 269 - go/arrow/csv/reader.go | 1030 - go/arrow/csv/reader_test.go | 956 - go/arrow/csv/testdata/header.csv | 21 - go/arrow/csv/testdata/simple.csv | 28 - go/arrow/csv/testdata/types.csv | 21 - go/arrow/csv/transformer.go | 282 - go/arrow/csv/writer.go | 116 - go/arrow/csv/writer_test.go | 430 - go/arrow/datatype.go | 411 - go/arrow/datatype_binary.go | 139 - go/arrow/datatype_binary_test.go | 113 - go/arrow/datatype_encoded.go | 69 - go/arrow/datatype_extension.go | 180 - go/arrow/datatype_extension_test.go | 76 - go/arrow/datatype_fixedwidth.go | 816 - go/arrow/datatype_fixedwidth_test.go | 440 - go/arrow/datatype_nested.go | 993 - go/arrow/datatype_nested_test.go | 633 - go/arrow/datatype_null.go | 31 - go/arrow/datatype_null_test.go | 38 - go/arrow/datatype_numeric.gen.go | 206 - go/arrow/datatype_numeric.gen.go.tmpl | 45 - go/arrow/datatype_numeric.gen.go.tmpldata | 66 - go/arrow/datatype_viewheader.go | 141 - go/arrow/datatype_viewheader_inline.go | 31 - go/arrow/datatype_viewheader_inline_go1.19.go | 35 - go/arrow/datatype_viewheader_inline_tinygo.go | 35 - go/arrow/decimal128/decimal128.go | 627 - go/arrow/decimal128/decimal128_test.go | 709 - go/arrow/decimal256/decimal256.go | 708 - 
go/arrow/decimal256/decimal256_test.go | 623 - go/arrow/doc.go | 46 - go/arrow/encoded/ree_utils.go | 219 - go/arrow/encoded/ree_utils_test.go | 156 - go/arrow/endian/big.go | 30 - go/arrow/endian/endian.go | 41 - go/arrow/endian/little.go | 30 - go/arrow/errors.go | 28 - go/arrow/example_test.go | 832 - go/arrow/extensions/bool8.go | 216 - go/arrow/extensions/bool8_test.go | 316 - go/arrow/extensions/doc.go | 20 - go/arrow/extensions/extensions.go | 36 - go/arrow/extensions/extensions_test.go | 105 - go/arrow/extensions/json.go | 148 - go/arrow/extensions/json_test.go | 268 - go/arrow/extensions/opaque.go | 106 - go/arrow/extensions/opaque_test.go | 197 - go/arrow/extensions/uuid.go | 265 - go/arrow/extensions/uuid_test.go | 257 - go/arrow/flight/basic_auth_flight_test.go | 202 - go/arrow/flight/client.go | 453 - go/arrow/flight/client_auth.go | 91 - go/arrow/flight/cookie_middleware.go | 146 - go/arrow/flight/cookie_middleware_test.go | 301 - go/arrow/flight/doc.go | 77 - go/arrow/flight/example_flight_server_test.go | 90 - go/arrow/flight/flight_middleware_test.go | 361 - go/arrow/flight/flight_test.go | 486 - go/arrow/flight/flightsql/client.go | 1436 - go/arrow/flight/flightsql/client_test.go | 741 - go/arrow/flight/flightsql/column_metadata.go | 217 - go/arrow/flight/flightsql/driver/README.md | 226 - go/arrow/flight/flightsql/driver/config.go | 209 - .../flight/flightsql/driver/config_test.go | 427 - go/arrow/flight/flightsql/driver/driver.go | 615 - .../flight/flightsql/driver/driver_test.go | 1858 - go/arrow/flight/flightsql/driver/errors.go | 26 - go/arrow/flight/flightsql/driver/utils.go | 287 - .../flight/flightsql/driver/utils_test.go | 138 - .../cmd/sqlite_flightsql_server/main.go | 64 - .../flightsql/example/sql_batch_reader.go | 341 - .../flight/flightsql/example/sqlite_info.go | 201 - .../flight/flightsql/example/sqlite_server.go | 800 - .../sqlite_tables_schema_batch_reader.go | 203 - .../flight/flightsql/example/type_info.go | 118 - 
.../flightsql/schema_ref/reference_schemas.go | 106 - go/arrow/flight/flightsql/server.go | 1400 - go/arrow/flight/flightsql/server_test.go | 1046 - go/arrow/flight/flightsql/sql_info.go | 93 - .../flight/flightsql/sqlite_server_test.go | 910 - go/arrow/flight/flightsql/types.go | 899 - go/arrow/flight/gen.go | 20 - go/arrow/flight/gen/flight/Flight.pb.go | 2832 - go/arrow/flight/gen/flight/FlightSql.pb.go | 6082 -- go/arrow/flight/gen/flight/Flight_grpc.pb.go | 775 - go/arrow/flight/record_batch_reader.go | 264 - go/arrow/flight/record_batch_writer.go | 119 - go/arrow/flight/server.go | 405 - go/arrow/flight/server_auth.go | 245 - go/arrow/flight/server_example_test.go | 80 - go/arrow/flight/session/cookies.go | 80 - .../flight/session/example_session_test.go | 77 - go/arrow/flight/session/session.go | 240 - go/arrow/flight/session/stateful_session.go | 197 - go/arrow/flight/session/stateless_session.go | 122 - go/arrow/float16/float16.go | 203 - go/arrow/float16/float16_test.go | 293 - go/arrow/gen-flatbuffers.go | 123 - go/arrow/internal/arrdata/arrdata.go | 1835 - go/arrow/internal/arrdata/ioutil.go | 275 - go/arrow/internal/arrjson/arrjson.go | 2462 - go/arrow/internal/arrjson/arrjson_test.go | 6388 -- go/arrow/internal/arrjson/option.go | 57 - go/arrow/internal/arrjson/reader.go | 111 - go/arrow/internal/arrjson/writer.go | 101 - .../internal/cdata_integration/entrypoints.go | 193 - go/arrow/internal/debug/assert_off.go | 25 - go/arrow/internal/debug/assert_on.go | 29 - go/arrow/internal/debug/doc.go | 30 - go/arrow/internal/debug/log_off.go | 22 - go/arrow/internal/debug/log_on.go | 33 - go/arrow/internal/debug/util.go | 38 - go/arrow/internal/dictutils/dict.go | 411 - go/arrow/internal/dictutils/dict_test.go | 181 - go/arrow/internal/flatbuf/Binary.go | 51 - go/arrow/internal/flatbuf/BinaryView.go | 57 - go/arrow/internal/flatbuf/Block.go | 77 - go/arrow/internal/flatbuf/BodyCompression.go | 89 - .../internal/flatbuf/BodyCompressionMethod.go | 52 - 
go/arrow/internal/flatbuf/Bool.go | 50 - go/arrow/internal/flatbuf/Buffer.go | 75 - go/arrow/internal/flatbuf/CompressionType.go | 45 - go/arrow/internal/flatbuf/Date.go | 71 - go/arrow/internal/flatbuf/DateUnit.go | 45 - go/arrow/internal/flatbuf/Decimal.go | 107 - go/arrow/internal/flatbuf/DictionaryBatch.go | 108 - .../internal/flatbuf/DictionaryEncoding.go | 135 - go/arrow/internal/flatbuf/DictionaryKind.go | 47 - go/arrow/internal/flatbuf/Duration.go | 65 - go/arrow/internal/flatbuf/Endianness.go | 47 - go/arrow/internal/flatbuf/Feature.go | 71 - go/arrow/internal/flatbuf/Field.go | 188 - go/arrow/internal/flatbuf/FieldNode.go | 78 - go/arrow/internal/flatbuf/FixedSizeBinary.go | 67 - go/arrow/internal/flatbuf/FixedSizeList.go | 67 - go/arrow/internal/flatbuf/FloatingPoint.go | 65 - go/arrow/internal/flatbuf/Footer.go | 162 - go/arrow/internal/flatbuf/Int.go | 80 - go/arrow/internal/flatbuf/Interval.go | 65 - go/arrow/internal/flatbuf/IntervalUnit.go | 48 - go/arrow/internal/flatbuf/KeyValue.go | 75 - go/arrow/internal/flatbuf/LargeBinary.go | 52 - go/arrow/internal/flatbuf/LargeList.go | 52 - go/arrow/internal/flatbuf/LargeListView.go | 52 - go/arrow/internal/flatbuf/LargeUtf8.go | 52 - go/arrow/internal/flatbuf/List.go | 50 - go/arrow/internal/flatbuf/ListView.go | 53 - go/arrow/internal/flatbuf/Map.go | 92 - go/arrow/internal/flatbuf/Message.go | 133 - go/arrow/internal/flatbuf/MessageHeader.go | 65 - go/arrow/internal/flatbuf/MetadataVersion.go | 65 - go/arrow/internal/flatbuf/Null.go | 51 - go/arrow/internal/flatbuf/Precision.go | 48 - go/arrow/internal/flatbuf/RecordBatch.go | 214 - go/arrow/internal/flatbuf/RunEndEncoded.go | 55 - go/arrow/internal/flatbuf/RunLengthEncoded.go | 50 - go/arrow/internal/flatbuf/Schema.go | 159 - .../flatbuf/SparseMatrixCompressedAxis.go | 45 - .../internal/flatbuf/SparseMatrixIndexCSR.go | 181 - .../internal/flatbuf/SparseMatrixIndexCSX.go | 200 - go/arrow/internal/flatbuf/SparseTensor.go | 175 - 
.../internal/flatbuf/SparseTensorIndex.go | 51 - .../internal/flatbuf/SparseTensorIndexCOO.go | 179 - .../internal/flatbuf/SparseTensorIndexCSF.go | 291 - go/arrow/internal/flatbuf/Struct_.go | 53 - go/arrow/internal/flatbuf/Tensor.go | 163 - go/arrow/internal/flatbuf/TensorDim.go | 83 - go/arrow/internal/flatbuf/Time.go | 94 - go/arrow/internal/flatbuf/TimeUnit.go | 51 - go/arrow/internal/flatbuf/Timestamp.go | 201 - go/arrow/internal/flatbuf/Type.go | 123 - go/arrow/internal/flatbuf/Union.go | 101 - go/arrow/internal/flatbuf/UnionMode.go | 45 - go/arrow/internal/flatbuf/Utf8.go | 51 - go/arrow/internal/flatbuf/Utf8View.go | 57 - .../arrow-flight-integration-client/main.go | 54 - .../arrow-flight-integration-server/main.go | 43 - .../internal/flight_integration/middleware.go | 58 - .../internal/flight_integration/scenario.go | 3082 - .../internal/testing/gen/random_array_gen.go | 594 - go/arrow/internal/testing/tools/bits.go | 40 - go/arrow/internal/testing/tools/bits_test.go | 42 - go/arrow/internal/testing/tools/bool.go | 25 - go/arrow/internal/testing/tools/data_types.go | 52 - go/arrow/internal/utils.go | 59 - go/arrow/ipc/cmd/arrow-cat/main.go | 215 - go/arrow/ipc/cmd/arrow-cat/main_test.go | 573 - go/arrow/ipc/cmd/arrow-file-to-stream/main.go | 84 - .../ipc/cmd/arrow-file-to-stream/main_test.go | 68 - .../cmd/arrow-json-integration-test/main.go | 224 - .../arrow-json-integration-test/main_test.go | 89 - go/arrow/ipc/cmd/arrow-ls/main.go | 201 - go/arrow/ipc/cmd/arrow-ls/main_test.go | 332 - go/arrow/ipc/cmd/arrow-stream-to-file/main.go | 72 - .../ipc/cmd/arrow-stream-to-file/main_test.go | 78 - go/arrow/ipc/compression.go | 137 - go/arrow/ipc/endian_swap.go | 166 - go/arrow/ipc/endian_swap_test.go | 300 - go/arrow/ipc/file_reader.go | 770 - go/arrow/ipc/file_test.go | 117 - go/arrow/ipc/file_writer.go | 354 - go/arrow/ipc/ipc.go | 203 - go/arrow/ipc/ipc_test.go | 690 - go/arrow/ipc/message.go | 242 - go/arrow/ipc/message_test.go | 103 - 
go/arrow/ipc/metadata.go | 1317 - go/arrow/ipc/metadata_test.go | 227 - go/arrow/ipc/reader.go | 286 - go/arrow/ipc/reader_test.go | 185 - go/arrow/ipc/stream_test.go | 102 - go/arrow/ipc/writer.go | 1095 - go/arrow/ipc/writer_test.go | 256 - go/arrow/math/Makefile | 110 - go/arrow/math/_lib/.gitignore | 18 - go/arrow/math/_lib/CMakeLists.txt | 22 - go/arrow/math/_lib/arch.h | 29 - go/arrow/math/_lib/float64.c | 26 - go/arrow/math/_lib/float64_avx2.s | 176 - go/arrow/math/_lib/float64_neon.s | 60 - go/arrow/math/_lib/float64_sse4.s | 103 - go/arrow/math/_lib/int64.c | 27 - go/arrow/math/_lib/int64_avx2.s | 181 - go/arrow/math/_lib/int64_neon.s | 60 - go/arrow/math/_lib/int64_sse4.s | 108 - go/arrow/math/_lib/uint64.c | 27 - go/arrow/math/_lib/uint64_avx2.s | 181 - go/arrow/math/_lib/uint64_neon.s | 60 - go/arrow/math/_lib/uint64_sse4.s | 108 - go/arrow/math/doc.go | 30 - go/arrow/math/float64.go | 47 - go/arrow/math/float64.tmpldata | 4 - go/arrow/math/float64_amd64.go | 34 - go/arrow/math/float64_arm64.go | 30 - go/arrow/math/float64_avx2_amd64.go | 42 - go/arrow/math/float64_avx2_amd64.s | 167 - go/arrow/math/float64_neon_arm64.go | 42 - go/arrow/math/float64_neon_arm64.s | 43 - go/arrow/math/float64_noasm.go | 26 - go/arrow/math/float64_ppc64le.go | 26 - go/arrow/math/float64_s390x.go | 26 - go/arrow/math/float64_sse4_amd64.go | 42 - go/arrow/math/float64_sse4_amd64.s | 94 - go/arrow/math/float64_test.go | 86 - go/arrow/math/int64.go | 47 - go/arrow/math/int64.tmpldata | 4 - go/arrow/math/int64_amd64.go | 34 - go/arrow/math/int64_arm64.go | 30 - go/arrow/math/int64_avx2_amd64.go | 42 - go/arrow/math/int64_avx2_amd64.s | 173 - go/arrow/math/int64_neon_arm64.go | 42 - go/arrow/math/int64_neon_arm64.s | 58 - go/arrow/math/int64_noasm.go | 26 - go/arrow/math/int64_ppc64le.go | 26 - go/arrow/math/int64_s390x.go | 26 - go/arrow/math/int64_sse4_amd64.go | 42 - go/arrow/math/int64_sse4_amd64.s | 100 - go/arrow/math/int64_test.go | 86 - go/arrow/math/math_amd64.go | 52 - 
go/arrow/math/math_arm64.go | 44 - go/arrow/math/math_noasm.go | 30 - go/arrow/math/math_ppc64le.go | 30 - go/arrow/math/math_s390x.go | 30 - go/arrow/math/type.go.tmpl | 48 - go/arrow/math/type_amd64.go.tmpl | 33 - go/arrow/math/type_arm64.go.tmpl | 29 - go/arrow/math/type_noasm.go.tmpl | 25 - go/arrow/math/type_ppc64le.go.tmpl | 25 - go/arrow/math/type_s390x.go.tmpl | 25 - go/arrow/math/type_simd_amd64.go.tmpl | 42 - go/arrow/math/type_simd_arm64.go.tmpl | 42 - go/arrow/math/type_test.go.tmpl | 87 - go/arrow/math/uint64.go | 47 - go/arrow/math/uint64.tmpldata | 4 - go/arrow/math/uint64_amd64.go | 34 - go/arrow/math/uint64_arm64.go | 30 - go/arrow/math/uint64_avx2_amd64.go | 42 - go/arrow/math/uint64_avx2_amd64.s | 173 - go/arrow/math/uint64_neon_arm64.go | 42 - go/arrow/math/uint64_neon_arm64.s | 58 - go/arrow/math/uint64_noasm.go | 26 - go/arrow/math/uint64_ppc64le.go | 26 - go/arrow/math/uint64_s390x.go | 26 - go/arrow/math/uint64_sse4_amd64.go | 42 - go/arrow/math/uint64_sse4_amd64.s | 100 - go/arrow/math/uint64_test.go | 86 - go/arrow/memory/Makefile | 66 - go/arrow/memory/_lib/.gitignore | 18 - go/arrow/memory/_lib/CMakeLists.txt | 22 - go/arrow/memory/_lib/arch.h | 29 - go/arrow/memory/_lib/memory.c | 27 - go/arrow/memory/_lib/memory_avx2.s | 97 - go/arrow/memory/_lib/memory_neon.s | 46 - go/arrow/memory/_lib/memory_sse4.s | 96 - go/arrow/memory/allocator.go | 27 - go/arrow/memory/buffer.go | 157 - go/arrow/memory/buffer_test.go | 71 - go/arrow/memory/cgo_allocator.go | 108 - go/arrow/memory/cgo_allocator_defaults.go | 22 - go/arrow/memory/cgo_allocator_logging.go | 22 - go/arrow/memory/cgo_allocator_test.go | 82 - go/arrow/memory/checked_allocator.go | 221 - go/arrow/memory/default_allocator.go | 25 - go/arrow/memory/default_mallocator.go | 29 - go/arrow/memory/default_mallocator_test.go | 31 - go/arrow/memory/doc.go | 22 - go/arrow/memory/go_allocator.go | 47 - go/arrow/memory/go_allocator_test.go | 76 - .../memory/internal/cgoalloc/allocator.cc | 71 - 
.../memory/internal/cgoalloc/allocator.go | 108 - go/arrow/memory/internal/cgoalloc/allocator.h | 39 - go/arrow/memory/internal/cgoalloc/helpers.h | 52 - go/arrow/memory/mallocator/doc.go | 21 - go/arrow/memory/mallocator/mallocator.go | 123 - go/arrow/memory/mallocator/mallocator_test.go | 127 - go/arrow/memory/mallocator/mallocator_util.go | 26 - go/arrow/memory/memory.go | 33 - go/arrow/memory/memory_amd64.go | 34 - go/arrow/memory/memory_arm64.go | 32 - go/arrow/memory/memory_avx2_amd64.go | 42 - go/arrow/memory/memory_avx2_amd64.s | 85 - go/arrow/memory/memory_js_wasm.go | 24 - go/arrow/memory/memory_neon_arm64.go | 32 - go/arrow/memory/memory_neon_arm64.s | 43 - go/arrow/memory/memory_noasm.go | 24 - go/arrow/memory/memory_sse4_amd64.go | 32 - go/arrow/memory/memory_sse4_amd64.s | 84 - go/arrow/memory/memory_test.go | 125 - go/arrow/memory/util.go | 45 - go/arrow/memory/util_test.go | 61 - go/arrow/numeric.schema.json | 15 - go/arrow/numeric.tmpldata | 135 - go/arrow/record.go | 49 - go/arrow/scalar/append.go | 263 - go/arrow/scalar/append_test.go | 244 - go/arrow/scalar/binary.go | 203 - go/arrow/scalar/compare.go | 97 - go/arrow/scalar/nested.go | 808 - go/arrow/scalar/numeric.gen.go | 797 - go/arrow/scalar/numeric.gen.go.tmpl | 149 - go/arrow/scalar/numeric.gen.go.tmpldata | 52 - go/arrow/scalar/numeric.gen_test.go | 377 - go/arrow/scalar/numeric.gen_test.go.tmpl | 63 - go/arrow/scalar/parse.go | 777 - go/arrow/scalar/scalar.go | 1064 - go/arrow/scalar/scalar_test.go | 1490 - go/arrow/scalar/temporal.go | 481 - go/arrow/schema.go | 301 - go/arrow/schema_test.go | 480 - go/arrow/table.go | 195 - go/arrow/tensor/numeric.gen.go | 326 - go/arrow/tensor/numeric.gen.go.tmpl | 55 - go/arrow/tensor/numeric.gen_test.go | 1170 - go/arrow/tensor/numeric.gen_test.go.tmpl | 126 - go/arrow/tensor/tensor.go | 246 - go/arrow/tensor/tensor_test.go | 166 - go/arrow/tools.go | 25 - go/arrow/type_string.go | 65 - go/arrow/type_traits.go | 162 - go/arrow/type_traits_boolean.go 
| 28 - go/arrow/type_traits_decimal128.go | 58 - go/arrow/type_traits_decimal256.go | 53 - go/arrow/type_traits_float16.go | 57 - go/arrow/type_traits_interval.go | 135 - go/arrow/type_traits_numeric.gen.go | 524 - go/arrow/type_traits_numeric.gen.go.tmpl | 78 - go/arrow/type_traits_numeric.gen_test.go | 536 - go/arrow/type_traits_numeric.gen_test.go.tmpl | 61 - go/arrow/type_traits_test.go | 315 - go/arrow/type_traits_timestamp.go | 54 - go/arrow/type_traits_view.go | 48 - go/arrow/unionmode_string.go | 25 - go/arrow/util/byte_size.go | 79 - go/arrow/util/byte_size_test.go | 110 - go/arrow/util/messages/README.md | 25 - go/arrow/util/messages/types.proto | 102 - go/arrow/util/protobuf_reflect.go | 876 - go/arrow/util/protobuf_reflect_test.go | 498 - go/arrow/util/util_message/types.pb.go | 1135 - go/go.mod | 89 - go/go.sum | 165 - go/internal/bitutils/bit_block_counter.go | 452 - .../bitutils/bit_block_counter_test.go | 201 - go/internal/bitutils/bit_run_reader.go | 151 - go/internal/bitutils/bit_run_reader_test.go | 158 - go/internal/bitutils/bit_set_run_reader.go | 361 - .../bitutils/bit_set_run_reader_test.go | 274 - go/internal/bitutils/bitmap_generate.go | 109 - go/internal/bitutils/bitmap_generate_test.go | 68 - go/internal/hashing/hash_funcs.go | 90 - go/internal/hashing/hash_string.go | 30 - go/internal/hashing/hashing_test.go | 114 - go/internal/hashing/types.tmpldata | 42 - go/internal/hashing/xxh3_memo_table.gen.go | 2833 - .../hashing/xxh3_memo_table.gen.go.tmpl | 349 - go/internal/hashing/xxh3_memo_table.go | 436 - go/internal/json/json.go | 51 - go/internal/json/json_tinygo.go | 51 - go/internal/types/extension_types.go | 325 - go/internal/utils/Makefile | 80 - go/internal/utils/_lib/arch.h | 29 - go/internal/utils/_lib/min_max.c | 125 - go/internal/utils/_lib/min_max_avx2_amd64.s | 1009 - go/internal/utils/_lib/min_max_neon.s | 318 - go/internal/utils/_lib/min_max_sse4_amd64.s | 1091 - go/internal/utils/_lib/transpose_ints.c | 57 - 
.../utils/_lib/transpose_ints_avx2_amd64.s | 3334 - .../utils/_lib/transpose_ints_sse4_amd64.s | 3334 - go/internal/utils/buf_reader.go | 212 - go/internal/utils/endians_default.go | 30 - go/internal/utils/endians_s390x.go | 33 - go/internal/utils/math.go | 33 - go/internal/utils/min_max.go | 212 - go/internal/utils/min_max_amd64.go | 55 - go/internal/utils/min_max_arm64.go | 66 - go/internal/utils/min_max_avx2_amd64.go | 90 - go/internal/utils/min_max_avx2_amd64.s | 927 - go/internal/utils/min_max_neon_arm64.go | 56 - go/internal/utils/min_max_neon_arm64.s | 324 - go/internal/utils/min_max_noasm.go | 31 - go/internal/utils/min_max_ppc64le.go | 30 - go/internal/utils/min_max_s390x.go | 30 - go/internal/utils/min_max_sse4_amd64.go | 88 - go/internal/utils/min_max_sse4_amd64.s | 1044 - go/internal/utils/recovery.go | 31 - go/internal/utils/recovery_test.go | 62 - go/internal/utils/ref_count.go | 26 - go/internal/utils/transpose_ints.go | 407 - go/internal/utils/transpose_ints.go.tmpl | 34 - go/internal/utils/transpose_ints.tmpldata | 34 - go/internal/utils/transpose_ints_amd64.go | 325 - .../utils/transpose_ints_amd64.go.tmpl | 75 - go/internal/utils/transpose_ints_arm64.go | 96 - .../utils/transpose_ints_avx2_amd64.go | 473 - go/internal/utils/transpose_ints_avx2_amd64.s | 3074 - go/internal/utils/transpose_ints_def.go | 227 - go/internal/utils/transpose_ints_noasm.go | 96 - .../utils/transpose_ints_noasm.go.tmpl | 34 - go/internal/utils/transpose_ints_ppc64le.go | 96 - go/internal/utils/transpose_ints_s390x.go | 96 - .../utils/transpose_ints_s390x.go.tmpl | 34 - go/internal/utils/transpose_ints_simd.go.tmpl | 42 - .../utils/transpose_ints_sse4_amd64.go | 473 - go/internal/utils/transpose_ints_sse4_amd64.s | 3074 - go/internal/utils/transpose_ints_test.go | 49 - go/parquet/.gitignore | 31 - go/parquet/cmd/parquet_reader/README.md | 106 - go/parquet/cmd/parquet_reader/dumper.go | 182 - go/parquet/cmd/parquet_reader/main.go | 382 - 
go/parquet/cmd/parquet_reader/v0.7.1.parquet | Bin 4372 -> 0 bytes go/parquet/cmd/parquet_schema/main.go | 44 - go/parquet/compress/brotli.go | 114 - go/parquet/compress/compress.go | 181 - go/parquet/compress/compress_test.go | 140 - go/parquet/compress/gzip.go | 97 - go/parquet/compress/lz4_raw.go | 66 - go/parquet/compress/snappy.go | 61 - go/parquet/compress/zstd.go | 112 - go/parquet/doc.go | 81 - go/parquet/encryption_properties.go | 711 - go/parquet/encryption_properties_test.go | 217 - go/parquet/encryption_read_config_test.go | 473 - go/parquet/encryption_write_config_test.go | 321 - go/parquet/file/column_reader.go | 526 - go/parquet/file/column_reader_test.go | 644 - go/parquet/file/column_reader_types.gen.go | 299 - .../file/column_reader_types.gen.go.tmpl | 64 - go/parquet/file/column_writer.go | 677 - go/parquet/file/column_writer_test.go | 791 - go/parquet/file/column_writer_types.gen.go | 1594 - .../file/column_writer_types.gen.go.tmpl | 263 - go/parquet/file/file_reader.go | 317 - go/parquet/file/file_reader_mmap.go | 77 - go/parquet/file/file_reader_mmap_windows.go | 30 - go/parquet/file/file_reader_test.go | 822 - go/parquet/file/file_writer.go | 304 - go/parquet/file/file_writer_test.go | 598 - go/parquet/file/level_conversion.go | 267 - go/parquet/file/level_conversion_test.go | 194 - go/parquet/file/page_reader.go | 617 - go/parquet/file/page_writer.go | 468 - go/parquet/file/record_reader.go | 986 - go/parquet/file/row_group_reader.go | 144 - go/parquet/file/row_group_writer.go | 255 - go/parquet/file/row_group_writer_test.go | 97 - go/parquet/internal/bmi/Makefile | 47 - go/parquet/internal/bmi/_lib/arch.h | 26 - go/parquet/internal/bmi/_lib/bitmap_bmi2.c | 47 - go/parquet/internal/bmi/_lib/bitmap_bmi2.s | 140 - go/parquet/internal/bmi/_lib/bitmap_neon.s | 95 - go/parquet/internal/bmi/bitmap_bmi2_386.go | 25 - go/parquet/internal/bmi/bitmap_bmi2_amd64.go | 51 - go/parquet/internal/bmi/bitmap_bmi2_amd64.s | 117 - 
go/parquet/internal/bmi/bitmap_bmi2_noasm.go | 25 - .../internal/bmi/bitmap_bmi2_ppc64le.go | 25 - go/parquet/internal/bmi/bitmap_bmi2_s390x.go | 25 - go/parquet/internal/bmi/bitmap_neon_arm64.go | 41 - go/parquet/internal/bmi/bitmap_neon_arm64.s | 84 - go/parquet/internal/bmi/bmi.go | 275 - go/parquet/internal/bmi/bmi_amd64.go | 37 - go/parquet/internal/bmi/bmi_arm64.go | 64 - go/parquet/internal/bmi/bmi_test.go | 47 - go/parquet/internal/debug/assert_off.go | 25 - go/parquet/internal/debug/assert_on.go | 29 - go/parquet/internal/debug/doc.go | 23 - go/parquet/internal/debug/log_off.go | 25 - go/parquet/internal/debug/log_on.go | 33 - .../internal/encoding/boolean_decoder.go | 189 - .../internal/encoding/boolean_encoder.go | 144 - .../internal/encoding/byte_array_decoder.go | 130 - .../internal/encoding/byte_array_encoder.go | 158 - .../internal/encoding/byte_stream_split.go | 389 - go/parquet/internal/encoding/decoder.go | 238 - .../internal/encoding/delta_bit_packing.go | 421 - .../internal/encoding/delta_byte_array.go | 238 - .../encoding/delta_byte_array_test.go | 48 - .../encoding/delta_length_byte_array.go | 148 - go/parquet/internal/encoding/encoder.go | 412 - .../encoding/encoding_benchmarks_test.go | 681 - go/parquet/internal/encoding/encoding_test.go | 873 - .../encoding/fixed_len_byte_array_decoder.go | 137 - .../encoding/fixed_len_byte_array_encoder.go | 176 - go/parquet/internal/encoding/levels.go | 289 - go/parquet/internal/encoding/levels_test.go | 293 - go/parquet/internal/encoding/memo_table.go | 411 - .../internal/encoding/memo_table_test.go | 293 - .../internal/encoding/memo_table_types.gen.go | 398 - .../encoding/memo_table_types.gen.go.tmpl | 123 - .../internal/encoding/physical_types.tmpldata | 52 - .../encoding/plain_encoder_types.gen.go | 641 - .../encoding/plain_encoder_types.gen.go.tmpl | 184 - .../internal/encoding/typed_encoder.gen.go | 1735 - .../encoding/typed_encoder.gen.go.tmpl | 419 - go/parquet/internal/encoding/types.go | 467 - 
go/parquet/internal/encryption/aes.go | 310 - go/parquet/internal/encryption/decryptor.go | 268 - go/parquet/internal/encryption/encryptor.go | 237 - .../internal/encryption/key_handling.go | 61 - .../gen-go/parquet/GoUnusedProtection__.go | 5 - .../internal/gen-go/parquet/parquet-consts.go | 30 - go/parquet/internal/gen-go/parquet/parquet.go | 12796 --- .../internal/gen-go/parquet/staticcheck.conf | 17 - go/parquet/internal/testutils/pagebuilder.go | 305 - .../internal/testutils/primitive_typed.go | 305 - go/parquet/internal/testutils/random.go | 538 - go/parquet/internal/testutils/random_arrow.go | 518 - go/parquet/internal/testutils/utils.go | 42 - go/parquet/internal/thrift/helpers.go | 87 - go/parquet/internal/utils/Makefile | 78 - go/parquet/internal/utils/_lib/README.md | 154 - go/parquet/internal/utils/_lib/arch.h | 29 - .../internal/utils/_lib/bit_packing_avx2.c | 1879 - .../internal/utils/_lib/bit_packing_avx2.s | 4012 - .../internal/utils/_lib/bit_packing_neon.c | 3196 - go/parquet/internal/utils/_lib/script.sed | 22 - go/parquet/internal/utils/_lib/unpack_bool.c | 30 - .../internal/utils/_lib/unpack_bool_avx2.s | 104 - .../internal/utils/_lib/unpack_bool_neon.s | 89 - .../internal/utils/_lib/unpack_bool_sse4.s | 104 - .../internal/utils/bit_benchmark_test.go | 132 - .../internal/utils/bit_packing_amd64.go | 32 - .../internal/utils/bit_packing_arm64.go | 35 - .../internal/utils/bit_packing_avx2_amd64.go | 54 - .../internal/utils/bit_packing_avx2_amd64.s | 3439 - .../internal/utils/bit_packing_default.go | 1943 - .../internal/utils/bit_packing_neon_arm64.go | 54 - .../internal/utils/bit_packing_neon_arm64.s | 6926 -- go/parquet/internal/utils/bit_reader.go | 349 - go/parquet/internal/utils/bit_reader_test.go | 654 - go/parquet/internal/utils/bit_writer.go | 188 - go/parquet/internal/utils/bitmap_writer.go | 173 - .../internal/utils/bitmap_writer_test.go | 304 - go/parquet/internal/utils/clib_amd64.s | 105 - go/parquet/internal/utils/dictionary.go | 87 - 
.../internal/utils/physical_types.tmpldata | 52 - go/parquet/internal/utils/rle.go | 594 - .../internal/utils/typed_rle_dict.gen.go | 1377 - .../internal/utils/typed_rle_dict.gen.go.tmpl | 220 - go/parquet/internal/utils/unpack_bool.go | 26 - .../internal/utils/unpack_bool_amd64.go | 42 - .../internal/utils/unpack_bool_arm64.go | 66 - .../internal/utils/unpack_bool_avx2_amd64.go | 30 - .../internal/utils/unpack_bool_avx2_amd64.s | 88 - .../internal/utils/unpack_bool_default.go | 26 - .../internal/utils/unpack_bool_neon_arm64.go | 30 - .../internal/utils/unpack_bool_neon_arm64.s | 81 - .../internal/utils/unpack_bool_noasm.go | 26 - .../internal/utils/unpack_bool_sse4_amd64.go | 30 - .../internal/utils/unpack_bool_sse4_amd64.s | 88 - go/parquet/internal/utils/write_utils.go | 57 - go/parquet/metadata/app_version.go | 185 - go/parquet/metadata/column_chunk.go | 423 - go/parquet/metadata/file.go | 527 - go/parquet/metadata/metadata_test.go | 381 - go/parquet/metadata/row_group.go | 177 - go/parquet/metadata/stat_compare_test.go | 268 - go/parquet/metadata/statistics.go | 617 - go/parquet/metadata/statistics_test.go | 262 - go/parquet/metadata/statistics_types.gen.go | 2742 - .../metadata/statistics_types.gen.go.tmpl | 530 - go/parquet/metadata/statistics_types.tmpldata | 60 - go/parquet/pqarrow/column_readers.go | 969 - go/parquet/pqarrow/doc.go | 21 - go/parquet/pqarrow/encode_arrow.go | 715 - go/parquet/pqarrow/encode_arrow_test.go | 2266 - go/parquet/pqarrow/encode_dict_compute.go | 160 - go/parquet/pqarrow/encode_dict_nocompute.go | 30 - go/parquet/pqarrow/encode_dictionary_test.go | 748 - go/parquet/pqarrow/file_reader.go | 775 - go/parquet/pqarrow/file_reader_test.go | 375 - go/parquet/pqarrow/file_writer.go | 340 - go/parquet/pqarrow/file_writer_test.go | 135 - go/parquet/pqarrow/helpers.go | 45 - go/parquet/pqarrow/path_builder.go | 751 - go/parquet/pqarrow/path_builder_test.go | 676 - go/parquet/pqarrow/properties.go | 193 - 
go/parquet/pqarrow/reader_writer_test.go | 388 - go/parquet/pqarrow/schema.go | 1160 - go/parquet/pqarrow/schema_test.go | 450 - go/parquet/reader_properties.go | 88 - go/parquet/reader_writer_properties_test.go | 73 - go/parquet/schema/column.go | 108 - go/parquet/schema/converted_types.go | 187 - go/parquet/schema/converted_types_test.go | 50 - go/parquet/schema/helpers.go | 129 - go/parquet/schema/helpers_test.go | 122 - go/parquet/schema/logical_types.go | 1192 - go/parquet/schema/logical_types_test.go | 572 - go/parquet/schema/node.go | 629 - go/parquet/schema/reflection.go | 829 - go/parquet/schema/reflection_test.go | 411 - go/parquet/schema/schema.go | 334 - go/parquet/schema/schema_element_test.go | 521 - go/parquet/schema/schema_flatten_test.go | 157 - go/parquet/schema/schema_test.go | 670 - go/parquet/tools.go | 27 - go/parquet/types.go | 373 - go/parquet/version_string.go | 25 - go/parquet/writer_properties.go | 552 - 888 files changed, 764039 deletions(-) delete mode 100644 go/LICENSE.txt delete mode 100644 go/README.md delete mode 100644 go/arrow/.editorconfig delete mode 100644 go/arrow/.gitignore delete mode 100644 go/arrow/Gopkg.lock delete mode 100644 go/arrow/Gopkg.toml delete mode 100644 go/arrow/Makefile delete mode 100644 go/arrow/_examples/helloworld/main.go delete mode 100644 go/arrow/_tools/tmpl/main.go delete mode 100644 go/arrow/_tools/tmpl/main_test.go delete mode 100644 go/arrow/_tools/tools.go delete mode 100644 go/arrow/array.go delete mode 100644 go/arrow/array/array.go delete mode 100644 go/arrow/array/array_test.go delete mode 100644 go/arrow/array/binary.go delete mode 100644 go/arrow/array/binary_test.go delete mode 100644 go/arrow/array/binarybuilder.go delete mode 100644 go/arrow/array/binarybuilder_test.go delete mode 100644 go/arrow/array/boolean.go delete mode 100644 go/arrow/array/boolean_test.go delete mode 100644 go/arrow/array/booleanbuilder.go delete mode 100644 go/arrow/array/booleanbuilder_test.go delete mode 100644 
go/arrow/array/bufferbuilder.go delete mode 100644 go/arrow/array/bufferbuilder_byte.go delete mode 100644 go/arrow/array/bufferbuilder_numeric.gen.go delete mode 100644 go/arrow/array/bufferbuilder_numeric.gen.go.tmpl delete mode 100644 go/arrow/array/bufferbuilder_numeric_test.go delete mode 100644 go/arrow/array/builder.go delete mode 100644 go/arrow/array/builder_test.go delete mode 100644 go/arrow/array/compare.go delete mode 100644 go/arrow/array/compare_test.go delete mode 100644 go/arrow/array/concat.go delete mode 100644 go/arrow/array/concat_test.go delete mode 100644 go/arrow/array/data.go delete mode 100644 go/arrow/array/data_test.go delete mode 100644 go/arrow/array/decimal128.go delete mode 100644 go/arrow/array/decimal128_test.go delete mode 100644 go/arrow/array/decimal256.go delete mode 100644 go/arrow/array/decimal256_test.go delete mode 100644 go/arrow/array/decimal_test.go delete mode 100644 go/arrow/array/dictionary.go delete mode 100644 go/arrow/array/dictionary_test.go delete mode 100644 go/arrow/array/diff.go delete mode 100644 go/arrow/array/diff_test.go delete mode 100644 go/arrow/array/doc.go delete mode 100644 go/arrow/array/encoded.go delete mode 100644 go/arrow/array/encoded_test.go delete mode 100644 go/arrow/array/extension.go delete mode 100644 go/arrow/array/extension_builder.go delete mode 100644 go/arrow/array/extension_test.go delete mode 100644 go/arrow/array/fixed_size_list.go delete mode 100644 go/arrow/array/fixed_size_list_test.go delete mode 100644 go/arrow/array/fixedsize_binary.go delete mode 100644 go/arrow/array/fixedsize_binary_test.go delete mode 100644 go/arrow/array/fixedsize_binarybuilder.go delete mode 100644 go/arrow/array/fixedsize_binarybuilder_test.go delete mode 100644 go/arrow/array/float16.go delete mode 100644 go/arrow/array/float16_builder.go delete mode 100644 go/arrow/array/float16_builder_test.go delete mode 100644 go/arrow/array/interval.go delete mode 100644 go/arrow/array/interval_test.go delete 
mode 100644 go/arrow/array/json_reader.go delete mode 100644 go/arrow/array/json_reader_test.go delete mode 100644 go/arrow/array/list.go delete mode 100644 go/arrow/array/list_test.go delete mode 100644 go/arrow/array/map.go delete mode 100644 go/arrow/array/map_test.go delete mode 100644 go/arrow/array/null.go delete mode 100644 go/arrow/array/null_test.go delete mode 100644 go/arrow/array/numeric.gen.go delete mode 100644 go/arrow/array/numeric.gen.go.tmpl delete mode 100644 go/arrow/array/numeric_test.go delete mode 100644 go/arrow/array/numericbuilder.gen.go delete mode 100644 go/arrow/array/numericbuilder.gen.go.tmpl delete mode 100644 go/arrow/array/numericbuilder.gen_test.go delete mode 100644 go/arrow/array/numericbuilder.gen_test.go.tmpl delete mode 100644 go/arrow/array/record.go delete mode 100644 go/arrow/array/record_test.go delete mode 100644 go/arrow/array/string.go delete mode 100644 go/arrow/array/string_test.go delete mode 100644 go/arrow/array/struct.go delete mode 100644 go/arrow/array/struct_test.go delete mode 100644 go/arrow/array/table.go delete mode 100644 go/arrow/array/table_test.go delete mode 100644 go/arrow/array/timestamp.go delete mode 100644 go/arrow/array/timestamp_test.go delete mode 100644 go/arrow/array/union.go delete mode 100644 go/arrow/array/union_test.go delete mode 100644 go/arrow/array/util.go delete mode 100644 go/arrow/array/util_test.go delete mode 100644 go/arrow/arrio/arrio.go delete mode 100644 go/arrow/arrio/arrio_test.go delete mode 100644 go/arrow/avro/avro2parquet/main.go delete mode 100644 go/arrow/avro/loader.go delete mode 100644 go/arrow/avro/reader.go delete mode 100644 go/arrow/avro/reader_test.go delete mode 100644 go/arrow/avro/reader_types.go delete mode 100644 go/arrow/avro/schema.go delete mode 100644 go/arrow/avro/schema_test.go delete mode 100644 go/arrow/avro/testdata/arrayrecordmap.avro delete mode 100644 go/arrow/avro/testdata/githubsamplecommits.avro delete mode 100644 go/arrow/bitutil/Makefile 
delete mode 100644 go/arrow/bitutil/_lib/bitmap_ops.c delete mode 100644 go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s delete mode 100644 go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s delete mode 100644 go/arrow/bitutil/bitmap_ops.go delete mode 100644 go/arrow/bitutil/bitmap_ops_amd64.go delete mode 100644 go/arrow/bitutil/bitmap_ops_arm64.go delete mode 100644 go/arrow/bitutil/bitmap_ops_avx2_amd64.go delete mode 100644 go/arrow/bitutil/bitmap_ops_avx2_amd64.s delete mode 100644 go/arrow/bitutil/bitmap_ops_noasm.go delete mode 100644 go/arrow/bitutil/bitmap_ops_ppc64le.go delete mode 100644 go/arrow/bitutil/bitmap_ops_s390x.go delete mode 100644 go/arrow/bitutil/bitmap_ops_sse4_amd64.go delete mode 100644 go/arrow/bitutil/bitmap_ops_sse4_amd64.s delete mode 100644 go/arrow/bitutil/bitmaps.go delete mode 100644 go/arrow/bitutil/bitmaps_test.go delete mode 100644 go/arrow/bitutil/bitutil.go delete mode 100644 go/arrow/bitutil/bitutil_bytes.go delete mode 100644 go/arrow/bitutil/bitutil_test.go delete mode 100644 go/arrow/bitutil/endian_default.go delete mode 100644 go/arrow/bitutil/endian_s390x.go delete mode 100644 go/arrow/cdata/arrow/c/abi.h delete mode 100644 go/arrow/cdata/arrow/c/helpers.h delete mode 100644 go/arrow/cdata/cdata.go delete mode 100644 go/arrow/cdata/cdata_allocate.go delete mode 100644 go/arrow/cdata/cdata_exports.go delete mode 100644 go/arrow/cdata/cdata_fulltest.c delete mode 100644 go/arrow/cdata/cdata_test.go delete mode 100644 go/arrow/cdata/cdata_test_framework.go delete mode 100644 go/arrow/cdata/exports.go delete mode 100644 go/arrow/cdata/import_allocator.go delete mode 100644 go/arrow/cdata/interface.go delete mode 100644 go/arrow/cdata/test/test_cimport.go delete mode 100644 go/arrow/cdata/test/test_export_to_cgo.py delete mode 100644 go/arrow/cdata/trampoline.c delete mode 100644 go/arrow/cdata/utils.h delete mode 100644 go/arrow/compare.go delete mode 100644 go/arrow/compare_test.go delete mode 100644 
go/arrow/compute/arithmetic.go delete mode 100644 go/arrow/compute/arithmetic_test.go delete mode 100644 go/arrow/compute/cast.go delete mode 100644 go/arrow/compute/cast_test.go delete mode 100644 go/arrow/compute/datum.go delete mode 100644 go/arrow/compute/datumkind_string.go delete mode 100644 go/arrow/compute/doc.go delete mode 100644 go/arrow/compute/example_test.go delete mode 100644 go/arrow/compute/exec.go delete mode 100644 go/arrow/compute/exec/hash_util.go delete mode 100644 go/arrow/compute/exec/kernel.go delete mode 100644 go/arrow/compute/exec/kernel_test.go delete mode 100644 go/arrow/compute/exec/span.go delete mode 100644 go/arrow/compute/exec/span_offsets.go delete mode 100644 go/arrow/compute/exec/span_test.go delete mode 100644 go/arrow/compute/exec/utils.go delete mode 100644 go/arrow/compute/exec/utils_test.go delete mode 100644 go/arrow/compute/exec_internals_test.go delete mode 100644 go/arrow/compute/exec_test.go delete mode 100644 go/arrow/compute/executor.go delete mode 100644 go/arrow/compute/expression.go delete mode 100644 go/arrow/compute/expression_test.go delete mode 100644 go/arrow/compute/exprs/builders.go delete mode 100644 go/arrow/compute/exprs/builders_test.go delete mode 100644 go/arrow/compute/exprs/exec.go delete mode 100644 go/arrow/compute/exprs/exec_internal_test.go delete mode 100644 go/arrow/compute/exprs/exec_test.go delete mode 100644 go/arrow/compute/exprs/extension_types.go delete mode 100644 go/arrow/compute/exprs/field_refs.go delete mode 100644 go/arrow/compute/exprs/types.go delete mode 100644 go/arrow/compute/fieldref.go delete mode 100644 go/arrow/compute/fieldref_hash.go delete mode 100644 go/arrow/compute/fieldref_test.go delete mode 100644 go/arrow/compute/funckind_string.go delete mode 100644 go/arrow/compute/functions.go delete mode 100644 go/arrow/compute/functions_test.go delete mode 100644 go/arrow/compute/internal/kernels/Makefile delete mode 100644 
go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc delete mode 100644 go/arrow/compute/internal/kernels/_lib/base_arithmetic_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/base_arithmetic_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/cast_numeric.cc delete mode 100644 go/arrow/compute/internal/kernels/_lib/cast_numeric_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/cast_numeric_neon.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/cast_numeric_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/constant_factor.c delete mode 100644 go/arrow/compute/internal/kernels/_lib/constant_factor_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/constant_factor_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/scalar_comparison.cc delete mode 100644 go/arrow/compute/internal/kernels/_lib/scalar_comparison_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/scalar_comparison_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/_lib/types.h delete mode 100644 go/arrow/compute/internal/kernels/_lib/vendored/safe-math.h delete mode 100644 go/arrow/compute/internal/kernels/base_arithmetic.go delete mode 100644 go/arrow/compute/internal/kernels/base_arithmetic_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/base_arithmetic_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go delete mode 100644 go/arrow/compute/internal/kernels/boolean_cast.go delete mode 100644 go/arrow/compute/internal/kernels/cast.go delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric.go delete mode 100644 
go/arrow/compute/internal/kernels/cast_numeric_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.go delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/cast_temporal.go delete mode 100644 go/arrow/compute/internal/kernels/compareoperator_string.go delete mode 100644 go/arrow/compute/internal/kernels/constant_factor.go delete mode 100644 go/arrow/compute/internal/kernels/constant_factor_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/constant_factor_avx2_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/constant_factor_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/constant_factor_sse4_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/constant_factor_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/doc.go delete mode 100644 go/arrow/compute/internal/kernels/helpers.go delete mode 100644 go/arrow/compute/internal/kernels/numeric_cast.go delete mode 100644 go/arrow/compute/internal/kernels/rounding.go delete mode 100644 go/arrow/compute/internal/kernels/roundmode_string.go delete mode 100644 go/arrow/compute/internal/kernels/scalar_arithmetic.go delete mode 100644 go/arrow/compute/internal/kernels/scalar_boolean.go delete mode 100644 go/arrow/compute/internal/kernels/scalar_comparison_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/scalar_comparison_avx2_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/scalar_comparison_noasm.go delete mode 100644 
go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.go delete mode 100644 go/arrow/compute/internal/kernels/scalar_comparison_sse4_amd64.s delete mode 100644 go/arrow/compute/internal/kernels/scalar_comparisons.go delete mode 100644 go/arrow/compute/internal/kernels/string_casts.go delete mode 100644 go/arrow/compute/internal/kernels/types.go delete mode 100644 go/arrow/compute/internal/kernels/vector_hash.go delete mode 100644 go/arrow/compute/internal/kernels/vector_run_end_encode.go delete mode 100644 go/arrow/compute/internal/kernels/vector_selection.go delete mode 100644 go/arrow/compute/registry.go delete mode 100644 go/arrow/compute/registry_test.go delete mode 100644 go/arrow/compute/scalar_bool.go delete mode 100644 go/arrow/compute/scalar_bool_test.go delete mode 100644 go/arrow/compute/scalar_compare.go delete mode 100644 go/arrow/compute/scalar_compare_test.go delete mode 100644 go/arrow/compute/selection.go delete mode 100644 go/arrow/compute/utils.go delete mode 100644 go/arrow/compute/vector_hash.go delete mode 100644 go/arrow/compute/vector_hash_test.go delete mode 100644 go/arrow/compute/vector_run_end_test.go delete mode 100644 go/arrow/compute/vector_run_ends.go delete mode 100644 go/arrow/compute/vector_selection_test.go delete mode 100644 go/arrow/csv/common.go delete mode 100644 go/arrow/csv/reader.go delete mode 100644 go/arrow/csv/reader_test.go delete mode 100644 go/arrow/csv/testdata/header.csv delete mode 100644 go/arrow/csv/testdata/simple.csv delete mode 100644 go/arrow/csv/testdata/types.csv delete mode 100644 go/arrow/csv/transformer.go delete mode 100644 go/arrow/csv/writer.go delete mode 100644 go/arrow/csv/writer_test.go delete mode 100644 go/arrow/datatype.go delete mode 100644 go/arrow/datatype_binary.go delete mode 100644 go/arrow/datatype_binary_test.go delete mode 100644 go/arrow/datatype_encoded.go delete mode 100644 go/arrow/datatype_extension.go delete mode 100644 go/arrow/datatype_extension_test.go delete mode 
100644 go/arrow/datatype_fixedwidth.go delete mode 100644 go/arrow/datatype_fixedwidth_test.go delete mode 100644 go/arrow/datatype_nested.go delete mode 100644 go/arrow/datatype_nested_test.go delete mode 100644 go/arrow/datatype_null.go delete mode 100644 go/arrow/datatype_null_test.go delete mode 100644 go/arrow/datatype_numeric.gen.go delete mode 100644 go/arrow/datatype_numeric.gen.go.tmpl delete mode 100644 go/arrow/datatype_numeric.gen.go.tmpldata delete mode 100644 go/arrow/datatype_viewheader.go delete mode 100644 go/arrow/datatype_viewheader_inline.go delete mode 100644 go/arrow/datatype_viewheader_inline_go1.19.go delete mode 100644 go/arrow/datatype_viewheader_inline_tinygo.go delete mode 100644 go/arrow/decimal128/decimal128.go delete mode 100644 go/arrow/decimal128/decimal128_test.go delete mode 100644 go/arrow/decimal256/decimal256.go delete mode 100644 go/arrow/decimal256/decimal256_test.go delete mode 100644 go/arrow/doc.go delete mode 100644 go/arrow/encoded/ree_utils.go delete mode 100644 go/arrow/encoded/ree_utils_test.go delete mode 100644 go/arrow/endian/big.go delete mode 100644 go/arrow/endian/endian.go delete mode 100644 go/arrow/endian/little.go delete mode 100644 go/arrow/errors.go delete mode 100644 go/arrow/example_test.go delete mode 100644 go/arrow/extensions/bool8.go delete mode 100644 go/arrow/extensions/bool8_test.go delete mode 100644 go/arrow/extensions/doc.go delete mode 100644 go/arrow/extensions/extensions.go delete mode 100644 go/arrow/extensions/extensions_test.go delete mode 100644 go/arrow/extensions/json.go delete mode 100644 go/arrow/extensions/json_test.go delete mode 100644 go/arrow/extensions/opaque.go delete mode 100644 go/arrow/extensions/opaque_test.go delete mode 100644 go/arrow/extensions/uuid.go delete mode 100644 go/arrow/extensions/uuid_test.go delete mode 100755 go/arrow/flight/basic_auth_flight_test.go delete mode 100644 go/arrow/flight/client.go delete mode 100644 go/arrow/flight/client_auth.go delete mode 
100644 go/arrow/flight/cookie_middleware.go delete mode 100644 go/arrow/flight/cookie_middleware_test.go delete mode 100644 go/arrow/flight/doc.go delete mode 100755 go/arrow/flight/example_flight_server_test.go delete mode 100755 go/arrow/flight/flight_middleware_test.go delete mode 100755 go/arrow/flight/flight_test.go delete mode 100644 go/arrow/flight/flightsql/client.go delete mode 100644 go/arrow/flight/flightsql/client_test.go delete mode 100644 go/arrow/flight/flightsql/column_metadata.go delete mode 100644 go/arrow/flight/flightsql/driver/README.md delete mode 100644 go/arrow/flight/flightsql/driver/config.go delete mode 100644 go/arrow/flight/flightsql/driver/config_test.go delete mode 100644 go/arrow/flight/flightsql/driver/driver.go delete mode 100644 go/arrow/flight/flightsql/driver/driver_test.go delete mode 100644 go/arrow/flight/flightsql/driver/errors.go delete mode 100644 go/arrow/flight/flightsql/driver/utils.go delete mode 100644 go/arrow/flight/flightsql/driver/utils_test.go delete mode 100644 go/arrow/flight/flightsql/example/cmd/sqlite_flightsql_server/main.go delete mode 100644 go/arrow/flight/flightsql/example/sql_batch_reader.go delete mode 100644 go/arrow/flight/flightsql/example/sqlite_info.go delete mode 100644 go/arrow/flight/flightsql/example/sqlite_server.go delete mode 100644 go/arrow/flight/flightsql/example/sqlite_tables_schema_batch_reader.go delete mode 100644 go/arrow/flight/flightsql/example/type_info.go delete mode 100644 go/arrow/flight/flightsql/schema_ref/reference_schemas.go delete mode 100644 go/arrow/flight/flightsql/server.go delete mode 100644 go/arrow/flight/flightsql/server_test.go delete mode 100644 go/arrow/flight/flightsql/sql_info.go delete mode 100644 go/arrow/flight/flightsql/sqlite_server_test.go delete mode 100644 go/arrow/flight/flightsql/types.go delete mode 100644 go/arrow/flight/gen.go delete mode 100644 go/arrow/flight/gen/flight/Flight.pb.go delete mode 100644 go/arrow/flight/gen/flight/FlightSql.pb.go 
delete mode 100644 go/arrow/flight/gen/flight/Flight_grpc.pb.go delete mode 100644 go/arrow/flight/record_batch_reader.go delete mode 100644 go/arrow/flight/record_batch_writer.go delete mode 100644 go/arrow/flight/server.go delete mode 100644 go/arrow/flight/server_auth.go delete mode 100644 go/arrow/flight/server_example_test.go delete mode 100644 go/arrow/flight/session/cookies.go delete mode 100644 go/arrow/flight/session/example_session_test.go delete mode 100644 go/arrow/flight/session/session.go delete mode 100644 go/arrow/flight/session/stateful_session.go delete mode 100644 go/arrow/flight/session/stateless_session.go delete mode 100644 go/arrow/float16/float16.go delete mode 100644 go/arrow/float16/float16_test.go delete mode 100644 go/arrow/gen-flatbuffers.go delete mode 100644 go/arrow/internal/arrdata/arrdata.go delete mode 100644 go/arrow/internal/arrdata/ioutil.go delete mode 100644 go/arrow/internal/arrjson/arrjson.go delete mode 100644 go/arrow/internal/arrjson/arrjson_test.go delete mode 100644 go/arrow/internal/arrjson/option.go delete mode 100644 go/arrow/internal/arrjson/reader.go delete mode 100644 go/arrow/internal/arrjson/writer.go delete mode 100644 go/arrow/internal/cdata_integration/entrypoints.go delete mode 100644 go/arrow/internal/debug/assert_off.go delete mode 100644 go/arrow/internal/debug/assert_on.go delete mode 100644 go/arrow/internal/debug/doc.go delete mode 100644 go/arrow/internal/debug/log_off.go delete mode 100644 go/arrow/internal/debug/log_on.go delete mode 100644 go/arrow/internal/debug/util.go delete mode 100644 go/arrow/internal/dictutils/dict.go delete mode 100644 go/arrow/internal/dictutils/dict_test.go delete mode 100644 go/arrow/internal/flatbuf/Binary.go delete mode 100644 go/arrow/internal/flatbuf/BinaryView.go delete mode 100644 go/arrow/internal/flatbuf/Block.go delete mode 100644 go/arrow/internal/flatbuf/BodyCompression.go delete mode 100644 go/arrow/internal/flatbuf/BodyCompressionMethod.go delete mode 
100644 go/arrow/internal/flatbuf/Bool.go delete mode 100644 go/arrow/internal/flatbuf/Buffer.go delete mode 100644 go/arrow/internal/flatbuf/CompressionType.go delete mode 100644 go/arrow/internal/flatbuf/Date.go delete mode 100644 go/arrow/internal/flatbuf/DateUnit.go delete mode 100644 go/arrow/internal/flatbuf/Decimal.go delete mode 100644 go/arrow/internal/flatbuf/DictionaryBatch.go delete mode 100644 go/arrow/internal/flatbuf/DictionaryEncoding.go delete mode 100644 go/arrow/internal/flatbuf/DictionaryKind.go delete mode 100644 go/arrow/internal/flatbuf/Duration.go delete mode 100644 go/arrow/internal/flatbuf/Endianness.go delete mode 100644 go/arrow/internal/flatbuf/Feature.go delete mode 100644 go/arrow/internal/flatbuf/Field.go delete mode 100644 go/arrow/internal/flatbuf/FieldNode.go delete mode 100644 go/arrow/internal/flatbuf/FixedSizeBinary.go delete mode 100644 go/arrow/internal/flatbuf/FixedSizeList.go delete mode 100644 go/arrow/internal/flatbuf/FloatingPoint.go delete mode 100644 go/arrow/internal/flatbuf/Footer.go delete mode 100644 go/arrow/internal/flatbuf/Int.go delete mode 100644 go/arrow/internal/flatbuf/Interval.go delete mode 100644 go/arrow/internal/flatbuf/IntervalUnit.go delete mode 100644 go/arrow/internal/flatbuf/KeyValue.go delete mode 100644 go/arrow/internal/flatbuf/LargeBinary.go delete mode 100644 go/arrow/internal/flatbuf/LargeList.go delete mode 100644 go/arrow/internal/flatbuf/LargeListView.go delete mode 100644 go/arrow/internal/flatbuf/LargeUtf8.go delete mode 100644 go/arrow/internal/flatbuf/List.go delete mode 100644 go/arrow/internal/flatbuf/ListView.go delete mode 100644 go/arrow/internal/flatbuf/Map.go delete mode 100644 go/arrow/internal/flatbuf/Message.go delete mode 100644 go/arrow/internal/flatbuf/MessageHeader.go delete mode 100644 go/arrow/internal/flatbuf/MetadataVersion.go delete mode 100644 go/arrow/internal/flatbuf/Null.go delete mode 100644 go/arrow/internal/flatbuf/Precision.go delete mode 100644 
go/arrow/internal/flatbuf/RecordBatch.go delete mode 100644 go/arrow/internal/flatbuf/RunEndEncoded.go delete mode 100644 go/arrow/internal/flatbuf/RunLengthEncoded.go delete mode 100644 go/arrow/internal/flatbuf/Schema.go delete mode 100644 go/arrow/internal/flatbuf/SparseMatrixCompressedAxis.go delete mode 100644 go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go delete mode 100644 go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go delete mode 100644 go/arrow/internal/flatbuf/SparseTensor.go delete mode 100644 go/arrow/internal/flatbuf/SparseTensorIndex.go delete mode 100644 go/arrow/internal/flatbuf/SparseTensorIndexCOO.go delete mode 100644 go/arrow/internal/flatbuf/SparseTensorIndexCSF.go delete mode 100644 go/arrow/internal/flatbuf/Struct_.go delete mode 100644 go/arrow/internal/flatbuf/Tensor.go delete mode 100644 go/arrow/internal/flatbuf/TensorDim.go delete mode 100644 go/arrow/internal/flatbuf/Time.go delete mode 100644 go/arrow/internal/flatbuf/TimeUnit.go delete mode 100644 go/arrow/internal/flatbuf/Timestamp.go delete mode 100644 go/arrow/internal/flatbuf/Type.go delete mode 100644 go/arrow/internal/flatbuf/Union.go delete mode 100644 go/arrow/internal/flatbuf/UnionMode.go delete mode 100644 go/arrow/internal/flatbuf/Utf8.go delete mode 100644 go/arrow/internal/flatbuf/Utf8View.go delete mode 100755 go/arrow/internal/flight_integration/cmd/arrow-flight-integration-client/main.go delete mode 100644 go/arrow/internal/flight_integration/cmd/arrow-flight-integration-server/main.go delete mode 100644 go/arrow/internal/flight_integration/middleware.go delete mode 100644 go/arrow/internal/flight_integration/scenario.go delete mode 100644 go/arrow/internal/testing/gen/random_array_gen.go delete mode 100644 go/arrow/internal/testing/tools/bits.go delete mode 100644 go/arrow/internal/testing/tools/bits_test.go delete mode 100644 go/arrow/internal/testing/tools/bool.go delete mode 100644 go/arrow/internal/testing/tools/data_types.go delete mode 100644 
go/arrow/internal/utils.go delete mode 100644 go/arrow/ipc/cmd/arrow-cat/main.go delete mode 100644 go/arrow/ipc/cmd/arrow-cat/main_test.go delete mode 100644 go/arrow/ipc/cmd/arrow-file-to-stream/main.go delete mode 100644 go/arrow/ipc/cmd/arrow-file-to-stream/main_test.go delete mode 100644 go/arrow/ipc/cmd/arrow-json-integration-test/main.go delete mode 100644 go/arrow/ipc/cmd/arrow-json-integration-test/main_test.go delete mode 100644 go/arrow/ipc/cmd/arrow-ls/main.go delete mode 100644 go/arrow/ipc/cmd/arrow-ls/main_test.go delete mode 100644 go/arrow/ipc/cmd/arrow-stream-to-file/main.go delete mode 100644 go/arrow/ipc/cmd/arrow-stream-to-file/main_test.go delete mode 100644 go/arrow/ipc/compression.go delete mode 100644 go/arrow/ipc/endian_swap.go delete mode 100644 go/arrow/ipc/endian_swap_test.go delete mode 100644 go/arrow/ipc/file_reader.go delete mode 100644 go/arrow/ipc/file_test.go delete mode 100644 go/arrow/ipc/file_writer.go delete mode 100644 go/arrow/ipc/ipc.go delete mode 100644 go/arrow/ipc/ipc_test.go delete mode 100644 go/arrow/ipc/message.go delete mode 100644 go/arrow/ipc/message_test.go delete mode 100644 go/arrow/ipc/metadata.go delete mode 100644 go/arrow/ipc/metadata_test.go delete mode 100644 go/arrow/ipc/reader.go delete mode 100644 go/arrow/ipc/reader_test.go delete mode 100644 go/arrow/ipc/stream_test.go delete mode 100644 go/arrow/ipc/writer.go delete mode 100644 go/arrow/ipc/writer_test.go delete mode 100644 go/arrow/math/Makefile delete mode 100644 go/arrow/math/_lib/.gitignore delete mode 100644 go/arrow/math/_lib/CMakeLists.txt delete mode 100644 go/arrow/math/_lib/arch.h delete mode 100644 go/arrow/math/_lib/float64.c delete mode 100644 go/arrow/math/_lib/float64_avx2.s delete mode 100644 go/arrow/math/_lib/float64_neon.s delete mode 100644 go/arrow/math/_lib/float64_sse4.s delete mode 100644 go/arrow/math/_lib/int64.c delete mode 100644 go/arrow/math/_lib/int64_avx2.s delete mode 100644 go/arrow/math/_lib/int64_neon.s delete 
mode 100644 go/arrow/math/_lib/int64_sse4.s delete mode 100644 go/arrow/math/_lib/uint64.c delete mode 100644 go/arrow/math/_lib/uint64_avx2.s delete mode 100644 go/arrow/math/_lib/uint64_neon.s delete mode 100644 go/arrow/math/_lib/uint64_sse4.s delete mode 100644 go/arrow/math/doc.go delete mode 100644 go/arrow/math/float64.go delete mode 100644 go/arrow/math/float64.tmpldata delete mode 100644 go/arrow/math/float64_amd64.go delete mode 100644 go/arrow/math/float64_arm64.go delete mode 100644 go/arrow/math/float64_avx2_amd64.go delete mode 100644 go/arrow/math/float64_avx2_amd64.s delete mode 100755 go/arrow/math/float64_neon_arm64.go delete mode 100755 go/arrow/math/float64_neon_arm64.s delete mode 100644 go/arrow/math/float64_noasm.go delete mode 100644 go/arrow/math/float64_ppc64le.go delete mode 100644 go/arrow/math/float64_s390x.go delete mode 100644 go/arrow/math/float64_sse4_amd64.go delete mode 100644 go/arrow/math/float64_sse4_amd64.s delete mode 100644 go/arrow/math/float64_test.go delete mode 100644 go/arrow/math/int64.go delete mode 100644 go/arrow/math/int64.tmpldata delete mode 100644 go/arrow/math/int64_amd64.go delete mode 100644 go/arrow/math/int64_arm64.go delete mode 100644 go/arrow/math/int64_avx2_amd64.go delete mode 100644 go/arrow/math/int64_avx2_amd64.s delete mode 100755 go/arrow/math/int64_neon_arm64.go delete mode 100755 go/arrow/math/int64_neon_arm64.s delete mode 100644 go/arrow/math/int64_noasm.go delete mode 100644 go/arrow/math/int64_ppc64le.go delete mode 100644 go/arrow/math/int64_s390x.go delete mode 100644 go/arrow/math/int64_sse4_amd64.go delete mode 100644 go/arrow/math/int64_sse4_amd64.s delete mode 100644 go/arrow/math/int64_test.go delete mode 100644 go/arrow/math/math_amd64.go delete mode 100644 go/arrow/math/math_arm64.go delete mode 100644 go/arrow/math/math_noasm.go delete mode 100644 go/arrow/math/math_ppc64le.go delete mode 100644 go/arrow/math/math_s390x.go delete mode 100644 go/arrow/math/type.go.tmpl delete mode 
100644 go/arrow/math/type_amd64.go.tmpl delete mode 100755 go/arrow/math/type_arm64.go.tmpl delete mode 100644 go/arrow/math/type_noasm.go.tmpl delete mode 100644 go/arrow/math/type_ppc64le.go.tmpl delete mode 100644 go/arrow/math/type_s390x.go.tmpl delete mode 100644 go/arrow/math/type_simd_amd64.go.tmpl delete mode 100755 go/arrow/math/type_simd_arm64.go.tmpl delete mode 100644 go/arrow/math/type_test.go.tmpl delete mode 100644 go/arrow/math/uint64.go delete mode 100644 go/arrow/math/uint64.tmpldata delete mode 100644 go/arrow/math/uint64_amd64.go delete mode 100644 go/arrow/math/uint64_arm64.go delete mode 100644 go/arrow/math/uint64_avx2_amd64.go delete mode 100644 go/arrow/math/uint64_avx2_amd64.s delete mode 100755 go/arrow/math/uint64_neon_arm64.go delete mode 100755 go/arrow/math/uint64_neon_arm64.s delete mode 100644 go/arrow/math/uint64_noasm.go delete mode 100644 go/arrow/math/uint64_ppc64le.go delete mode 100644 go/arrow/math/uint64_s390x.go delete mode 100644 go/arrow/math/uint64_sse4_amd64.go delete mode 100644 go/arrow/math/uint64_sse4_amd64.s delete mode 100644 go/arrow/math/uint64_test.go delete mode 100644 go/arrow/memory/Makefile delete mode 100644 go/arrow/memory/_lib/.gitignore delete mode 100644 go/arrow/memory/_lib/CMakeLists.txt delete mode 100644 go/arrow/memory/_lib/arch.h delete mode 100644 go/arrow/memory/_lib/memory.c delete mode 100644 go/arrow/memory/_lib/memory_avx2.s delete mode 100644 go/arrow/memory/_lib/memory_neon.s delete mode 100644 go/arrow/memory/_lib/memory_sse4.s delete mode 100644 go/arrow/memory/allocator.go delete mode 100644 go/arrow/memory/buffer.go delete mode 100644 go/arrow/memory/buffer_test.go delete mode 100644 go/arrow/memory/cgo_allocator.go delete mode 100644 go/arrow/memory/cgo_allocator_defaults.go delete mode 100644 go/arrow/memory/cgo_allocator_logging.go delete mode 100644 go/arrow/memory/cgo_allocator_test.go delete mode 100644 go/arrow/memory/checked_allocator.go delete mode 100644 
go/arrow/memory/default_allocator.go delete mode 100644 go/arrow/memory/default_mallocator.go delete mode 100644 go/arrow/memory/default_mallocator_test.go delete mode 100644 go/arrow/memory/doc.go delete mode 100644 go/arrow/memory/go_allocator.go delete mode 100644 go/arrow/memory/go_allocator_test.go delete mode 100644 go/arrow/memory/internal/cgoalloc/allocator.cc delete mode 100644 go/arrow/memory/internal/cgoalloc/allocator.go delete mode 100644 go/arrow/memory/internal/cgoalloc/allocator.h delete mode 100644 go/arrow/memory/internal/cgoalloc/helpers.h delete mode 100644 go/arrow/memory/mallocator/doc.go delete mode 100644 go/arrow/memory/mallocator/mallocator.go delete mode 100644 go/arrow/memory/mallocator/mallocator_test.go delete mode 100644 go/arrow/memory/mallocator/mallocator_util.go delete mode 100644 go/arrow/memory/memory.go delete mode 100644 go/arrow/memory/memory_amd64.go delete mode 100755 go/arrow/memory/memory_arm64.go delete mode 100644 go/arrow/memory/memory_avx2_amd64.go delete mode 100644 go/arrow/memory/memory_avx2_amd64.s delete mode 100644 go/arrow/memory/memory_js_wasm.go delete mode 100755 go/arrow/memory/memory_neon_arm64.go delete mode 100755 go/arrow/memory/memory_neon_arm64.s delete mode 100644 go/arrow/memory/memory_noasm.go delete mode 100644 go/arrow/memory/memory_sse4_amd64.go delete mode 100644 go/arrow/memory/memory_sse4_amd64.s delete mode 100644 go/arrow/memory/memory_test.go delete mode 100644 go/arrow/memory/util.go delete mode 100644 go/arrow/memory/util_test.go delete mode 100644 go/arrow/numeric.schema.json delete mode 100644 go/arrow/numeric.tmpldata delete mode 100644 go/arrow/record.go delete mode 100644 go/arrow/scalar/append.go delete mode 100644 go/arrow/scalar/append_test.go delete mode 100644 go/arrow/scalar/binary.go delete mode 100644 go/arrow/scalar/compare.go delete mode 100644 go/arrow/scalar/nested.go delete mode 100644 go/arrow/scalar/numeric.gen.go delete mode 100644 go/arrow/scalar/numeric.gen.go.tmpl 
delete mode 100644 go/arrow/scalar/numeric.gen.go.tmpldata delete mode 100644 go/arrow/scalar/numeric.gen_test.go delete mode 100644 go/arrow/scalar/numeric.gen_test.go.tmpl delete mode 100644 go/arrow/scalar/parse.go delete mode 100644 go/arrow/scalar/scalar.go delete mode 100644 go/arrow/scalar/scalar_test.go delete mode 100644 go/arrow/scalar/temporal.go delete mode 100644 go/arrow/schema.go delete mode 100644 go/arrow/schema_test.go delete mode 100644 go/arrow/table.go delete mode 100644 go/arrow/tensor/numeric.gen.go delete mode 100644 go/arrow/tensor/numeric.gen.go.tmpl delete mode 100644 go/arrow/tensor/numeric.gen_test.go delete mode 100644 go/arrow/tensor/numeric.gen_test.go.tmpl delete mode 100644 go/arrow/tensor/tensor.go delete mode 100644 go/arrow/tensor/tensor_test.go delete mode 100644 go/arrow/tools.go delete mode 100644 go/arrow/type_string.go delete mode 100644 go/arrow/type_traits.go delete mode 100644 go/arrow/type_traits_boolean.go delete mode 100644 go/arrow/type_traits_decimal128.go delete mode 100644 go/arrow/type_traits_decimal256.go delete mode 100644 go/arrow/type_traits_float16.go delete mode 100644 go/arrow/type_traits_interval.go delete mode 100644 go/arrow/type_traits_numeric.gen.go delete mode 100644 go/arrow/type_traits_numeric.gen.go.tmpl delete mode 100644 go/arrow/type_traits_numeric.gen_test.go delete mode 100644 go/arrow/type_traits_numeric.gen_test.go.tmpl delete mode 100644 go/arrow/type_traits_test.go delete mode 100644 go/arrow/type_traits_timestamp.go delete mode 100644 go/arrow/type_traits_view.go delete mode 100644 go/arrow/unionmode_string.go delete mode 100644 go/arrow/util/byte_size.go delete mode 100644 go/arrow/util/byte_size_test.go delete mode 100644 go/arrow/util/messages/README.md delete mode 100644 go/arrow/util/messages/types.proto delete mode 100644 go/arrow/util/protobuf_reflect.go delete mode 100644 go/arrow/util/protobuf_reflect_test.go delete mode 100644 go/arrow/util/util_message/types.pb.go delete mode 
100644 go/go.mod delete mode 100644 go/go.sum delete mode 100644 go/internal/bitutils/bit_block_counter.go delete mode 100644 go/internal/bitutils/bit_block_counter_test.go delete mode 100644 go/internal/bitutils/bit_run_reader.go delete mode 100644 go/internal/bitutils/bit_run_reader_test.go delete mode 100644 go/internal/bitutils/bit_set_run_reader.go delete mode 100644 go/internal/bitutils/bit_set_run_reader_test.go delete mode 100644 go/internal/bitutils/bitmap_generate.go delete mode 100644 go/internal/bitutils/bitmap_generate_test.go delete mode 100644 go/internal/hashing/hash_funcs.go delete mode 100644 go/internal/hashing/hash_string.go delete mode 100644 go/internal/hashing/hashing_test.go delete mode 100644 go/internal/hashing/types.tmpldata delete mode 100644 go/internal/hashing/xxh3_memo_table.gen.go delete mode 100644 go/internal/hashing/xxh3_memo_table.gen.go.tmpl delete mode 100644 go/internal/hashing/xxh3_memo_table.go delete mode 100644 go/internal/json/json.go delete mode 100644 go/internal/json/json_tinygo.go delete mode 100644 go/internal/types/extension_types.go delete mode 100644 go/internal/utils/Makefile delete mode 100644 go/internal/utils/_lib/arch.h delete mode 100644 go/internal/utils/_lib/min_max.c delete mode 100644 go/internal/utils/_lib/min_max_avx2_amd64.s delete mode 100644 go/internal/utils/_lib/min_max_neon.s delete mode 100644 go/internal/utils/_lib/min_max_sse4_amd64.s delete mode 100644 go/internal/utils/_lib/transpose_ints.c delete mode 100644 go/internal/utils/_lib/transpose_ints_avx2_amd64.s delete mode 100644 go/internal/utils/_lib/transpose_ints_sse4_amd64.s delete mode 100644 go/internal/utils/buf_reader.go delete mode 100644 go/internal/utils/endians_default.go delete mode 100644 go/internal/utils/endians_s390x.go delete mode 100644 go/internal/utils/math.go delete mode 100644 go/internal/utils/min_max.go delete mode 100644 go/internal/utils/min_max_amd64.go delete mode 100644 go/internal/utils/min_max_arm64.go delete 
mode 100644 go/internal/utils/min_max_avx2_amd64.go delete mode 100644 go/internal/utils/min_max_avx2_amd64.s delete mode 100755 go/internal/utils/min_max_neon_arm64.go delete mode 100755 go/internal/utils/min_max_neon_arm64.s delete mode 100644 go/internal/utils/min_max_noasm.go delete mode 100644 go/internal/utils/min_max_ppc64le.go delete mode 100644 go/internal/utils/min_max_s390x.go delete mode 100644 go/internal/utils/min_max_sse4_amd64.go delete mode 100644 go/internal/utils/min_max_sse4_amd64.s delete mode 100644 go/internal/utils/recovery.go delete mode 100644 go/internal/utils/recovery_test.go delete mode 100644 go/internal/utils/ref_count.go delete mode 100644 go/internal/utils/transpose_ints.go delete mode 100644 go/internal/utils/transpose_ints.go.tmpl delete mode 100644 go/internal/utils/transpose_ints.tmpldata delete mode 100644 go/internal/utils/transpose_ints_amd64.go delete mode 100644 go/internal/utils/transpose_ints_amd64.go.tmpl delete mode 100644 go/internal/utils/transpose_ints_arm64.go delete mode 100644 go/internal/utils/transpose_ints_avx2_amd64.go delete mode 100644 go/internal/utils/transpose_ints_avx2_amd64.s delete mode 100644 go/internal/utils/transpose_ints_def.go delete mode 100644 go/internal/utils/transpose_ints_noasm.go delete mode 100644 go/internal/utils/transpose_ints_noasm.go.tmpl delete mode 100644 go/internal/utils/transpose_ints_ppc64le.go delete mode 100644 go/internal/utils/transpose_ints_s390x.go delete mode 100644 go/internal/utils/transpose_ints_s390x.go.tmpl delete mode 100644 go/internal/utils/transpose_ints_simd.go.tmpl delete mode 100644 go/internal/utils/transpose_ints_sse4_amd64.go delete mode 100644 go/internal/utils/transpose_ints_sse4_amd64.s delete mode 100644 go/internal/utils/transpose_ints_test.go delete mode 100644 go/parquet/.gitignore delete mode 100644 go/parquet/cmd/parquet_reader/README.md delete mode 100644 go/parquet/cmd/parquet_reader/dumper.go delete mode 100644 
go/parquet/cmd/parquet_reader/main.go delete mode 100644 go/parquet/cmd/parquet_reader/v0.7.1.parquet delete mode 100644 go/parquet/cmd/parquet_schema/main.go delete mode 100644 go/parquet/compress/brotli.go delete mode 100644 go/parquet/compress/compress.go delete mode 100644 go/parquet/compress/compress_test.go delete mode 100644 go/parquet/compress/gzip.go delete mode 100644 go/parquet/compress/lz4_raw.go delete mode 100644 go/parquet/compress/snappy.go delete mode 100644 go/parquet/compress/zstd.go delete mode 100644 go/parquet/doc.go delete mode 100644 go/parquet/encryption_properties.go delete mode 100644 go/parquet/encryption_properties_test.go delete mode 100644 go/parquet/encryption_read_config_test.go delete mode 100644 go/parquet/encryption_write_config_test.go delete mode 100644 go/parquet/file/column_reader.go delete mode 100755 go/parquet/file/column_reader_test.go delete mode 100644 go/parquet/file/column_reader_types.gen.go delete mode 100644 go/parquet/file/column_reader_types.gen.go.tmpl delete mode 100755 go/parquet/file/column_writer.go delete mode 100755 go/parquet/file/column_writer_test.go delete mode 100644 go/parquet/file/column_writer_types.gen.go delete mode 100644 go/parquet/file/column_writer_types.gen.go.tmpl delete mode 100644 go/parquet/file/file_reader.go delete mode 100644 go/parquet/file/file_reader_mmap.go delete mode 100644 go/parquet/file/file_reader_mmap_windows.go delete mode 100644 go/parquet/file/file_reader_test.go delete mode 100644 go/parquet/file/file_writer.go delete mode 100644 go/parquet/file/file_writer_test.go delete mode 100755 go/parquet/file/level_conversion.go delete mode 100644 go/parquet/file/level_conversion_test.go delete mode 100644 go/parquet/file/page_reader.go delete mode 100644 go/parquet/file/page_writer.go delete mode 100755 go/parquet/file/record_reader.go delete mode 100644 go/parquet/file/row_group_reader.go delete mode 100644 go/parquet/file/row_group_writer.go delete mode 100644 
go/parquet/file/row_group_writer_test.go delete mode 100644 go/parquet/internal/bmi/Makefile delete mode 100755 go/parquet/internal/bmi/_lib/arch.h delete mode 100644 go/parquet/internal/bmi/_lib/bitmap_bmi2.c delete mode 100644 go/parquet/internal/bmi/_lib/bitmap_bmi2.s delete mode 100644 go/parquet/internal/bmi/_lib/bitmap_neon.s delete mode 100644 go/parquet/internal/bmi/bitmap_bmi2_386.go delete mode 100644 go/parquet/internal/bmi/bitmap_bmi2_amd64.go delete mode 100644 go/parquet/internal/bmi/bitmap_bmi2_amd64.s delete mode 100644 go/parquet/internal/bmi/bitmap_bmi2_noasm.go delete mode 100644 go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go delete mode 100644 go/parquet/internal/bmi/bitmap_bmi2_s390x.go delete mode 100755 go/parquet/internal/bmi/bitmap_neon_arm64.go delete mode 100755 go/parquet/internal/bmi/bitmap_neon_arm64.s delete mode 100644 go/parquet/internal/bmi/bmi.go delete mode 100644 go/parquet/internal/bmi/bmi_amd64.go delete mode 100755 go/parquet/internal/bmi/bmi_arm64.go delete mode 100644 go/parquet/internal/bmi/bmi_test.go delete mode 100644 go/parquet/internal/debug/assert_off.go delete mode 100644 go/parquet/internal/debug/assert_on.go delete mode 100644 go/parquet/internal/debug/doc.go delete mode 100644 go/parquet/internal/debug/log_off.go delete mode 100644 go/parquet/internal/debug/log_on.go delete mode 100644 go/parquet/internal/encoding/boolean_decoder.go delete mode 100644 go/parquet/internal/encoding/boolean_encoder.go delete mode 100644 go/parquet/internal/encoding/byte_array_decoder.go delete mode 100644 go/parquet/internal/encoding/byte_array_encoder.go delete mode 100644 go/parquet/internal/encoding/byte_stream_split.go delete mode 100644 go/parquet/internal/encoding/decoder.go delete mode 100644 go/parquet/internal/encoding/delta_bit_packing.go delete mode 100644 go/parquet/internal/encoding/delta_byte_array.go delete mode 100644 go/parquet/internal/encoding/delta_byte_array_test.go delete mode 100644 
go/parquet/internal/encoding/delta_length_byte_array.go delete mode 100644 go/parquet/internal/encoding/encoder.go delete mode 100644 go/parquet/internal/encoding/encoding_benchmarks_test.go delete mode 100644 go/parquet/internal/encoding/encoding_test.go delete mode 100644 go/parquet/internal/encoding/fixed_len_byte_array_decoder.go delete mode 100644 go/parquet/internal/encoding/fixed_len_byte_array_encoder.go delete mode 100644 go/parquet/internal/encoding/levels.go delete mode 100644 go/parquet/internal/encoding/levels_test.go delete mode 100644 go/parquet/internal/encoding/memo_table.go delete mode 100644 go/parquet/internal/encoding/memo_table_test.go delete mode 100644 go/parquet/internal/encoding/memo_table_types.gen.go delete mode 100644 go/parquet/internal/encoding/memo_table_types.gen.go.tmpl delete mode 100644 go/parquet/internal/encoding/physical_types.tmpldata delete mode 100644 go/parquet/internal/encoding/plain_encoder_types.gen.go delete mode 100644 go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl delete mode 100644 go/parquet/internal/encoding/typed_encoder.gen.go delete mode 100644 go/parquet/internal/encoding/typed_encoder.gen.go.tmpl delete mode 100644 go/parquet/internal/encoding/types.go delete mode 100644 go/parquet/internal/encryption/aes.go delete mode 100644 go/parquet/internal/encryption/decryptor.go delete mode 100644 go/parquet/internal/encryption/encryptor.go delete mode 100644 go/parquet/internal/encryption/key_handling.go delete mode 100644 go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go delete mode 100644 go/parquet/internal/gen-go/parquet/parquet-consts.go delete mode 100644 go/parquet/internal/gen-go/parquet/parquet.go delete mode 100644 go/parquet/internal/gen-go/parquet/staticcheck.conf delete mode 100644 go/parquet/internal/testutils/pagebuilder.go delete mode 100644 go/parquet/internal/testutils/primitive_typed.go delete mode 100644 go/parquet/internal/testutils/random.go delete mode 100644 
go/parquet/internal/testutils/random_arrow.go delete mode 100644 go/parquet/internal/testutils/utils.go delete mode 100644 go/parquet/internal/thrift/helpers.go delete mode 100644 go/parquet/internal/utils/Makefile delete mode 100644 go/parquet/internal/utils/_lib/README.md delete mode 100644 go/parquet/internal/utils/_lib/arch.h delete mode 100644 go/parquet/internal/utils/_lib/bit_packing_avx2.c delete mode 100644 go/parquet/internal/utils/_lib/bit_packing_avx2.s delete mode 100755 go/parquet/internal/utils/_lib/bit_packing_neon.c delete mode 100644 go/parquet/internal/utils/_lib/script.sed delete mode 100644 go/parquet/internal/utils/_lib/unpack_bool.c delete mode 100644 go/parquet/internal/utils/_lib/unpack_bool_avx2.s delete mode 100644 go/parquet/internal/utils/_lib/unpack_bool_neon.s delete mode 100644 go/parquet/internal/utils/_lib/unpack_bool_sse4.s delete mode 100644 go/parquet/internal/utils/bit_benchmark_test.go delete mode 100644 go/parquet/internal/utils/bit_packing_amd64.go delete mode 100644 go/parquet/internal/utils/bit_packing_arm64.go delete mode 100644 go/parquet/internal/utils/bit_packing_avx2_amd64.go delete mode 100644 go/parquet/internal/utils/bit_packing_avx2_amd64.s delete mode 100644 go/parquet/internal/utils/bit_packing_default.go delete mode 100755 go/parquet/internal/utils/bit_packing_neon_arm64.go delete mode 100644 go/parquet/internal/utils/bit_packing_neon_arm64.s delete mode 100644 go/parquet/internal/utils/bit_reader.go delete mode 100644 go/parquet/internal/utils/bit_reader_test.go delete mode 100644 go/parquet/internal/utils/bit_writer.go delete mode 100644 go/parquet/internal/utils/bitmap_writer.go delete mode 100644 go/parquet/internal/utils/bitmap_writer_test.go delete mode 100644 go/parquet/internal/utils/clib_amd64.s delete mode 100644 go/parquet/internal/utils/dictionary.go delete mode 100644 go/parquet/internal/utils/physical_types.tmpldata delete mode 100644 go/parquet/internal/utils/rle.go delete mode 100644 
go/parquet/internal/utils/typed_rle_dict.gen.go delete mode 100644 go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl delete mode 100644 go/parquet/internal/utils/unpack_bool.go delete mode 100644 go/parquet/internal/utils/unpack_bool_amd64.go delete mode 100644 go/parquet/internal/utils/unpack_bool_arm64.go delete mode 100644 go/parquet/internal/utils/unpack_bool_avx2_amd64.go delete mode 100644 go/parquet/internal/utils/unpack_bool_avx2_amd64.s delete mode 100644 go/parquet/internal/utils/unpack_bool_default.go delete mode 100755 go/parquet/internal/utils/unpack_bool_neon_arm64.go delete mode 100755 go/parquet/internal/utils/unpack_bool_neon_arm64.s delete mode 100644 go/parquet/internal/utils/unpack_bool_noasm.go delete mode 100644 go/parquet/internal/utils/unpack_bool_sse4_amd64.go delete mode 100644 go/parquet/internal/utils/unpack_bool_sse4_amd64.s delete mode 100644 go/parquet/internal/utils/write_utils.go delete mode 100644 go/parquet/metadata/app_version.go delete mode 100644 go/parquet/metadata/column_chunk.go delete mode 100644 go/parquet/metadata/file.go delete mode 100644 go/parquet/metadata/metadata_test.go delete mode 100644 go/parquet/metadata/row_group.go delete mode 100644 go/parquet/metadata/stat_compare_test.go delete mode 100644 go/parquet/metadata/statistics.go delete mode 100644 go/parquet/metadata/statistics_test.go delete mode 100644 go/parquet/metadata/statistics_types.gen.go delete mode 100644 go/parquet/metadata/statistics_types.gen.go.tmpl delete mode 100644 go/parquet/metadata/statistics_types.tmpldata delete mode 100644 go/parquet/pqarrow/column_readers.go delete mode 100644 go/parquet/pqarrow/doc.go delete mode 100644 go/parquet/pqarrow/encode_arrow.go delete mode 100644 go/parquet/pqarrow/encode_arrow_test.go delete mode 100644 go/parquet/pqarrow/encode_dict_compute.go delete mode 100644 go/parquet/pqarrow/encode_dict_nocompute.go delete mode 100644 go/parquet/pqarrow/encode_dictionary_test.go delete mode 100755 
go/parquet/pqarrow/file_reader.go delete mode 100644 go/parquet/pqarrow/file_reader_test.go delete mode 100644 go/parquet/pqarrow/file_writer.go delete mode 100644 go/parquet/pqarrow/file_writer_test.go delete mode 100644 go/parquet/pqarrow/helpers.go delete mode 100644 go/parquet/pqarrow/path_builder.go delete mode 100644 go/parquet/pqarrow/path_builder_test.go delete mode 100755 go/parquet/pqarrow/properties.go delete mode 100644 go/parquet/pqarrow/reader_writer_test.go delete mode 100644 go/parquet/pqarrow/schema.go delete mode 100644 go/parquet/pqarrow/schema_test.go delete mode 100644 go/parquet/reader_properties.go delete mode 100644 go/parquet/reader_writer_properties_test.go delete mode 100644 go/parquet/schema/column.go delete mode 100644 go/parquet/schema/converted_types.go delete mode 100644 go/parquet/schema/converted_types_test.go delete mode 100644 go/parquet/schema/helpers.go delete mode 100644 go/parquet/schema/helpers_test.go delete mode 100644 go/parquet/schema/logical_types.go delete mode 100644 go/parquet/schema/logical_types_test.go delete mode 100644 go/parquet/schema/node.go delete mode 100644 go/parquet/schema/reflection.go delete mode 100644 go/parquet/schema/reflection_test.go delete mode 100644 go/parquet/schema/schema.go delete mode 100644 go/parquet/schema/schema_element_test.go delete mode 100644 go/parquet/schema/schema_flatten_test.go delete mode 100644 go/parquet/schema/schema_test.go delete mode 100644 go/parquet/tools.go delete mode 100644 go/parquet/types.go delete mode 100644 go/parquet/version_string.go delete mode 100644 go/parquet/writer_properties.go diff --git a/go/LICENSE.txt b/go/LICENSE.txt deleted file mode 100644 index 57310329835da..0000000000000 --- a/go/LICENSE.txt +++ /dev/null @@ -1,1791 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- - -src/arrow/util (some portions): Apache 2.0, and 3-clause BSD - -Some portions of this module are derived from code in the Chromium project, -copyright (c) Google inc and (c) The Chromium Authors and licensed under the -Apache 2.0 License or the under the 3-clause BSD license: - - Copyright (c) 2013 The Chromium Authors. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - * Neither the name of Google Inc. nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project includes code from Daniel Lemire's FrameOfReference project. - -https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp - -Copyright: 2013 Daniel Lemire -Home page: http://lemire.me/en/ -Project page: https://github.com/lemire/FrameOfReference -License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from the TensorFlow project - -Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This project includes code from the NumPy project. - -https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 - -https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c - -Copyright (c) 2005-2017, NumPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the NumPy Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project includes code from the Boost project - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -This project includes code from the FlatBuffers project - -Copyright 2014 Google Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This project includes code from the tslib project - -Copyright 2015 Microsoft Corporation. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -This project includes code from the jemalloc project - -https://github.com/jemalloc/jemalloc - -Copyright (C) 2002-2017 Jason Evans . -All rights reserved. 
-Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice(s), - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice(s), - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- - -This project includes code from the Go project, BSD 3-clause license + PATENTS -weak patent termination clause -(https://github.com/golang/go/blob/master/PATENTS). - -Copyright (c) 2009 The Go Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project includes code from the hs2client - -https://github.com/cloudera/hs2client - -Copyright 2016 Cloudera Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- --------------------------------------------------------------------------------- - -The script ci/scripts/util_wait_for_it.sh has the following license - -Copyright (c) 2016 Giles Hall - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------------- - -The script r/configure has the following license (MIT) - -Copyright (c) 2017, Jeroen Ooms and Jim Hester - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - --------------------------------------------------------------------------------- - -cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and -cpp/src/arrow/util/logging-test.cc are adapted from -Ray Project (https://github.com/ray-project/ray) (Apache 2.0). - -Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- -The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, -cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, -cpp/src/arrow/vendored/datetime/ios.mm, -cpp/src/arrow/vendored/datetime/tz.cpp are adapted from -Howard Hinnant's date library (https://github.com/HowardHinnant/date) -It is licensed under MIT license. 
- -The MIT License (MIT) -Copyright (c) 2015, 2016, 2017 Howard Hinnant -Copyright (c) 2016 Adrian Colomitchi -Copyright (c) 2017 Florian Dang -Copyright (c) 2017 Paul Thompson -Copyright (c) 2018 Tomasz Kamiński - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- --------------------------------------------------------------------------------- - -The file cpp/src/arrow/util/utf8.h includes code adapted from the page - https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ -with the following license (MIT) - -Copyright (c) 2008-2009 Bjoern Hoehrmann - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- --------------------------------------------------------------------------------- - -The file cpp/src/arrow/vendored/string_view.hpp has the following license - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/xxhash/ have the following license -(BSD 2-Clause License) - -xxHash Library -Copyright (c) 2012-2014, Yann Collet -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- xxHash homepage: http://www.xxhash.com -- xxHash source repository : https://github.com/Cyan4973/xxHash - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/double-conversion/ have the following license -(BSD 3-Clause License) - -Copyright 2006-2011, the V8 project authors. All rights reserved. -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - * Neither the name of Google Inc. nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/uriparser/ have the following license -(BSD 3-Clause License) - -uriparser - RFC 3986 URI parsing library - -Copyright (C) 2007, Weijia Song -Copyright (C) 2007, Sebastian Pipping -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - - * Redistributions of source code must retain the above - copyright notice, this list of conditions and the following - disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. 
- - * Neither the name of the nor the names of its - contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -The files under dev/tasks/conda-recipes have the following license - -BSD 3-clause license -Copyright (c) 2015-2018, conda-forge -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors - may be used to endorse or promote products derived from this software without - specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -The files in cpp/src/arrow/vendored/utf8cpp/ have the following license - -Copyright 2006 Nemanja Trifunovic - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -This project includes code from Apache Kudu. - - * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake - -Copyright: 2016 The Apache Software Foundation. -Home page: https://kudu.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from Apache Impala (incubating), formerly -Impala. The Impala code and rights were donated to the ASF as part of the -Incubator process after the initial code imports into Apache Parquet. - -Copyright: 2012 Cloudera, Inc. -Copyright: 2016 The Apache Software Foundation. -Home page: http://impala.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from Apache Aurora. - -* dev/release/{release,changelog,release-candidate} are based on the scripts from - Apache Aurora - -Copyright: 2016 The Apache Software Foundation. -Home page: https://aurora.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This project includes code from the Google styleguide. - -* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. - -Copyright: 2009 Google Inc. All rights reserved. -Homepage: https://github.com/google/styleguide -License: 3-clause BSD - --------------------------------------------------------------------------------- - -This project includes code from Snappy. 
- -* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code - from Google's Snappy project. - -Copyright: 2009 Google Inc. All rights reserved. -Homepage: https://github.com/google/snappy -License: 3-clause BSD - --------------------------------------------------------------------------------- - -This project includes code from the manylinux project. - -* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, - requirements.txt} are based on code from the manylinux project. - -Copyright: 2016 manylinux -Homepage: https://github.com/pypa/manylinux -License: The MIT License (MIT) - --------------------------------------------------------------------------------- - -This project includes code from the cymove project: - -* python/pyarrow/includes/common.pxd includes code from the cymove project - -The MIT License (MIT) -Copyright (c) 2019 Omer Ozarslan - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -OR OTHER DEALINGS IN THE SOFTWARE. 
- --------------------------------------------------------------------------------- - -The projects includes code from the Ursabot project under the dev/archery -directory. - -License: BSD 2-Clause - -Copyright 2019 RStudio, Inc. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This project include code from CMake. - -* cpp/cmake_modules/FindGTest.cmake is based on code from CMake. - -Copyright: Copyright 2000-2019 Kitware, Inc. and Contributors -Homepage: https://gitlab.kitware.com/cmake/cmake -License: 3-clause BSD - --------------------------------------------------------------------------------- - -This project include code from mingw-w64. 
- -* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 - -Copyright (c) 2009 - 2013 by the mingw-w64 project -Homepage: https://mingw-w64.org -License: Zope Public License (ZPL) Version 2.1. - ---------------------------------------------------------------------------------- - -This project include code from Google's Asylo project. - -* cpp/src/arrow/result.h is based on status_or.h - -Copyright (c) Copyright 2017 Asylo authors -Homepage: https://asylo.dev/ -License: Apache 2.0 - --------------------------------------------------------------------------------- - -This project includes code from Google's protobuf project - -* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN - -Copyright 2008 Google Inc. All rights reserved. -Homepage: https://developers.google.com/protocol-buffers/ -License: - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Code generated by the Protocol Buffer compiler is owned by the owner -of the input file used when generating it. This code is not -standalone and requires a support library to be linked with it. This -support library is itself covered by the above license. - --------------------------------------------------------------------------------- - -3rdparty dependency LLVM is statically linked in certain binary distributions. -Additionally some sections of source code have been derived from sources in LLVM -and have been clearly labeled as such. LLVM has the following license: - -============================================================================== -LLVM Release License -============================================================================== -University of Illinois/NCSA -Open Source License - -Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign. -All rights reserved. 
- -Developed by: - - LLVM Team - - University of Illinois at Urbana-Champaign - - http://llvm.org - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal with -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimers. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimers in the - documentation and/or other materials provided with the distribution. - - * Neither the names of the LLVM Team, University of Illinois at - Urbana-Champaign, nor the names of its contributors may be used to - endorse or promote products derived from this Software without specific - prior written permission. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS -FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE -SOFTWARE. - -============================================================================== -Copyrights and Licenses for Third Party Software Distributed with LLVM: -============================================================================== -The LLVM software contains code written by third parties. Such software will -have its own individual LICENSE.TXT file in the directory in which it appears. 
-This file will describe the copyrights, license, and restrictions which apply -to that code. - -The disclaimer of warranty in the University of Illinois Open Source License -applies to all code in the LLVM Distribution, and nothing in any of the -other licenses gives permission to use the names of the LLVM Team or the -University of Illinois to endorse or promote products derived from this -Software. - -The following pieces of software have additional or alternate copyrights, -licenses, and/or restrictions: - -Program Directory -------- --------- -Google Test llvm/utils/unittest/googletest -OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} -pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} -ARM contributions llvm/lib/Target/ARM/LICENSE.TXT -md5 contributions llvm/lib/Support/MD5.cpp llvm/include/llvm/Support/MD5.h - --------------------------------------------------------------------------------- - -3rdparty dependency gRPC is statically linked in certain binary -distributions, like the python wheels. gRPC has the following license: - -Copyright 2014 gRPC authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -3rdparty dependency Apache Thrift is statically linked in certain binary -distributions, like the python wheels. 
Apache Thrift has the following license: - -Apache Thrift -Copyright (C) 2006 - 2019, The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - --------------------------------------------------------------------------------- - -3rdparty dependency Apache ORC is statically linked in certain binary -distributions, like the python wheels. Apache ORC has the following license: - -Apache ORC -Copyright 2013-2019 The Apache Software Foundation - -This product includes software developed by The Apache Software -Foundation (http://www.apache.org/). - -This product includes software developed by Hewlett-Packard: -(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- --------------------------------------------------------------------------------- - -3rdparty dependency zstd is statically linked in certain binary -distributions, like the python wheels. ZSTD has the following license: - -BSD License - -For Zstandard software - -Copyright (c) 2016-present, Facebook, Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name Facebook nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency lz4 is statically linked in certain binary -distributions, like the python wheels. 
lz4 has the following license: - -LZ4 Library -Copyright (c) 2011-2016, Yann Collet -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency Brotli is statically linked in certain binary -distributions, like the python wheels. Brotli has the following license: - -Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - --------------------------------------------------------------------------------- - -3rdparty dependency snappy is statically linked in certain binary -distributions, like the python wheels. snappy has the following license: - -Copyright 2011, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Google Inc. 
nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -=== - -Some of the benchmark data in testdata/ is licensed differently: - - - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and - is licensed under the Creative Commons Attribution 3.0 license - (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ - for more information. - - - kppkn.gtb is taken from the Gaviota chess tablebase set, and - is licensed under the MIT License. See - https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 - for more information. - - - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper - “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA - Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, - which is licensed under the CC-BY license. See - http://www.ploscompbiol.org/static/license for more ifnormation. - - - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project - Gutenberg. 
The first three have expired copyrights and are in the public - domain; the latter does not have expired copyright, but is still in the - public domain according to the license information - (http://www.gutenberg.org/ebooks/53). - --------------------------------------------------------------------------------- - -3rdparty dependency gflags is statically linked in certain binary -distributions, like the python wheels. gflags has the following license: - -Copyright (c) 2006, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- --------------------------------------------------------------------------------- - -3rdparty dependency glog is statically linked in certain binary -distributions, like the python wheels. glog has the following license: - -Copyright (c) 2008, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -A function gettimeofday in utilities.cc is based on - -http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd - -The license of this code is: - -Copyright (c) 2003-2008, Jouni Malinen and contributors -All Rights Reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name(s) of the above-listed copyright holder(s) nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency re2 is statically linked in certain binary -distributions, like the python wheels. re2 has the following license: - -Copyright (c) 2009 The RE2 Authors. All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - * Neither the name of Google Inc. nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -3rdparty dependency c-ares is statically linked in certain binary -distributions, like the python wheels. c-ares has the following license: - -# c-ares license - -Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS -file. - -Copyright 1998 by the Massachusetts Institute of Technology. 
- -Permission to use, copy, modify, and distribute this software and its -documentation for any purpose and without fee is hereby granted, provided that -the above copyright notice appear in all copies and that both that copyright -notice and this permission notice appear in supporting documentation, and that -the name of M.I.T. not be used in advertising or publicity pertaining to -distribution of the software without specific, written prior permission. -M.I.T. makes no representations about the suitability of this software for any -purpose. It is provided "as is" without express or implied warranty. - --------------------------------------------------------------------------------- - -3rdparty dependency zlib is redistributed as a dynamically linked shared -library in certain binary distributions, like the python wheels. In the future -this will likely change to static linkage. zlib has the following license: - -zlib.h -- interface of the 'zlib' general purpose compression library - version 1.2.11, January 15th, 2017 - - Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. 
- - Jean-loup Gailly Mark Adler - jloup@gzip.org madler@alumni.caltech.edu - --------------------------------------------------------------------------------- - -3rdparty dependency openssl is redistributed as a dynamically linked shared -library in certain binary distributions, like the python wheels. openssl -preceding version 3 has the following license: - - LICENSE ISSUES - ============== - - The OpenSSL toolkit stays under a double license, i.e. both the conditions of - the OpenSSL License and the original SSLeay license apply to the toolkit. - See below for the actual license texts. - - OpenSSL License - --------------- - -/* ==================================================================== - * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. All advertising materials mentioning features or use of this - * software must display the following acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - * endorse or promote products derived from this software without - * prior written permission. For written permission, please contact - * openssl-core@openssl.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - * nor may "OpenSSL" appear in their names without prior written - * permission of the OpenSSL Project. 
- * - * 6. Redistributions of any form whatsoever must retain the following - * acknowledgment: - * "This product includes software developed by the OpenSSL Project - * for use in the OpenSSL Toolkit (http://www.openssl.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com). This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - - Original SSLeay License - ----------------------- - -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - * - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to. The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code. 
The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - * - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * "This product includes cryptographic software written by - * Eric Young (eay@cryptsoft.com)" - * The word 'cryptographic' can be left out if the rouines from the library - * being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from - * the apps directory (application code) you must include an acknowledgement: - * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - * - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed. i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] - */ - --------------------------------------------------------------------------------- - -This project includes code from the rtools-backports project. - -* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code - from the rtools-backports project. - -Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. -All rights reserved. -Homepage: https://github.com/r-windows/rtools-backports -License: 3-clause BSD - --------------------------------------------------------------------------------- - -Some code from pandas has been adapted for the pyarrow codebase. pandas is -available under the 3-clause BSD license, which follows: - -pandas license -============== - -Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team -All rights reserved. - -Copyright (c) 2008-2011 AQR Capital Management, LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the copyright holder nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -Some bits from DyND, in particular aspects of the build system, have been -adapted from libdynd and dynd-python under the terms of the BSD 2-clause -license - -The BSD 2-Clause License - - Copyright (C) 2011-12, Dynamic NDArray Developers - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Dynamic NDArray Developers list: - - * Mark Wiebe - * Continuum Analytics - --------------------------------------------------------------------------------- - -Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted -for PyArrow. Ibis is released under the Apache License, Version 2.0. - --------------------------------------------------------------------------------- - -dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: - -BSD 2-Clause License - -Copyright (c) 2009-present, Homebrew contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ----------------------------------------------------------------------- - -cpp/src/arrow/vendored/base64.cpp has the following license - -ZLIB License - -Copyright (C) 2004-2017 René Nyffenegger - -This source code is provided 'as-is', without any express or implied -warranty. In no event will the author be held liable for any damages arising -from the use of this software. - -Permission is granted to anyone to use this software for any purpose, including -commercial applications, and to alter it and redistribute it freely, subject to -the following restrictions: - -1. The origin of this source code must not be misrepresented; you must not - claim that you wrote the original source code. If you use this source code - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - -2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original source code. - -3. 
This notice may not be removed or altered from any source distribution. - -René Nyffenegger rene.nyffenegger@adp-gmbh.ch - --------------------------------------------------------------------------------- - -The file cpp/src/arrow/vendored/optional.hpp has the following license - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - --------------------------------------------------------------------------------- - -The file cpp/src/arrow/vendored/musl/strptime.c has the following license - -Copyright © 2005-2020 Rich Felker, et al. 
- -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/go/README.md b/go/README.md deleted file mode 100644 index ec824229729a0..0000000000000 --- a/go/README.md +++ /dev/null @@ -1,147 +0,0 @@ - - -Apache Arrow for Go -=================== - -[![Go Reference](https://pkg.go.dev/badge/github.com/apache/arrow/go/v18.svg)](https://pkg.go.dev/github.com/apache/arrow/go/v18) - -[Apache Arrow][arrow] is a cross-language development platform for in-memory -data. It specifies a standardized language-independent columnar memory format -for flat and hierarchical data, organized for efficient analytic operations on -modern hardware. It also provides computational libraries and zero-copy -streaming messaging and inter-process communication. - -### A note about FlightSQL drivers - -Go FlightSQL drivers live in the -[ADBC repository](https://github.com/apache/arrow-adbc/tree/main/go/adbc). 
-In particular, to use the Golang `database/sql` interface: -```golang -import ( - "database/sql" - _ "github.com/apache/arrow-adbc/go/adbc/sqldriver/flightsql" -) - -func main() { - dsn := "uri=grpc://localhost:12345;username=mickeymouse;password=p@55w0RD" - db, err := sql.Open("flightsql", dsn) - ... -} -``` - -DSN option keys are expressed as `k=v`, delimited with `;`. -Some options keys are defined in ADBC, others are defined in the FlightSQL ADBC driver. -- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options) -- ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158) -- FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81) - -Reference Counting ------------------- - -The library makes use of reference counting so that it can track when memory -buffers are no longer used. This allows Arrow to update resource accounting, -pool memory such and track overall memory usage as objects are created and -released. Types expose two methods to deal with this pattern. The `Retain` -method will increase the reference count by 1 and `Release` method will reduce -the count by 1. Once the reference count of an object is zero, any associated -object will be freed. `Retain` and `Release` are safe to call from multiple -goroutines. - -### When to call `Retain` / `Release`? - -* If you are passed an object and wish to take ownership of it, you must call - `Retain`. You must later pair this with a call to `Release` when you no - longer need the object. "Taking ownership" typically means you wish to - access the object outside the scope of the current function call. - -* You own any object you create via functions whose name begins with `New` or - `Copy` or when receiving an object over a channel. 
Therefore you must call - `Release` once you no longer need the object. - -* If you send an object over a channel, you must call `Retain` before sending - it as the receiver is assumed to own the object and will later call `Release` - when it no longer needs the object. - -Performance ------------ - -The arrow package makes extensive use of [c2goasm][] to leverage LLVM's -advanced optimizer and generate PLAN9 assembly functions from C/C++ code. The -arrow package can be compiled without these optimizations using the `noasm` -build tag. Alternatively, by configuring an environment variable, it is -possible to dynamically configure which architecture optimizations are used at -runtime. We use the (cpu)[https://pkg.go.dev/golang.org/x/sys/cpu] package to -check dynamically for these features. - -### Example Usage - -The following benchmarks demonstrate summing an array of 8192 values using -various optimizations. - -Disable no architecture optimizations (thus using AVX2): - -```sh -$ INTEL_DISABLE_EXT=NONE go test -bench=8192 -run=. ./math -goos: darwin -goarch: amd64 -pkg: github.com/apache/arrow/go/arrow/math -BenchmarkFloat64Funcs_Sum_8192-8 2000000 687 ns/op 95375.41 MB/s -BenchmarkInt64Funcs_Sum_8192-8 2000000 719 ns/op 91061.06 MB/s -BenchmarkUint64Funcs_Sum_8192-8 2000000 691 ns/op 94797.29 MB/s -PASS -ok github.com/apache/arrow/go/arrow/math 6.444s -``` - -**NOTE:** `NONE` is simply ignored, thus enabling optimizations for AVX2 and SSE4 - ----- - -Disable AVX2 architecture optimizations: - -```sh -$ INTEL_DISABLE_EXT=AVX2 go test -bench=8192 -run=. 
./math -goos: darwin -goarch: amd64 -pkg: github.com/apache/arrow/go/arrow/math -BenchmarkFloat64Funcs_Sum_8192-8 1000000 1912 ns/op 34263.63 MB/s -BenchmarkInt64Funcs_Sum_8192-8 1000000 1392 ns/op 47065.57 MB/s -BenchmarkUint64Funcs_Sum_8192-8 1000000 1405 ns/op 46636.41 MB/s -PASS -ok github.com/apache/arrow/go/arrow/math 4.786s -``` - ----- - -Disable ALL architecture optimizations, thus using pure Go implementation: - -```sh -$ INTEL_DISABLE_EXT=ALL go test -bench=8192 -run=. ./math -goos: darwin -goarch: amd64 -pkg: github.com/apache/arrow/go/arrow/math -BenchmarkFloat64Funcs_Sum_8192-8 200000 10285 ns/op 6371.41 MB/s -BenchmarkInt64Funcs_Sum_8192-8 500000 3892 ns/op 16837.37 MB/s -BenchmarkUint64Funcs_Sum_8192-8 500000 3929 ns/op 16680.00 MB/s -PASS -ok github.com/apache/arrow/go/arrow/math 6.179s -``` - -[arrow]: https://arrow.apache.org -[c2goasm]: https://github.com/minio/c2goasm diff --git a/go/arrow/.editorconfig b/go/arrow/.editorconfig deleted file mode 100644 index a7ceaf938f92c..0000000000000 --- a/go/arrow/.editorconfig +++ /dev/null @@ -1,21 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -root = true - -[*.tmpl] -indent_style = tab -indent_size = 4 \ No newline at end of file diff --git a/go/arrow/.gitignore b/go/arrow/.gitignore deleted file mode 100644 index d4b831ae811da..0000000000000 --- a/go/arrow/.gitignore +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -### Go template -# Binaries for programs and plugins -*.exe -*.dll -*.so -*.dylib -*.o - -# Test binary, build with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out - -# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 -.glide/ - -bin/ -vendor/ \ No newline at end of file diff --git a/go/arrow/Gopkg.lock b/go/arrow/Gopkg.lock deleted file mode 100644 index 143e4f93b5eea..0000000000000 --- a/go/arrow/Gopkg.lock +++ /dev/null @@ -1,44 +0,0 @@ -# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. 
- - -[[projects]] - digest = "1:56c130d885a4aacae1dd9c7b71cfe39912c7ebc1ff7d2b46083c8812996dc43b" - name = "github.com/davecgh/go-spew" - packages = ["spew"] - pruneopts = "" - revision = "346938d642f2ec3594ed81d874461961cd0faa76" - version = "v1.1.0" - -[[projects]] - digest = "1:1d7e1867c49a6dd9856598ef7c3123604ea3daabf5b83f303ff457bcbc410b1d" - name = "github.com/pkg/errors" - packages = ["."] - pruneopts = "" - revision = "ba968bfe8b2f7e042a574c888954fccecfa385b4" - version = "v0.8.1" - -[[projects]] - digest = "1:256484dbbcd271f9ecebc6795b2df8cad4c458dd0f5fd82a8c2fa0c29f233411" - name = "github.com/pmezard/go-difflib" - packages = ["difflib"] - pruneopts = "" - revision = "792786c7400a136282c1664665ae0a8db921c6c2" - version = "v1.0.0" - -[[projects]] - digest = "1:2d0dc026c4aef5e2f3a0e06a4dabe268b840d8f63190cf6894e02134a03f52c5" - name = "github.com/stretchr/testify" - packages = ["assert"] - pruneopts = "" - revision = "b91bfb9ebec76498946beb6af7c0230c7cc7ba6c" - version = "v1.2.0" - -[solve-meta] - analyzer-name = "dep" - analyzer-version = 1 - input-imports = [ - "github.com/pkg/errors", - "github.com/stretchr/testify/assert", - ] - solver-name = "gps-cdcl" - solver-version = 1 diff --git a/go/arrow/Gopkg.toml b/go/arrow/Gopkg.toml deleted file mode 100644 index b27807d69f951..0000000000000 --- a/go/arrow/Gopkg.toml +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[[constraint]] - name = "github.com/stretchr/testify" - version = "1.2.0" - -[[constraint]] - name = "github.com/pkg/errors" - version = "0.8.1" \ No newline at end of file diff --git a/go/arrow/Makefile b/go/arrow/Makefile deleted file mode 100644 index 9c4a23262d0bd..0000000000000 --- a/go/arrow/Makefile +++ /dev/null @@ -1,54 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -GO_BUILD=go build -GO_GEN=go generate -GO_TEST?=go test -GOPATH=$(realpath ../../../../../..) - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') -SOURCES_NO_VENDOR := $(shell find . 
-path ./vendor -prune -o -name "*.go" -not -name '*_test.go' -print) - -.PHONEY: test bench assembly generate - -assembly: - @$(MAKE) -C memory assembly - @$(MAKE) -C math assembly - -generate: bin/tmpl - bin/tmpl -i -data=numeric.tmpldata type_traits_numeric.gen.go.tmpl type_traits_numeric.gen_test.go.tmpl array/numeric.gen.go.tmpl array/numericbuilder.gen_test.go.tmpl array/numericbuilder.gen.go.tmpl array/bufferbuilder_numeric.gen.go.tmpl - bin/tmpl -i -data=datatype_numeric.gen.go.tmpldata datatype_numeric.gen.go.tmpl - @$(MAKE) -C math generate - -fmt: $(SOURCES_NO_VENDOR) - goimports -w $^ - -bench: $(GO_SOURCES) | assembly - $(GO_TEST) $(GO_TEST_ARGS) -bench=. -run=- ./... - -bench-noasm: $(GO_SOURCES) - $(GO_TEST) $(GO_TEST_ARGS) -tags='noasm' -bench=. -run=- ./... - -test: $(GO_SOURCES) | assembly - $(GO_TEST) $(GO_TEST_ARGS) ./... - -test-noasm: $(GO_SOURCES) - $(GO_TEST) $(GO_TEST_ARGS) -tags='noasm' ./... - -bin/tmpl: _tools/tmpl/main.go - $(GO_BUILD) -o $@ ./_tools/tmpl - diff --git a/go/arrow/_examples/helloworld/main.go b/go/arrow/_examples/helloworld/main.go deleted file mode 100644 index 7f932801917a4..0000000000000 --- a/go/arrow/_examples/helloworld/main.go +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "os" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/math" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -func main() { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "intField", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "stringField", Type: arrow.BinaryTypes.String, Nullable: false}, - {Name: "floatField", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - builder := array.NewRecordBuilder(memory.DefaultAllocator, schema) - defer builder.Release() - - builder.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3, 4, 5}, nil) - builder.Field(1).(*array.StringBuilder).AppendValues([]string{"a", "b", "c", "d", "e"}, nil) - builder.Field(2).(*array.Float64Builder).AppendValues([]float64{1, 0, 3, 0, 5}, []bool{true, false, true, false, true}) - - rec := builder.NewRecord() - defer rec.Release() - - tbl := array.NewTableFromRecords(schema, []arrow.Record{rec}) - defer tbl.Release() - - sum := math.Float64.Sum(tbl.Column(2).Data().Chunk(0).(*array.Float64)) - if sum != 9 { - defer os.Exit(1) - } -} diff --git a/go/arrow/_tools/tmpl/main.go b/go/arrow/_tools/tmpl/main.go deleted file mode 100644 index 33cb1686981f4..0000000000000 --- a/go/arrow/_tools/tmpl/main.go +++ /dev/null @@ -1,268 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "bytes" - "flag" - "fmt" - "go/format" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strings" - "text/template" - - "github.com/apache/arrow/go/v18/internal/json" -) - -const Ext = ".tmpl" - -type pathSpec struct { - in, out string -} - -func (p *pathSpec) String() string { return p.in + " → " + p.out } -func (p *pathSpec) IsGoFile() bool { return filepath.Ext(p.out) == ".go" } - -func parsePath(path string) (string, string) { - p := strings.IndexByte(path, '=') - if p == -1 { - if filepath.Ext(path) != Ext { - errExit("template file '%s' must have .tmpl extension", path) - } - return path, path[:len(path)-len(Ext)] - } - - return path[:p], path[p+1:] -} - -type data struct { - In interface{} - D listValue -} - -func errExit(format string, a ...interface{}) { - fmt.Fprintf(os.Stderr, format, a...) 
- fmt.Fprintln(os.Stderr) - os.Exit(1) -} - -type listValue map[string]string - -func (l listValue) String() string { - res := make([]string, 0, len(l)) - for k, v := range l { - res = append(res, fmt.Sprintf("%s=%s", k, v)) - } - return strings.Join(res, ", ") -} - -func (l listValue) Set(v string) error { - nv := strings.Split(v, "=") - if len(nv) != 2 { - return fmt.Errorf("expected NAME=VALUE, got %s", v) - } - l[nv[0]] = nv[1] - return nil -} - -func main() { - var ( - dataArg = flag.String("data", "", "input JSON data") - gi = flag.Bool("i", false, "run goimports") - in = &data{D: make(listValue)} - ) - - flag.Var(&in.D, "d", "-d NAME=VALUE") - - flag.Parse() - if *dataArg == "" { - errExit("data option is required") - } - - if *gi { - if _, err := exec.LookPath("goimports"); err != nil { - errExit("failed to find goimports: %s", err.Error()) - } - formatter = formatSource - } else { - formatter = format.Source - } - - paths := flag.Args() - if len(paths) == 0 { - errExit("no tmpl files specified") - } - - specs := make([]pathSpec, len(paths)) - for i, p := range paths { - in, out := parsePath(p) - specs[i] = pathSpec{in: in, out: out} - } - - in.In = readData(*dataArg) - process(in, specs) -} - -func mustReadAll(path string) []byte { - data, err := ioutil.ReadFile(path) - if err != nil { - errExit(err.Error()) - } - - return data -} - -func readData(path string) interface{} { - data := mustReadAll(path) - var v interface{} - if err := json.Unmarshal(StripComments(data), &v); err != nil { - errExit("invalid JSON data: %s", err.Error()) - } - return v -} - -func fileMode(path string) os.FileMode { - stat, err := os.Stat(path) - if err != nil { - errExit(err.Error()) - } - return stat.Mode() -} - -var funcs = template.FuncMap{ - "lower": strings.ToLower, - "upper": strings.ToUpper, -} - -func process(data interface{}, specs []pathSpec) { - for _, spec := range specs { - var ( - t *template.Template - err error - ) - t, err = 
template.New("gen").Funcs(funcs).Parse(string(mustReadAll(spec.in))) - if err != nil { - errExit("error processing template '%s': %s", spec.in, err.Error()) - } - - var buf bytes.Buffer - if spec.IsGoFile() { - // preamble - fmt.Fprintf(&buf, "// Code generated by %s. DO NOT EDIT.\n", spec.in) - fmt.Fprintln(&buf) - } - err = t.Execute(&buf, data) - if err != nil { - errExit("error executing template '%s': %s", spec.in, err.Error()) - } - - generated := buf.Bytes() - if spec.IsGoFile() { - generated, err = formatter(generated) - if err != nil { - errExit("error formatting '%s': %s", spec.in, err.Error()) - } - } - - os.WriteFile(spec.out, generated, fileMode(spec.in)) - } -} - -var ( - formatter func([]byte) ([]byte, error) -) - -func formatSource(in []byte) ([]byte, error) { - r := bytes.NewReader(in) - cmd := exec.Command("goimports") - cmd.Stdin = r - out, err := cmd.Output() - if err != nil { - if ee, ok := err.(*exec.ExitError); ok { - return nil, fmt.Errorf("error running goimports: %s", string(ee.Stderr)) - } - return nil, fmt.Errorf("error running goimports: %s", string(out)) - } - - return out, nil -} - -func StripComments(raw []byte) []byte { - var ( - quoted, esc bool - comment bool - ) - - buf := bytes.Buffer{} - - for i := 0; i < len(raw); i++ { - b := raw[i] - - if comment { - switch b { - case '/': - comment = false - j := bytes.IndexByte(raw[i+1:], '\n') - if j == -1 { - i = len(raw) - } else { - i += j // keep new line - } - case '*': - j := bytes.Index(raw[i+1:], []byte("*/")) - if j == -1 { - i = len(raw) - } else { - i += j + 2 - comment = false - } - } - continue - } - - if esc { - esc = false - continue - } - - if b == '\\' && quoted { - esc = true - continue - } - - if b == '"' || b == '\'' { - quoted = !quoted - } - - if b == '/' && !quoted { - comment = true - continue - } - - buf.WriteByte(b) - } - - if quoted || esc || comment { - // unexpected state, so return raw bytes - return raw - } - - return buf.Bytes() -} diff --git 
a/go/arrow/_tools/tmpl/main_test.go b/go/arrow/_tools/tmpl/main_test.go deleted file mode 100644 index 831cf791e3a0b..0000000000000 --- a/go/arrow/_tools/tmpl/main_test.go +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "testing" -) - -func TestStripComments(t *testing.T) { - tests := []struct { - name string - in string - exp string - }{ - {name: "none", in: `[1,2,3]`, exp: `[1,2,3]`}, - {name: "single-line, line comment at end", in: `[1,2,3] // foo bar`, exp: `[1,2,3] `}, - {name: "single-line, block comment at end", in: `[1,2,3] /* foo bar */ `, exp: `[1,2,3] `}, - {name: "single-line, block comment at end", in: `[1,2,3] /* /* // */`, exp: `[1,2,3] `}, - {name: "single-line, block comment in middle", in: `[1,/* foo bar */2,3]`, exp: `[1,2,3]`}, - {name: "single-line, block comment in string", in: `[1,"/* foo bar */"]`, exp: `[1,"/* foo bar */"]`}, - {name: "single-line, malformed block comment", in: `[1,2,/*]`, exp: `[1,2,/*]`}, - {name: "single-line, malformed JSON", in: `[1,2,/]`, exp: `[1,2,/]`}, - - { - name: "multi-line", - in: `[ - 1, - 2, - 3 -]`, - exp: `[ - 1, - 2, - 3 -]`, - }, - { - name: "multi-line, multiple line comments", - in: `[ // foo - 1, // bar - 2, - 3 -] // fit`, - exp: `[ - 1, - 2, - 3 -] `, - }, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - got := string(StripComments([]byte(test.in))) - if got != test.exp { - t.Errorf("got:\n%s\nexp:\n%s", got, test.exp) - } - }) - } -} diff --git a/go/arrow/_tools/tools.go b/go/arrow/_tools/tools.go deleted file mode 100644 index 262880bca8fe4..0000000000000 --- a/go/arrow/_tools/tools.go +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build tools -// +build tools - -package _tools - -import ( - _ "golang.org/x/tools/cmd/goimports" - _ "golang.org/x/tools/cmd/stringer" -) diff --git a/go/arrow/array.go b/go/arrow/array.go deleted file mode 100644 index 768b30f8e0690..0000000000000 --- a/go/arrow/array.go +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package arrow - -import ( - "fmt" - - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// ArrayData is the underlying memory and metadata of an Arrow array, corresponding -// to the same-named object in the C++ implementation. -// -// The Array interface and subsequent typed objects provide strongly typed -// accessors which support marshalling and other patterns to the data. 
-// This interface allows direct access to the underlying raw byte buffers -// which allows for manipulating the internal data and casting. For example, -// one could cast the raw bytes from int64 to float64 like so: -// -// arrdata := GetMyInt64Data().Data() -// newdata := array.NewData(arrow.PrimitiveTypes.Float64, arrdata.Len(), -// arrdata.Buffers(), nil, arrdata.NullN(), arrdata.Offset()) -// defer newdata.Release() -// float64arr := array.NewFloat64Data(newdata) -// defer float64arr.Release() -// -// This is also useful in an analytics setting where memory may be reused. For -// example, if we had a group of operations all returning float64 such as: -// -// Log(Sqrt(Expr(arr))) -// -// The low-level implementations could have signatures such as: -// -// func Log(values arrow.ArrayData) arrow.ArrayData -// -// Another example would be a function that consumes one or more memory buffers -// in an input array and replaces them with newly-allocated data, changing the -// output data type as well. -type ArrayData interface { - // Retain increases the reference count by 1, it is safe to call - // in multiple goroutines simultaneously. - Retain() - // Release decreases the reference count by 1, it is safe to call - // in multiple goroutines simultaneously. Data is removed when reference - // count is 0. - Release() - // DataType returns the current datatype stored in the object. - DataType() DataType - // NullN returns the number of nulls for this data instance. - NullN() int - // Len returns the length of this data instance - Len() int - // Offset returns the offset into the raw buffers where this data begins - Offset() int - // Buffers returns the slice of raw data buffers for this data instance. Their - // meaning depends on the context of the data type. - Buffers() []*memory.Buffer - // Children returns the slice of children data instances, only relevant for - // nested data types. 
For instance, List data will have a single child containing - // elements of all the rows and Struct data will contain numfields children which - // are the arrays for each field of the struct. - Children() []ArrayData - // Reset allows reusing this ArrayData object by replacing the data in this ArrayData - // object without changing the reference count. - Reset(newtype DataType, newlength int, newbuffers []*memory.Buffer, newchildren []ArrayData, newnulls int, newoffset int) - // Dictionary returns the ArrayData object for the dictionary if this is a - // dictionary array, otherwise it will be nil. - Dictionary() ArrayData - // SizeInBytes returns the size of the ArrayData buffers and any children and/or dictionary in bytes. - SizeInBytes() uint64 -} - -// Array represents an immutable sequence of values using the Arrow in-memory format. -type Array interface { - json.Marshaler - - fmt.Stringer - - // DataType returns the type metadata for this instance. - DataType() DataType - - // NullN returns the number of null values in the array. - NullN() int - - // NullBitmapBytes returns a byte slice of the validity bitmap. - NullBitmapBytes() []byte - - // IsNull returns true if value at index is null. - // NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. - IsNull(i int) bool - - // IsValid returns true if value at index is not null. - // NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. - IsValid(i int) bool - // ValueStr returns the value at index as a string. - ValueStr(i int) string - - // Get single value to be marshalled with `json.Marshal` - GetOneForMarshal(i int) interface{} - - Data() ArrayData - - // Len returns the number of elements in the array. - Len() int - - // Retain increases the reference count by 1. - // Retain may be called simultaneously from multiple goroutines. - Retain() - - // Release decreases the reference count by 1. - // Release may be called simultaneously from multiple goroutines. 
- // When the reference count goes to zero, the memory is freed. - Release() -} diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go deleted file mode 100644 index ae33ca5417db0..0000000000000 --- a/go/arrow/array/array.go +++ /dev/null @@ -1,186 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -const ( - // UnknownNullCount specifies the NullN should be calculated from the null bitmap buffer. - UnknownNullCount = -1 - - // NullValueStr represents a null value in arrow.Array.ValueStr and in Builder.AppendValueFromString. - // It should be returned from the arrow.Array.ValueStr implementations. - // Using it as the value in Builder.AppendValueFromString should be equivalent to Builder.AppendNull. - NullValueStr = "(null)" -) - -type array struct { - refCount int64 - data *Data - nullBitmapBytes []byte -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. 
-func (a *array) Retain() { - atomic.AddInt64(&a.refCount, 1) -} - -// Release decreases the reference count by 1. -// Release may be called simultaneously from multiple goroutines. -// When the reference count goes to zero, the memory is freed. -func (a *array) Release() { - debug.Assert(atomic.LoadInt64(&a.refCount) > 0, "too many releases") - - if atomic.AddInt64(&a.refCount, -1) == 0 { - a.data.Release() - a.data, a.nullBitmapBytes = nil, nil - } -} - -// DataType returns the type metadata for this instance. -func (a *array) DataType() arrow.DataType { return a.data.dtype } - -// NullN returns the number of null values in the array. -func (a *array) NullN() int { - if a.data.nulls < 0 { - a.data.nulls = a.data.length - bitutil.CountSetBits(a.nullBitmapBytes, a.data.offset, a.data.length) - } - return a.data.nulls -} - -// NullBitmapBytes returns a byte slice of the validity bitmap. -func (a *array) NullBitmapBytes() []byte { return a.nullBitmapBytes } - -func (a *array) Data() arrow.ArrayData { return a.data } - -// Len returns the number of elements in the array. -func (a *array) Len() int { return a.data.length } - -// IsNull returns true if value at index is null. -// NOTE: IsNull will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. -func (a *array) IsNull(i int) bool { - return len(a.nullBitmapBytes) != 0 && bitutil.BitIsNotSet(a.nullBitmapBytes, a.data.offset+i) -} - -// IsValid returns true if value at index is not null. -// NOTE: IsValid will panic if NullBitmapBytes is not empty and 0 > i ≥ Len. -func (a *array) IsValid(i int) bool { - return len(a.nullBitmapBytes) == 0 || bitutil.BitIsSet(a.nullBitmapBytes, a.data.offset+i) -} - -func (a *array) setData(data *Data) { - // Retain before releasing in case a.data is the same as data. 
- data.Retain() - - if a.data != nil { - a.data.Release() - } - - if len(data.buffers) > 0 && data.buffers[0] != nil { - a.nullBitmapBytes = data.buffers[0].Bytes() - } - a.data = data -} - -func (a *array) Offset() int { - return a.data.Offset() -} - -type arrayConstructorFn func(arrow.ArrayData) arrow.Array - -var ( - makeArrayFn [64]arrayConstructorFn -) - -func invalidDataType(data arrow.ArrayData) arrow.Array { - panic("invalid data type: " + data.DataType().ID().String()) -} - -// MakeFromData constructs a strongly-typed array instance from generic Data. -func MakeFromData(data arrow.ArrayData) arrow.Array { - return makeArrayFn[byte(data.DataType().ID()&0x3f)](data) -} - -// NewSlice constructs a zero-copy slice of the array with the indicated -// indices i and j, corresponding to array[i:j]. -// The returned array must be Release()'d after use. -// -// NewSlice panics if the slice is outside the valid range of the input array. -// NewSlice panics if j < i. -func NewSlice(arr arrow.Array, i, j int64) arrow.Array { - data := NewSliceData(arr.Data(), i, j) - slice := MakeFromData(data) - data.Release() - return slice -} - -func init() { - makeArrayFn = [...]arrayConstructorFn{ - arrow.NULL: func(data arrow.ArrayData) arrow.Array { return NewNullData(data) }, - arrow.BOOL: func(data arrow.ArrayData) arrow.Array { return NewBooleanData(data) }, - arrow.UINT8: func(data arrow.ArrayData) arrow.Array { return NewUint8Data(data) }, - arrow.INT8: func(data arrow.ArrayData) arrow.Array { return NewInt8Data(data) }, - arrow.UINT16: func(data arrow.ArrayData) arrow.Array { return NewUint16Data(data) }, - arrow.INT16: func(data arrow.ArrayData) arrow.Array { return NewInt16Data(data) }, - arrow.UINT32: func(data arrow.ArrayData) arrow.Array { return NewUint32Data(data) }, - arrow.INT32: func(data arrow.ArrayData) arrow.Array { return NewInt32Data(data) }, - arrow.UINT64: func(data arrow.ArrayData) arrow.Array { return NewUint64Data(data) }, - arrow.INT64: func(data 
arrow.ArrayData) arrow.Array { return NewInt64Data(data) }, - arrow.FLOAT16: func(data arrow.ArrayData) arrow.Array { return NewFloat16Data(data) }, - arrow.FLOAT32: func(data arrow.ArrayData) arrow.Array { return NewFloat32Data(data) }, - arrow.FLOAT64: func(data arrow.ArrayData) arrow.Array { return NewFloat64Data(data) }, - arrow.STRING: func(data arrow.ArrayData) arrow.Array { return NewStringData(data) }, - arrow.BINARY: func(data arrow.ArrayData) arrow.Array { return NewBinaryData(data) }, - arrow.FIXED_SIZE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeBinaryData(data) }, - arrow.DATE32: func(data arrow.ArrayData) arrow.Array { return NewDate32Data(data) }, - arrow.DATE64: func(data arrow.ArrayData) arrow.Array { return NewDate64Data(data) }, - arrow.TIMESTAMP: func(data arrow.ArrayData) arrow.Array { return NewTimestampData(data) }, - arrow.TIME32: func(data arrow.ArrayData) arrow.Array { return NewTime32Data(data) }, - arrow.TIME64: func(data arrow.ArrayData) arrow.Array { return NewTime64Data(data) }, - arrow.INTERVAL_MONTHS: func(data arrow.ArrayData) arrow.Array { return NewMonthIntervalData(data) }, - arrow.INTERVAL_DAY_TIME: func(data arrow.ArrayData) arrow.Array { return NewDayTimeIntervalData(data) }, - arrow.DECIMAL128: func(data arrow.ArrayData) arrow.Array { return NewDecimal128Data(data) }, - arrow.DECIMAL256: func(data arrow.ArrayData) arrow.Array { return NewDecimal256Data(data) }, - arrow.LIST: func(data arrow.ArrayData) arrow.Array { return NewListData(data) }, - arrow.STRUCT: func(data arrow.ArrayData) arrow.Array { return NewStructData(data) }, - arrow.SPARSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewSparseUnionData(data) }, - arrow.DENSE_UNION: func(data arrow.ArrayData) arrow.Array { return NewDenseUnionData(data) }, - arrow.DICTIONARY: func(data arrow.ArrayData) arrow.Array { return NewDictionaryData(data) }, - arrow.MAP: func(data arrow.ArrayData) arrow.Array { return NewMapData(data) }, - 
arrow.EXTENSION: func(data arrow.ArrayData) arrow.Array { return NewExtensionData(data) }, - arrow.FIXED_SIZE_LIST: func(data arrow.ArrayData) arrow.Array { return NewFixedSizeListData(data) }, - arrow.DURATION: func(data arrow.ArrayData) arrow.Array { return NewDurationData(data) }, - arrow.LARGE_STRING: func(data arrow.ArrayData) arrow.Array { return NewLargeStringData(data) }, - arrow.LARGE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewLargeBinaryData(data) }, - arrow.LARGE_LIST: func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) }, - arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) }, - arrow.RUN_END_ENCODED: func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) }, - arrow.LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewListViewData(data) }, - arrow.LARGE_LIST_VIEW: func(data arrow.ArrayData) arrow.Array { return NewLargeListViewData(data) }, - arrow.BINARY_VIEW: func(data arrow.ArrayData) arrow.Array { return NewBinaryViewData(data) }, - arrow.STRING_VIEW: func(data arrow.ArrayData) arrow.Array { return NewStringViewData(data) }, - // invalid data types to fill out array to size 2^6 - 1 - 63: invalidDataType, - } -} diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go deleted file mode 100644 index 4f0627c600078..0000000000000 --- a/go/arrow/array/array_test.go +++ /dev/null @@ -1,346 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -type testDataType struct { - id arrow.Type -} - -func (d *testDataType) ID() arrow.Type { return d.id } -func (d *testDataType) Name() string { panic("implement me") } -func (d *testDataType) BitWidth() int { return 8 } -func (d *testDataType) Bytes() int { return 1 } -func (d *testDataType) Fingerprint() string { return "" } -func (testDataType) Layout() arrow.DataTypeLayout { return arrow.DataTypeLayout{} } -func (testDataType) String() string { return "" } - -func TestMakeFromData(t *testing.T) { - tests := []struct { - name string - d arrow.DataType - size int - child []arrow.ArrayData - dict *array.Data - expPanic bool - expError string - }{ - // supported types - {name: "null", d: &testDataType{arrow.NULL}}, - {name: "bool", d: &testDataType{arrow.BOOL}}, - {name: "uint8", d: &testDataType{arrow.UINT8}}, - {name: "uint16", d: &testDataType{arrow.UINT16}}, - {name: "uint32", d: &testDataType{arrow.UINT32}}, - {name: "uint64", d: &testDataType{arrow.UINT64}}, - {name: "int8", d: &testDataType{arrow.INT8}}, - {name: "int16", d: &testDataType{arrow.INT16}}, - {name: "int32", d: &testDataType{arrow.INT32}}, - {name: "int64", d: &testDataType{arrow.INT64}}, - {name: 
"float16", d: &testDataType{arrow.FLOAT16}}, - {name: "float32", d: &testDataType{arrow.FLOAT32}}, - {name: "float64", d: &testDataType{arrow.FLOAT64}}, - {name: "string", d: &testDataType{arrow.STRING}, size: 3}, - {name: "binary", d: &testDataType{arrow.BINARY}, size: 3}, - {name: "large_string", d: &testDataType{arrow.LARGE_STRING}, size: 3}, - {name: "large_binary", d: &testDataType{arrow.LARGE_BINARY}, size: 3}, - {name: "fixed_size_binary", d: &testDataType{arrow.FIXED_SIZE_BINARY}}, - {name: "date32", d: &testDataType{arrow.DATE32}}, - {name: "date64", d: &testDataType{arrow.DATE64}}, - {name: "timestamp", d: &testDataType{arrow.TIMESTAMP}}, - {name: "time32", d: &testDataType{arrow.TIME32}}, - {name: "time64", d: &testDataType{arrow.TIME64}}, - {name: "month_interval", d: arrow.FixedWidthTypes.MonthInterval}, - {name: "day_time_interval", d: arrow.FixedWidthTypes.DayTimeInterval}, - {name: "decimal128", d: &testDataType{arrow.DECIMAL128}}, - {name: "decimal256", d: &testDataType{arrow.DECIMAL256}}, - {name: "month_day_nano_interval", d: arrow.FixedWidthTypes.MonthDayNanoInterval}, - - {name: "list", d: &testDataType{arrow.LIST}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - {name: "large list", d: &testDataType{arrow.LARGE_LIST}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - {name: "struct", d: &testDataType{arrow.STRUCT}}, - {name: 
"struct", d: &testDataType{arrow.STRUCT}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - {name: "fixed_size_list", d: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - {name: "duration", d: &testDataType{arrow.DURATION}}, - - {name: "map", d: &testDataType{arrow.MAP}, child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.STRUCT}, 0 /* length */, make([]*memory.Buffer, 3 /*null bitmap, values, offsets*/), []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }, 0 /* nulls */, 0 /* offset */)}, - }, - - {name: "sparse union", d: arrow.SparseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 2}, - {name: "dense union", d: arrow.DenseUnionOf(nil, nil), child: []arrow.ArrayData{}, size: 3}, - - // various dictionary index types and value types - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 
0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: &testDataType{arrow.INT32}}, dict: array.NewData(&testDataType{arrow.INT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: &testDataType{arrow.UINT16}}, dict: array.NewData(&testDataType{arrow.UINT16}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: &testDataType{arrow.INT64}}, dict: array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: &testDataType{arrow.UINT32}}, dict: array.NewData(&testDataType{arrow.UINT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int64, ValueType: &testDataType{arrow.UINT32}}, dict: array.NewData(&testDataType{arrow.UINT32}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - {name: "dictionary", d: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: &testDataType{arrow.TIMESTAMP}}, dict: 
array.NewData(&testDataType{arrow.TIMESTAMP}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */)}, - - {name: "extension", d: &testDataType{arrow.EXTENSION}, expPanic: true, expError: "arrow/array: DataType for ExtensionArray must implement arrow.ExtensionType"}, - {name: "extension", d: extensions.NewUUIDType()}, - - {name: "run end encoded", d: arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64), child: []arrow.ArrayData{ - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - array.NewData(&testDataType{arrow.INT64}, 0 /* length */, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0 /* nulls */, 0 /* offset */), - }}, - - // invalid types - {name: "invalid(-1)", d: &testDataType{arrow.Type(-1)}, expPanic: true, expError: "invalid data type: Type(-1)"}, - {name: "invalid(63)", d: &testDataType{arrow.Type(63)}, expPanic: true, expError: "invalid data type: Type(63)"}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - var ( - b [4]*memory.Buffer - n = 4 - data arrow.ArrayData - ) - if test.size != 0 { - n = test.size - } - if test.dict != nil { - data = array.NewDataWithDictionary(test.d, 0, b[:n], 0, 0, test.dict) - } else { - data = array.NewData(test.d, 0, b[:n], test.child, 0, 0) - } - - if test.expPanic { - assert.PanicsWithValue(t, test.expError, func() { - array.MakeFromData(data) - }) - } else { - assert.NotNil(t, array.MakeFromData(data)) - } - }) - } -} - -func bbits(v ...int32) []byte { - return tools.IntsToBitsLSB(v...) 
-} - -func TestArray_NullN(t *testing.T) { - tests := []struct { - name string - l int - bm []byte - n int - exp int - }{ - {name: "unknown,l16", l: 16, bm: bbits(0x11001010, 0x00110011), n: array.UnknownNullCount, exp: 8}, - {name: "unknown,l12,ignores last nibble", l: 12, bm: bbits(0x11001010, 0x00111111), n: array.UnknownNullCount, exp: 6}, - {name: "unknown,l12,12 nulls", l: 12, bm: bbits(0x00000000, 0x00000000), n: array.UnknownNullCount, exp: 12}, - {name: "unknown,l12,00 nulls", l: 12, bm: bbits(0x11111111, 0x11111111), n: array.UnknownNullCount, exp: 0}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - buf := memory.NewBufferBytes(test.bm) - data := array.NewData(arrow.FixedWidthTypes.Boolean, test.l, []*memory.Buffer{buf, nil}, nil, test.n, 0) - buf.Release() - ar := array.MakeFromData(data) - data.Release() - got := ar.NullN() - ar.Release() - assert.Equal(t, test.exp, got) - }) - } -} - -func TestArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []float64{1, 2, 3, 0, 4, 5} - ) - - b := array.NewFloat64Builder(pool) - defer b.Release() - - for _, tc := range []struct { - i, j int - panics bool - len int - }{ - {i: 0, j: len(valids), panics: false, len: len(valids)}, - {i: len(valids), j: len(valids), panics: false, len: 0}, - {i: 0, j: 1, panics: false, len: 1}, - {i: 1, j: 1, panics: false, len: 0}, - {i: 0, j: len(valids) + 1, panics: true}, - {i: 2, j: 1, panics: true}, - {i: len(valids) + 1, j: len(valids) + 1, panics: true}, - } { - t.Run("", func(t *testing.T) { - b.AppendValues(vs, valids) - - arr := b.NewFloat64Array() - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if tc.panics { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not") - } - }() - } - - 
slice := array.NewSlice(arr, int64(tc.i), int64(tc.j)).(*array.Float64) - defer slice.Release() - - if got, want := slice.Len(), tc.len; got != want { - t.Fatalf("invalid slice length: got=%d, want=%d", got, want) - } - }) - } -} - -func TestArraySliceTypes(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - valids := []bool{true, true, true, false, true, true} - - for _, tc := range []struct { - values interface{} - builder array.Builder - append func(b array.Builder, vs interface{}) - }{ - { - values: []bool{true, false, true, false, true, false}, - builder: array.NewBooleanBuilder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.BooleanBuilder).AppendValues(vs.([]bool), valids) }, - }, - { - values: []uint8{1, 2, 3, 0, 4, 5}, - builder: array.NewUint8Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint8Builder).AppendValues(vs.([]uint8), valids) }, - }, - { - values: []uint16{1, 2, 3, 0, 4, 5}, - builder: array.NewUint16Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint16Builder).AppendValues(vs.([]uint16), valids) }, - }, - { - values: []uint32{1, 2, 3, 0, 4, 5}, - builder: array.NewUint32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint32Builder).AppendValues(vs.([]uint32), valids) }, - }, - { - values: []uint64{1, 2, 3, 0, 4, 5}, - builder: array.NewUint64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Uint64Builder).AppendValues(vs.([]uint64), valids) }, - }, - { - values: []int8{1, 2, 3, 0, 4, 5}, - builder: array.NewInt8Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int8Builder).AppendValues(vs.([]int8), valids) }, - }, - { - values: []int16{1, 2, 3, 0, 4, 5}, - builder: array.NewInt16Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int16Builder).AppendValues(vs.([]int16), valids) }, - }, - { - values: []int32{1, 2, 
3, 0, 4, 5}, - builder: array.NewInt32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int32Builder).AppendValues(vs.([]int32), valids) }, - }, - { - values: []int64{1, 2, 3, 0, 4, 5}, - builder: array.NewInt64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Int64Builder).AppendValues(vs.([]int64), valids) }, - }, - { - values: []float32{1, 2, 3, 0, 4, 5}, - builder: array.NewFloat32Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Float32Builder).AppendValues(vs.([]float32), valids) }, - }, - { - values: []float64{1, 2, 3, 0, 4, 5}, - builder: array.NewFloat64Builder(pool), - append: func(b array.Builder, vs interface{}) { b.(*array.Float64Builder).AppendValues(vs.([]float64), valids) }, - }, - } { - t.Run("", func(t *testing.T) { - defer tc.builder.Release() - - b := tc.builder - tc.append(b, tc.values) - - arr := b.NewArray() - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("invalid length: got=%d, want=%d", got, want) - } - - slice := array.NewSlice(arr, 2, 5) - defer slice.Release() - - if got, want := slice.Len(), 3; got != want { - t.Fatalf("invalid slice length: got=%d, want=%d", got, want) - } - - shortSlice := array.NewSlice(arr, 2, 3) - defer shortSlice.Release() - - sliceOfShortSlice := array.NewSlice(shortSlice, 0, 1) - defer sliceOfShortSlice.Release() - - if got, want := sliceOfShortSlice.Len(), 1; got != want { - t.Fatalf("invalid short slice length: got=%d, want=%d", got, want) - } - }) - } -} diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go deleted file mode 100644 index 99764270bf39d..0000000000000 --- a/go/arrow/array/binary.go +++ /dev/null @@ -1,453 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "strings" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type BinaryLike interface { - arrow.Array - ValueLen(int) int - ValueBytes() []byte - ValueOffset64(int) int64 -} - -// A type which represents an immutable sequence of variable-length binary strings. -type Binary struct { - array - valueOffsets []int32 - valueBytes []byte -} - -// NewBinaryData constructs a new Binary array from data. -func NewBinaryData(data arrow.ArrayData) *Binary { - a := &Binary{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Value returns the slice at index i. This value should not be mutated. -func (a *Binary) Value(i int) []byte { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - idx := a.array.data.offset + i - return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] -} - -// ValueStr returns a copy of the base64-encoded string value or NullValueStr -func (a *Binary) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} - -// ValueString returns the string at index i without performing additional allocations. 
-// The string is only valid for the lifetime of the Binary array. -func (a *Binary) ValueString(i int) string { - b := a.Value(i) - return *(*string)(unsafe.Pointer(&b)) -} - -func (a *Binary) ValueOffset(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return int(a.valueOffsets[a.array.data.offset+i]) -} - -func (a *Binary) ValueOffset64(i int) int64 { - return int64(a.ValueOffset(i)) -} - -func (a *Binary) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) -} - -func (a *Binary) ValueOffsets() []int32 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.valueOffsets[beg:end] -} - -func (a *Binary) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] -} - -func (a *Binary) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.ValueString(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Binary) setData(data *Data) { - if len(data.buffers) != 3 { - panic("len(data.buffers) != 3") - } - - a.array.setData(data) - - if valueData := data.buffers[2]; valueData != nil { - a.valueBytes = valueData.Bytes() - } - - if valueOffsets := data.buffers[1]; valueOffsets != nil { - a.valueOffsets = arrow.Int32Traits.CastFromBytes(valueOffsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.valueOffsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: binary offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.valueOffsets[expNumOffsets-1]) > 
len(a.valueBytes) { - panic("arrow/array: binary offsets out of bounds of data buffer") - } -} - -func (a *Binary) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *Binary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - // golang marshal standard says that []byte will be marshalled - // as a base64-encoded string - return json.Marshal(vals) -} - -func arrayEqualBinary(left, right *Binary) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !bytes.Equal(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -type LargeBinary struct { - array - valueOffsets []int64 - valueBytes []byte -} - -func NewLargeBinaryData(data arrow.ArrayData) *LargeBinary { - a := &LargeBinary{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *LargeBinary) Value(i int) []byte { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - idx := a.array.data.offset + i - return a.valueBytes[a.valueOffsets[idx]:a.valueOffsets[idx+1]] -} - -func (a *LargeBinary) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} -func (a *LargeBinary) ValueString(i int) string { - b := a.Value(i) - return *(*string)(unsafe.Pointer(&b)) -} - -func (a *LargeBinary) ValueOffset(i int) int64 { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return a.valueOffsets[a.array.data.offset+i] -} - -func (a *LargeBinary) ValueOffset64(i int) int64 { - return a.ValueOffset(i) -} - -func (a *LargeBinary) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.valueOffsets[beg+1] - a.valueOffsets[beg]) -} - -func (a *LargeBinary) ValueOffsets() []int64 { - 
beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.valueOffsets[beg:end] -} - -func (a *LargeBinary) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - return a.valueBytes[a.valueOffsets[beg]:a.valueOffsets[end]] -} - -func (a *LargeBinary) String() string { - var o strings.Builder - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(&o, "%q", a.ValueString(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *LargeBinary) setData(data *Data) { - if len(data.buffers) != 3 { - panic("len(data.buffers) != 3") - } - - a.array.setData(data) - - if valueData := data.buffers[2]; valueData != nil { - a.valueBytes = valueData.Bytes() - } - - if valueOffsets := data.buffers[1]; valueOffsets != nil { - a.valueOffsets = arrow.Int64Traits.CastFromBytes(valueOffsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.valueOffsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: large binary offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.valueOffsets[expNumOffsets-1]) > len(a.valueBytes) { - panic("arrow/array: large binary offsets out of bounds of data buffer") - } -} - -func (a *LargeBinary) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *LargeBinary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - // golang marshal standard says that []byte will be marshalled - // as a base64-encoded string - return json.Marshal(vals) -} - -func arrayEqualLargeBinary(left, right *LargeBinary) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !bytes.Equal(left.Value(i), right.Value(i)) { - 
return false - } - } - return true -} - -type ViewLike interface { - arrow.Array - ValueHeader(int) *arrow.ViewHeader -} - -type BinaryView struct { - array - values []arrow.ViewHeader - dataBuffers []*memory.Buffer -} - -func NewBinaryViewData(data arrow.ArrayData) *BinaryView { - a := &BinaryView{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *BinaryView) setData(data *Data) { - if len(data.buffers) < 2 { - panic("len(data.buffers) < 2") - } - a.array.setData(data) - - if valueData := data.buffers[1]; valueData != nil { - a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) - } - - a.dataBuffers = data.buffers[2:] -} - -func (a *BinaryView) ValueHeader(i int) *arrow.ViewHeader { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return &a.values[a.array.data.offset+i] -} - -func (a *BinaryView) Value(i int) []byte { - s := a.ValueHeader(i) - if s.IsInline() { - return s.InlineBytes() - } - start := s.BufferOffset() - buf := a.dataBuffers[s.BufferIndex()] - return buf.Bytes()[start : start+int32(s.Len())] -} - -func (a *BinaryView) ValueLen(i int) int { - s := a.ValueHeader(i) - return s.Len() -} - -// ValueString returns the value at index i as a string instead of -// a byte slice, without copying the underlying data. 
-func (a *BinaryView) ValueString(i int) string { - b := a.Value(i) - return *(*string)(unsafe.Pointer(&b)) -} - -func (a *BinaryView) String() string { - var o strings.Builder - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(&o, "%q", a.ValueString(i)) - } - } - o.WriteString("]") - return o.String() -} - -// ValueStr is paired with AppendValueFromString in that it returns -// the value at index i as a string: Semantically this means that for -// a null value it will return the string "(null)", otherwise it will -// return the value as a base64 encoded string suitable for CSV/JSON. -// -// This is always going to be less performant than just using ValueString -// and exists to fulfill the Array interface to provide a method which -// can produce a human readable string for a given index. -func (a *BinaryView) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} - -func (a *BinaryView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *BinaryView) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - // golang marshal standard says that []byte will be marshalled - // as a base64-encoded string - return json.Marshal(vals) -} - -func arrayEqualBinaryView(left, right *BinaryView) bool { - leftBufs, rightBufs := left.dataBuffers, right.dataBuffers - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*Binary)(nil) - _ arrow.Array = (*LargeBinary)(nil) - _ arrow.Array = (*BinaryView)(nil) - - _ BinaryLike = (*Binary)(nil) - _ BinaryLike = (*LargeBinary)(nil) -) 
diff --git a/go/arrow/array/binary_test.go b/go/arrow/array/binary_test.go deleted file mode 100644 index 919fff7b5e5e8..0000000000000 --- a/go/arrow/array/binary_test.go +++ /dev/null @@ -1,726 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - values := [][]byte{ - []byte("AAA"), - nil, - []byte("BBBB"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - - b.Retain() - b.Release() - - a := b.NewBinaryArray() - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - // Test builder reset and NewArray API. 
- b.AppendValues(values, valid) - a = b.NewArray().(*Binary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - b.Release() -} - -func TestLargeBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - - values := [][]byte{ - []byte("AAA"), - nil, - []byte("BBBB"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - - b.Retain() - b.Release() - - assert.Panics(t, func() { - b.NewBinaryArray() - }) - - a := b.NewLargeBinaryArray() - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - // Test builder reset and NewArray API. 
- b.AppendValues(values, valid) - a = b.NewArray().(*LargeBinary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("AAA"), a.Value(0)) - assert.Equal(t, []byte{}, a.Value(1)) - assert.Equal(t, []byte("BBBB"), a.Value(2)) - assert.Equal(t, "QUFB", a.ValueStr(0)) - assert.Equal(t, NullValueStr, a.ValueStr(1)) - a.Release() - - b.Release() -} - -func TestBinarySliceData(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - for _, v := range values { - b.AppendString(v) - } - - arr := b.NewArray().(*Binary) - defer arr.Release() - - if got, want := arr.Len(), len(values); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]string, arr.Len()) - - for i := range vs { - vs[i] = arr.ValueString(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - want []string - }{ - { - interval: [2]int64{0, 0}, - want: []string{}, - }, - { - interval: [2]int64{0, 5}, - want: []string{"a", "bc", "def", "g", "hijk"}, - }, - { - interval: [2]int64{0, 10}, - want: []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"}, - }, - { - interval: [2]int64{5, 10}, - want: []string{"lm", "n", "opq", "rs", "tu"}, - }, - { - interval: [2]int64{10, 10}, - want: []string{}, - }, - { - interval: [2]int64{2, 7}, - want: []string{"def", "g", "hijk", "lm", "n"}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) - defer slice.Release() - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]string, slice.Len()) - - for i := range vs { - vs[i] = 
slice.ValueString(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBinarySliceDataWithNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - if got, want := arr.Len(), len(values); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 3; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]string, arr.Len()) - - for i := range vs { - vs[i] = arr.ValueString(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - nulls int - want []string - }{ - { - interval: [2]int64{0, 2}, - nulls: 0, - want: []string{"a", "bc"}, - }, - { - interval: [2]int64{0, 3}, - nulls: 1, - want: []string{"a", "bc", ""}, - }, - { - interval: [2]int64{0, 4}, - nulls: 2, - want: []string{"a", "bc", "", ""}, - }, - { - interval: [2]int64{4, 8}, - nulls: 0, - want: []string{"hijk", "lm", "", "opq"}, - }, - { - interval: [2]int64{2, 9}, - nulls: 3, - want: []string{"", "", "hijk", "lm", "", "opq", ""}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := NewSlice(arr, tc.interval[0], tc.interval[1]).(*Binary) - defer slice.Release() - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.NullN(), tc.nulls; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - vs := make([]string, slice.Len()) - - for i := range vs { - vs[i] = 
slice.ValueString(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBinarySliceOutOfBounds(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", "rs", "tu"} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - for _, v := range values { - b.AppendString(v) - } - - arr := b.NewArray().(*Binary) - defer arr.Release() - - slice := NewSlice(arr, 3, 8).(*Binary) - defer slice.Release() - - tests := []struct { - index int - panic bool - }{ - { - index: -1, - panic: true, - }, - { - index: 5, - panic: true, - }, - { - index: 0, - panic: false, - }, - { - index: 4, - panic: false, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - var val string - - if tc.panic { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not; slice value %q", val) - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. 
got=%q, want=%q", got, want) - } - }() - } else { - defer func() { - if e := recover(); e != nil { - t.Fatalf("unexpected panic: %v", e) - } - }() - } - - val = slice.ValueString(tc.index) - }) - } -} - -func TestBinaryValueOffset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - offset := 3 - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, offset, slice.ValueOffset(i)) - offset += len(v) - } -} - -func TestLargeBinaryValueOffset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - offset := 3 - vs := values[2:9] - - for i, v := range vs { - assert.EqualValues(t, offset, slice.ValueOffset(i)) - offset += len(v) - } -} - -func TestBinaryValueLen(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := 
b.NewArray().(*Binary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, len(v), slice.ValueLen(i)) - } -} - -func TestLargeBinaryValueLen(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, len(v), slice.ValueLen(i)) - } -} - -func TestBinaryValueOffsets(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - assert.Equal(t, []int32{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - assert.Equal(t, []int32{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) -} - -func TestLargeBinaryValueOffsets(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - 
arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - assert.Equal(t, []int64{0, 1, 3, 3, 3, 7, 9, 9, 12, 12, 14}, arr.ValueOffsets()) - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - assert.Equal(t, []int64{3, 3, 3, 7, 9, 9, 12, 12}, slice.ValueOffsets()) -} - -func TestBinaryValueBytes(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) - - slice := NewSlice(arr, 2, 9).(*Binary) - defer slice.Release() - - assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) -} - -func TestLargeBinaryValueBytes(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - assert.Equal(t, []byte{'a', 'b', 'c', 'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q', 't', 'u'}, arr.ValueBytes()) - - slice := NewSlice(arr, 2, 9).(*LargeBinary) - defer slice.Release() - - assert.Equal(t, []byte{'h', 'i', 'j', 'k', 'l', 'm', 'o', 'p', 'q'}, slice.ValueBytes()) -} - -func TestBinaryStringer(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "é", "", "hijk", 
"lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, true, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - got := arr.String() - want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` - - if got != want { - t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) - } -} - -func TestLargeBinaryStringer(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "é", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, true, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*LargeBinary) - defer arr.Release() - - got := arr.String() - want := `["a" "bc" (null) "é" (null) "hijk" "lm" "" "opq" (null) "tu"]` - - if got != want { - t.Fatalf("invalid stringer:\ngot= %s\nwant=%s\n", got, want) - } -} - -func TestBinaryInvalidOffsets(t *testing.T) { - const expectedPanic = "arrow/array: binary offsets out of bounds of data buffer" - - makeBuffers := func(valids []bool, offsets []int32, data string) []*memory.Buffer { - offsetBuf := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) - var nullBufBytes []byte - var nullBuf *memory.Buffer - if valids != nil { - nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) - for i, v := range valids { - bitutil.SetBitTo(nullBufBytes, i, v) - } - nullBuf = memory.NewBufferBytes(nullBufBytes) - } - return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} - } - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{}, "") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, nil, 0, 0)) - }, "empty array with no 
offsets") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 0, buffers, nil, 0, 0)) - }, "empty array, offsets ignored") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 2)) - }, "data has offset and value offsets are valid") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678") - arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, buffers, nil, 0, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { - assert.EqualValues(t, "012", arr.Value(0)) - assert.EqualValues(t, "345", arr.Value(1)) - assert.EqualValues(t, "678", arr.Value(2)) - assert.EqualValues(t, "", arr.Value(3), "trailing empty binary value will have offset past end") - } - }, "simple valid case") - - assert.NotPanics(t, func() { - buffers := makeBuffers([]bool{true, false, true, false}, []int32{0, 3, 4, 9, 9}, "oooabcdef") - arr := NewBinaryData(NewData(arrow.BinaryTypes.Binary, 4, buffers, nil, 2, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { - assert.EqualValues(t, "ooo", arr.Value(0)) - assert.True(t, arr.IsNull(1)) - assert.EqualValues(t, "bcdef", arr.Value(2)) - assert.True(t, arr.IsNull(3)) - } - }, "simple valid case with nulls") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "abc") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 0)) - }, "last offset is overflowing") - - assert.PanicsWithError(t, "arrow/array: binary offset buffer must have at least 2 values", func() { - buffers := makeBuffers(nil, []int32{0}, "abc") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 0)) - }, "last offset is missing") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, 
"oooabcdef") - NewBinaryData(NewData(arrow.BinaryTypes.Binary, 1, buffers, nil, 0, 2)) - }, "data has offset and value offset is overflowing") -} - -func TestBinaryStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valid := []bool{true, true, false, false, true, true, true, true, false, true} - - b := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b.Release() - - b.AppendStringValues(values, valid) - - arr := b.NewArray().(*Binary) - defer arr.Release() - - // 2. create array via AppendValueFromString - - b1 := NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*Binary) - defer arr1.Release() - - assert.True(t, Equal(arr, arr1)) -} - -func TestBinaryViewStringRoundTrip(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "supercalifragilistic", "", "expialidocious"} - valid := []bool{true, true, false, false, true, true, true} - - b := NewBinaryViewBuilder(mem) - defer b.Release() - - b.AppendStringValues(values, valid) - arr := b.NewArray().(*BinaryView) - defer arr.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b.NewArray().(*BinaryView) - defer arr1.Release() - - assert.True(t, Equal(arr, arr1)) -} diff --git a/go/arrow/array/binarybuilder.go b/go/arrow/array/binarybuilder.go deleted file mode 100644 index 6fcc4eaf46479..0000000000000 --- a/go/arrow/array/binarybuilder.go +++ /dev/null @@ -1,704 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "math" - "reflect" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A BinaryBuilder is used to build a Binary array using the Append methods. 
-type BinaryBuilder struct { - builder - - dtype arrow.BinaryDataType - offsets bufBuilder - values *byteBufferBuilder - - appendOffsetVal func(int) - getOffsetVal func(int) int - maxCapacity uint64 - offsetByteWidth int -} - -// NewBinaryBuilder can be used for any of the variable length binary types, -// Binary, LargeBinary, String, LargeString by passing the appropriate data type -func NewBinaryBuilder(mem memory.Allocator, dtype arrow.BinaryDataType) *BinaryBuilder { - var ( - offsets bufBuilder - offsetValFn func(int) - maxCapacity uint64 - offsetByteWidth int - getOffsetVal func(int) int - ) - switch dtype.Layout().Buffers[1].ByteWidth { - case 4: - b := newInt32BufferBuilder(mem) - offsetValFn = func(v int) { b.AppendValue(int32(v)) } - getOffsetVal = func(i int) int { return int(b.Value(i)) } - offsets = b - maxCapacity = math.MaxInt32 - offsetByteWidth = arrow.Int32SizeBytes - case 8: - b := newInt64BufferBuilder(mem) - offsetValFn = func(v int) { b.AppendValue(int64(v)) } - getOffsetVal = func(i int) int { return int(b.Value(i)) } - offsets = b - maxCapacity = math.MaxInt64 - offsetByteWidth = arrow.Int64SizeBytes - } - - b := &BinaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - offsets: offsets, - values: newByteBufferBuilder(mem), - appendOffsetVal: offsetValFn, - maxCapacity: maxCapacity, - offsetByteWidth: offsetByteWidth, - getOffsetVal: getOffsetVal, - } - return b -} - -func (b *BinaryBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. 
-func (b *BinaryBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.offsets != nil { - b.offsets.Release() - b.offsets = nil - } - if b.values != nil { - b.values.Release() - b.values = nil - } - } -} - -func (b *BinaryBuilder) Append(v []byte) { - b.Reserve(1) - b.appendNextOffset() - b.values.Append(v) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryBuilder) AppendString(v string) { - b.Append([]byte(v)) -} - -func (b *BinaryBuilder) AppendNull() { - b.Reserve(1) - b.appendNextOffset() - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *BinaryBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *BinaryBuilder) AppendEmptyValue() { - b.Reserve(1) - b.appendNextOffset() - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *BinaryBuilder) AppendValues(v [][]byte, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for _, vv := range v { - b.appendNextOffset() - b.values.Append(vv) - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -// AppendStringValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *BinaryBuilder) AppendStringValues(v []string, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for _, vv := range v { - b.appendNextOffset() - b.values.Append([]byte(vv)) - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *BinaryBuilder) UnsafeAppend(v []byte) { - b.appendNextOffset() - b.values.unsafeAppend(v) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryBuilder) Value(i int) []byte { - start := b.getOffsetVal(i) - var end int - if i == (b.length - 1) { - end = b.values.Len() - } else { - end = b.getOffsetVal(i + 1) - } - return b.values.Bytes()[start:end] -} - -func (b *BinaryBuilder) init(capacity int) { - b.builder.init(capacity) - b.offsets.resize((capacity + 1) * b.offsetByteWidth) -} - -// DataLen returns the number of bytes in the data array. -func (b *BinaryBuilder) DataLen() int { return b.values.length } - -// DataCap returns the total number of bytes that can be stored -// without allocating additional memory. -func (b *BinaryBuilder) DataCap() int { return b.values.capacity } - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *BinaryBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// ReserveData ensures there is enough space for appending n bytes -// by checking the capacity and resizing the data buffer if necessary. -func (b *BinaryBuilder) ReserveData(n int) { - if b.values.capacity < b.values.length+n { - b.values.resize(b.values.Len() + n) - } -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may be reduced. 
-func (b *BinaryBuilder) Resize(n int) { - b.offsets.resize((n + 1) * b.offsetByteWidth) - if (n * b.offsetByteWidth) < b.offsets.Len() { - b.offsets.SetLength(n * b.offsetByteWidth) - } - b.builder.resize(n, b.init) -} - -func (b *BinaryBuilder) ResizeData(n int) { - b.values.length = n -} - -// NewArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder -// so it can be used to build a new array. -// -// Builds the appropriate Binary or LargeBinary array based on the datatype -// it was initialized with. -func (b *BinaryBuilder) NewArray() arrow.Array { - if b.offsetByteWidth == arrow.Int32SizeBytes { - return b.NewBinaryArray() - } - return b.NewLargeBinaryArray() -} - -// NewBinaryArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder -// so it can be used to build a new array. -func (b *BinaryBuilder) NewBinaryArray() (a *Binary) { - if b.offsetByteWidth != arrow.Int32SizeBytes { - panic("arrow/array: invalid call to NewBinaryArray when building a LargeBinary array") - } - - data := b.newData() - a = NewBinaryData(data) - data.Release() - return -} - -func (b *BinaryBuilder) NewLargeBinaryArray() (a *LargeBinary) { - if b.offsetByteWidth != arrow.Int64SizeBytes { - panic("arrow/array: invalid call to NewLargeBinaryArray when building a Binary array") - } - - data := b.newData() - a = NewLargeBinaryData(data) - data.Release() - return -} - -func (b *BinaryBuilder) newData() (data *Data) { - b.appendNextOffset() - offsets, values := b.offsets.Finish(), b.values.Finish() - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, offsets, values}, nil, b.nulls, 0) - if offsets != nil { - offsets.Release() - } - - if values != nil { - values.Release() - } - - b.builder.reset() - - return -} - -func (b *BinaryBuilder) appendNextOffset() { - numBytes := b.values.Len() - debug.Assert(uint64(numBytes) <= b.maxCapacity, "exceeded maximum capacity of binary array") - 
b.appendOffsetVal(numBytes) -} - -func (b *BinaryBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - if b.dtype.IsUtf8() { - b.Append([]byte(s)) - return nil - } - - decodedVal, err := base64.StdEncoding.DecodeString(s) - if err != nil { - return fmt.Errorf("could not decode base64 string: %w", err) - } - b.Append(decodedVal) - return nil -} - -func (b *BinaryBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case string: - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - b.Append(data) - case []byte: - b.Append(v) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *BinaryBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *BinaryBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -const ( - dfltBlockSize = 32 << 10 // 32 KB - viewValueSizeLimit int32 = math.MaxInt32 -) - -type BinaryViewBuilder struct { - builder - dtype arrow.BinaryDataType - - data *memory.Buffer - rawData []arrow.ViewHeader - - blockBuilder multiBufferBuilder -} - -func NewBinaryViewBuilder(mem memory.Allocator) *BinaryViewBuilder { - return &BinaryViewBuilder{ - dtype: arrow.BinaryTypes.BinaryView, - builder: builder{ - refCount: 1, - mem: mem, - }, - blockBuilder: multiBufferBuilder{ - refCount: 1, - blockSize: dfltBlockSize, - mem: mem, - }, - } -} - -func (b *BinaryViewBuilder) 
SetBlockSize(sz uint) { - b.blockBuilder.blockSize = int(sz) -} - -func (b *BinaryViewBuilder) Type() arrow.DataType { return b.dtype } - -func (b *BinaryViewBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) != 0 { - return - } - - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } -} - -func (b *BinaryViewBuilder) init(capacity int) { - b.builder.init(capacity) - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.ViewHeaderTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) -} - -func (b *BinaryViewBuilder) Resize(n int) { - nbuild := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - return - } - - b.builder.resize(nbuild, b.init) - b.data.Resize(arrow.ViewHeaderTraits.BytesRequired(n)) - b.rawData = arrow.ViewHeaderTraits.CastFromBytes(b.data.Bytes()) -} - -func (b *BinaryViewBuilder) ReserveData(length int) { - if int32(length) > viewValueSizeLimit { - panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", - arrow.ErrInvalid)) - } - b.blockBuilder.Reserve(int(length)) -} - -func (b *BinaryViewBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -func (b *BinaryViewBuilder) Append(v []byte) { - if int32(len(v)) > viewValueSizeLimit { - panic(fmt.Errorf("%w: BinaryView or StringView elements cannot reference strings larger than 2GB", arrow.ErrInvalid)) - } - - if !arrow.IsViewInline(len(v)) { - b.ReserveData(len(v)) - } - - b.Reserve(1) - b.UnsafeAppend(v) -} - -// AppendString is identical to Append, only accepting a string instead -// of a byte slice, avoiding the extra copy that would occur if you simply -// did []byte(v). 
-// -// This is different than AppendValueFromString which exists for the -// Builder interface, in that this expects raw binary data which is -// appended unmodified. AppendValueFromString expects base64 encoded binary -// data instead. -func (b *BinaryViewBuilder) AppendString(v string) { - // create a []byte without copying the bytes - // in go1.20 this would be unsafe.StringData - val := *(*[]byte)(unsafe.Pointer(&struct { - string - int - }{v, len(v)})) - b.Append(val) -} - -func (b *BinaryViewBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *BinaryViewBuilder) AppendNulls(n int) { - b.Reserve(n) - for i := 0; i < n; i++ { - b.UnsafeAppendBoolToBitmap(false) - } -} - -func (b *BinaryViewBuilder) AppendEmptyValue() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryViewBuilder) AppendEmptyValues(n int) { - b.Reserve(n) - b.unsafeAppendBoolsToBitmap(nil, n) -} - -func (b *BinaryViewBuilder) UnsafeAppend(v []byte) { - hdr := &b.rawData[b.length] - hdr.SetBytes(v) - if !hdr.IsInline() { - b.blockBuilder.UnsafeAppend(hdr, v) - } - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *BinaryViewBuilder) AppendValues(v [][]byte, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - outOfLineTotal := 0 - for i, vv := range v { - if len(valid) == 0 || valid[i] { - if !arrow.IsViewInline(len(vv)) { - outOfLineTotal += len(vv) - } - } - } - - b.ReserveData(outOfLineTotal) - for i, vv := range v { - if len(valid) == 0 || valid[i] { - hdr := &b.rawData[b.length+i] - hdr.SetBytes(vv) - if !hdr.IsInline() { - b.blockBuilder.UnsafeAppend(hdr, vv) - } - } - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *BinaryViewBuilder) AppendStringValues(v []string, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if 
len(v) == 0 { - return - } - - b.Reserve(len(v)) - outOfLineTotal := 0 - for i, vv := range v { - if len(valid) == 0 || valid[i] { - if !arrow.IsViewInline(len(vv)) { - outOfLineTotal += len(vv) - } - } - } - - b.ReserveData(outOfLineTotal) - for i, vv := range v { - if len(valid) == 0 || valid[i] { - hdr := &b.rawData[b.length+i] - hdr.SetString(vv) - if !hdr.IsInline() { - b.blockBuilder.UnsafeAppendString(hdr, vv) - } - } - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -// AppendValueFromString is paired with ValueStr for fulfilling the -// base Builder interface. This is intended to read in a human-readable -// string such as from CSV or JSON and append it to the array. -// -// For Binary values are expected to be base64 encoded (and will be -// decoded as such before being appended). -func (b *BinaryViewBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - if b.dtype.IsUtf8() { - b.Append([]byte(s)) - return nil - } - - decodedVal, err := base64.StdEncoding.DecodeString(s) - if err != nil { - return fmt.Errorf("could not decode base64 string: %w", err) - } - b.Append(decodedVal) - return nil -} - -func (b *BinaryViewBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case string: - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - b.Append(data) - case []byte: - b.Append(v) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *BinaryViewBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *BinaryViewBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != 
nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary view builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *BinaryViewBuilder) newData() (data *Data) { - bytesRequired := arrow.ViewHeaderTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - - dataBuffers := b.blockBuilder.Finish() - data = NewData(b.dtype, b.length, append([]*memory.Buffer{ - b.nullBitmap, b.data}, dataBuffers...), nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - for _, buf := range dataBuffers { - buf.Release() - } - } - return -} - -func (b *BinaryViewBuilder) NewBinaryViewArray() (a *BinaryView) { - data := b.newData() - a = NewBinaryViewData(data) - data.Release() - return -} - -func (b *BinaryViewBuilder) NewArray() arrow.Array { - return b.NewBinaryViewArray() -} - -var ( - _ Builder = (*BinaryBuilder)(nil) - _ Builder = (*BinaryViewBuilder)(nil) -) diff --git a/go/arrow/array/binarybuilder_test.go b/go/arrow/array/binarybuilder_test.go deleted file mode 100644 index 65d5c7385df4c..0000000000000 --- a/go/arrow/array/binarybuilder_test.go +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "bytes" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBinaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} - for _, v := range exp { - if v == nil { - ab.AppendNull() - } else { - ab.Append(v) - } - } - - assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") - assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") - - for i, v := range exp { - if v == nil { - v = []byte{} - } - assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) - } - // Zm9v is foo in base64 - assert.NoError(t, ab.AppendValueFromString("Zm9v")) - - ar := ab.NewBinaryArray() - assert.Equal(t, "Zm9v", ar.ValueStr(5)) - - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} - -func TestBinaryBuilder_ReserveData(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer 
mem.AssertSize(t, 0) - - ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - - // call ReserveData and ensure the capacity doesn't change - // when appending entries until that count. - ab.ReserveData(256) - expCap := ab.DataCap() - for i := 0; i < 256/8; i++ { - ab.Append(bytes.Repeat([]byte("a"), 8)) - } - assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") - - ar := ab.NewBinaryArray() - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} - -func TestBinaryBuilderLarge(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - - exp := [][]byte{[]byte("foo"), []byte("bar"), nil, []byte("sydney"), []byte("cameron")} - for _, v := range exp { - if v == nil { - ab.AppendNull() - } else { - ab.Append(v) - } - } - - assert.Equal(t, len(exp), ab.Len(), "unexpected Len()") - assert.Equal(t, 1, ab.NullN(), "unexpected NullN()") - - for i, v := range exp { - if v == nil { - v = []byte{} - } - assert.Equal(t, v, ab.Value(i), "unexpected BinaryArrayBuilder.Value(%d)", i) - } - - ar := ab.NewLargeBinaryArray() - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} - -func TestBinaryBuilderLarge_ReserveData(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer 
mem.AssertSize(t, 0) - - ab := array.NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - - // call ReserveData and ensure the capacity doesn't change - // when appending entries until that count. - ab.ReserveData(256) - expCap := ab.DataCap() - for i := 0; i < 256/8; i++ { - ab.Append(bytes.Repeat([]byte("a"), 8)) - } - assert.Equal(t, expCap, ab.DataCap(), "unexpected BinaryArrayBuilder.DataCap()") - - ar := ab.NewLargeBinaryArray() - ab.Release() - ar.Release() - - // check state of builder after NewBinaryArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewBinaryArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewBinaryArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewBinaryArray did not reset state") -} diff --git a/go/arrow/array/boolean.go b/go/arrow/array/boolean.go deleted file mode 100644 index eab26d273dd96..0000000000000 --- a/go/arrow/array/boolean.go +++ /dev/null @@ -1,126 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "fmt" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of boolean values. -type Boolean struct { - array - values []byte -} - -// NewBoolean creates a boolean array from the data memory.Buffer and contains length elements. -// The nullBitmap buffer can be nil of there are no null values. -// If nulls is not known, use UnknownNullCount to calculate the value of NullN at runtime from the nullBitmap buffer. -func NewBoolean(length int, data *memory.Buffer, nullBitmap *memory.Buffer, nulls int) *Boolean { - arrdata := NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nullBitmap, data}, nil, nulls, 0) - defer arrdata.Release() - return NewBooleanData(arrdata) -} - -func NewBooleanData(data arrow.ArrayData) *Boolean { - a := &Boolean{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Boolean) Value(i int) bool { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return bitutil.BitIsSet(a.values, a.array.data.offset+i) -} - -func (a *Boolean) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } else { - return strconv.FormatBool(a.Value(i)) - } -} - -func (a *Boolean) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Boolean) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = vals.Bytes() - } -} - -func (a *Boolean) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.Value(i) - } - 
return nil -} - -func (a *Boolean) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.Value(i) - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualBoolean(left, right *Boolean) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*Boolean)(nil) -) diff --git a/go/arrow/array/boolean_test.go b/go/arrow/array/boolean_test.go deleted file mode 100644 index f980497d54521..0000000000000 --- a/go/arrow/array/boolean_test.go +++ /dev/null @@ -1,322 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "fmt" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBooleanSliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - values := []bool{true, false, true, true, true, true, true, false, true, false} - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - for _, v := range values { - b.Append(v) - } - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - if got, want := arr.Len(), len(values); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, arr.Len()) - - for i := range vs { - vs[i] = arr.Value(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - want []bool - }{ - { - interval: [2]int64{0, 0}, - want: []bool{}, - }, - { - interval: [2]int64{10, 10}, - want: []bool{}, - }, - { - interval: [2]int64{0, 5}, - want: []bool{true, false, true, true, true}, - }, - { - interval: [2]int64{5, 10}, - want: []bool{true, true, false, true, false}, - }, - { - interval: [2]int64{2, 7}, - want: []bool{true, true, true, true, true}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) - defer slice.Release() - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, slice.Len()) - - for i := range vs { - vs[i] = slice.Value(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBooleanSliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - 
values := []bool{true, false, true, false, false, false, true, false, true, false} - valids := []bool{true, false, true, true, true, true, true, false, true, true} - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - b.AppendValues(values, valids) - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, arr.Len()) - - for i := range vs { - vs[i] = arr.Value(i) - } - - if got, want := vs, values; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - tests := []struct { - interval [2]int64 - nulls int - want []bool - }{ - { - interval: [2]int64{2, 9}, - nulls: 1, - want: []bool{true, false, false, false, true, false, true}, - }, - { - interval: [2]int64{0, 7}, - nulls: 1, - want: []bool{true, false, true, false, false, false, true}, - }, - { - interval: [2]int64{1, 8}, - nulls: 2, - want: []bool{false, true, false, false, false, true, false}, - }, - { - interval: [2]int64{2, 7}, - nulls: 0, - want: []bool{true, false, false, false, true}, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - slice := array.NewSlice(arr, tc.interval[0], tc.interval[1]).(*array.Boolean) - defer slice.Release() - - if got, want := slice.NullN(), tc.nulls; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(tc.want); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - vs := make([]bool, slice.Len()) - - for i := range vs { - vs[i] = slice.Value(i) - } - - if got, want := vs, tc.want; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestBooleanSliceOutOfBounds(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - values := []bool{true, false, true, 
false, true, false, true, false, true, false} - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - for _, v := range values { - b.Append(v) - } - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - slice := array.NewSlice(arr, 3, 8).(*array.Boolean) - defer slice.Release() - - tests := []struct { - index int - panic bool - }{ - { - index: -1, - panic: true, - }, - { - index: 5, - panic: true, - }, - { - index: 0, - panic: false, - }, - { - index: 4, - panic: false, - }, - } - - for _, tc := range tests { - t.Run("", func(t *testing.T) { - - var val bool - - if tc.panic { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not; slice value %v", val) - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. got=%q, want=%q", got, want) - } - }() - } else { - defer func() { - if e := recover(); e != nil { - t.Fatalf("unexpected panic: %v", e) - } - }() - } - - val = slice.Value(tc.index) - }) - } -} - -func TestBooleanStringer(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - values = []bool{true, false, true, false, true, false, true, false, true, false} - valids = []bool{true, true, false, true, true, true, false, true, true, true} - ) - - b := array.NewBooleanBuilder(pool) - defer b.Release() - - b.AppendValues(values, valids) - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - out := new(strings.Builder) - fmt.Fprintf(out, "%v", arr) - - const want = "[true false (null) false true false (null) false true false]" - if got := out.String(); got != want { - t.Fatalf("invalid stringer:\ngot= %q\nwant=%q", got, want) - } - assert.Equal(t, "true", arr.ValueStr(0)) - assert.Equal(t, "false", arr.ValueStr(1)) - assert.Equal(t, array.NullValueStr, arr.ValueStr(2)) -} - -func TestBooleanStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []bool{true, false, true, true, true, true, true, false, true, false} - valid := []bool{true, false, false, true, false, true, true, false, true, false} - - b := array.NewBooleanBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.Boolean) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewBooleanBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Boolean) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/booleanbuilder.go b/go/arrow/array/booleanbuilder.go deleted file mode 100644 index 44d33018f94ea..0000000000000 --- a/go/arrow/array/booleanbuilder.go +++ /dev/null @@ -1,263 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type BooleanBuilder struct { - builder - - data *memory.Buffer - rawData []byte -} - -func NewBooleanBuilder(mem memory.Allocator) *BooleanBuilder { - return &BooleanBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *BooleanBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.Boolean } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *BooleanBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *BooleanBuilder) Append(v bool) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *BooleanBuilder) AppendByte(v byte) { - b.Reserve(1) - b.UnsafeAppend(v != 0) -} - -func (b *BooleanBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *BooleanBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *BooleanBuilder) AppendEmptyValue() { - b.Reserve(1) - b.UnsafeAppend(false) -} - -func (b *BooleanBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *BooleanBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := strconv.ParseBool(s) - if err != nil { - return err - } - b.Append(val) - return nil -} - -func (b 
*BooleanBuilder) UnsafeAppend(v bool) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - if v { - bitutil.SetBit(b.rawData, b.length) - } else { - bitutil.ClearBit(b.rawData, b.length) - } - b.length++ -} - -func (b *BooleanBuilder) AppendValues(v []bool, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for i, vv := range v { - bitutil.SetBitTo(b.rawData, b.length+i, vv) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *BooleanBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.BooleanTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = b.data.Bytes() -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *BooleanBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *BooleanBuilder) Resize(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.init) - b.data.Resize(arrow.BooleanTraits.BytesRequired(n)) - b.rawData = b.data.Bytes() - } -} - -// NewArray creates a Boolean array from the memory buffers used by the builder and resets the BooleanBuilder -// so it can be used to build a new array. -func (b *BooleanBuilder) NewArray() arrow.Array { - return b.NewBooleanArray() -} - -// NewBooleanArray creates a Boolean array from the memory buffers used by the builder and resets the BooleanBuilder -// so it can be used to build a new array. 
-func (b *BooleanBuilder) NewBooleanArray() (a *Boolean) { - data := b.newData() - a = NewBooleanData(data) - data.Release() - return -} - -func (b *BooleanBuilder) newData() *Data { - bytesRequired := arrow.BooleanTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - res := NewData(arrow.FixedWidthTypes.Boolean, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return res -} - -func (b *BooleanBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case bool: - b.Append(v) - case string: - val, err := strconv.ParseBool(v) - if err != nil { - return err - } - b.Append(val) - case json.Number: - val, err := strconv.ParseBool(v.String()) - if err != nil { - return err - } - b.Append(val) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(true), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *BooleanBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *BooleanBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - dec.UseNumber() - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("boolean builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *BooleanBuilder) Value(i int) bool { - return bitutil.BitIsSet(b.rawData, i) -} - -var ( - _ Builder = (*BooleanBuilder)(nil) -) diff --git a/go/arrow/array/booleanbuilder_test.go b/go/arrow/array/booleanbuilder_test.go deleted file mode 100644 index 42e49f95a2f3e..0000000000000 --- 
a/go/arrow/array/booleanbuilder_test.go +++ /dev/null @@ -1,103 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBooleanBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewBooleanBuilder(mem) - - exp := tools.Bools(1, 1, 0, 1, 1, 0) - - b.AppendValues(exp, nil) - assert.NoError(t, b.AppendValueFromString("true")) - assert.NoError(t, b.AppendValueFromString("false")) - exp = tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) - - got := make([]bool, len(exp)) - // make sure we can read the values directly from the builder. 
- for i := 0; i < b.Len(); i++ { - got[i] = b.Value(i) - } - assert.Equal(t, exp, got) - - got = make([]bool, len(exp)) // reset - - a := b.NewBooleanArray() - b.Release() - for i := 0; i < a.Len(); i++ { - got[i] = a.Value(i) - } - assert.Equal(t, exp, got) - - a.Release() -} - -func TestBooleanBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewBooleanBuilder(mem) - defer ab.Release() - - want := tools.Bools(1, 1, 0, 1, 1, 0, 1, 0) - - boolValues := func(a *array.Boolean) []bool { - vs := make([]bool, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]bool{}, nil) - a := ab.NewBooleanArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewBooleanArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewBooleanArray() - assert.Equal(t, want, boolValues(a)) - a.Release() - - ab.AppendValues([]bool{}, nil) - ab.AppendValues(want, nil) - a = ab.NewBooleanArray() - assert.Equal(t, want, boolValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]bool{}, nil) - a = ab.NewBooleanArray() - assert.Equal(t, want, boolValues(a)) - a.Release() -} diff --git a/go/arrow/array/bufferbuilder.go b/go/arrow/array/bufferbuilder.go deleted file mode 100644 index 037d220f0b141..0000000000000 --- a/go/arrow/array/bufferbuilder.go +++ /dev/null @@ -1,261 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type bufBuilder interface { - Retain() - Release() - Len() int - Cap() int - Bytes() []byte - resize(int) - Advance(int) - SetLength(int) - Append([]byte) - Reset() - Finish() *memory.Buffer -} - -// A bufferBuilder provides common functionality for populating memory with a sequence of type-specific values. -// Specialized implementations provide type-safe APIs for appending and accessing the memory. -type bufferBuilder struct { - refCount int64 - mem memory.Allocator - buffer *memory.Buffer - length int - capacity int - - bytes []byte -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *bufferBuilder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *bufferBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.buffer != nil { - b.buffer.Release() - b.buffer, b.bytes = nil, nil - } - } -} - -// Len returns the length of the memory buffer in bytes. 
-func (b *bufferBuilder) Len() int { return b.length } - -// Cap returns the total number of bytes that can be stored without allocating additional memory. -func (b *bufferBuilder) Cap() int { return b.capacity } - -// Bytes returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *bufferBuilder) Bytes() []byte { return b.bytes[:b.length] } - -func (b *bufferBuilder) resize(elements int) { - if b.buffer == nil { - b.buffer = memory.NewResizableBuffer(b.mem) - } - - b.buffer.ResizeNoShrink(elements) - oldCapacity := b.capacity - b.capacity = b.buffer.Cap() - b.bytes = b.buffer.Buf() - - if b.capacity > oldCapacity { - memory.Set(b.bytes[oldCapacity:], 0) - } -} - -func (b *bufferBuilder) SetLength(length int) { - if length > b.length { - b.Advance(length) - return - } - - b.length = length -} - -// Advance increases the buffer by length and initializes the skipped bytes to zero. -func (b *bufferBuilder) Advance(length int) { - if b.capacity < b.length+length { - newCapacity := bitutil.NextPowerOf2(b.length + length) - b.resize(newCapacity) - } - b.length += length -} - -// Append appends the contents of v to the buffer, resizing it if necessary. -func (b *bufferBuilder) Append(v []byte) { - if b.capacity < b.length+len(v) { - newCapacity := bitutil.NextPowerOf2(b.length + len(v)) - b.resize(newCapacity) - } - b.unsafeAppend(v) -} - -// Reset returns the buffer to an empty state. Reset releases the memory and sets the length and capacity to zero. 
-func (b *bufferBuilder) Reset() { - if b.buffer != nil { - b.buffer.Release() - } - b.buffer, b.bytes = nil, nil - b.capacity, b.length = 0, 0 -} - -// Finish TODO(sgc) -func (b *bufferBuilder) Finish() (buffer *memory.Buffer) { - if b.length > 0 { - b.buffer.ResizeNoShrink(b.length) - } - buffer = b.buffer - b.buffer = nil - b.Reset() - if buffer == nil { - buffer = memory.NewBufferBytes(nil) - } - return -} - -func (b *bufferBuilder) unsafeAppend(data []byte) { - copy(b.bytes[b.length:], data) - b.length += len(data) -} - -type multiBufferBuilder struct { - refCount int64 - blockSize int - - mem memory.Allocator - blocks []*memory.Buffer - currentOutBuffer int -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *multiBufferBuilder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *multiBufferBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - b.Reset() - } -} - -func (b *multiBufferBuilder) Reserve(nbytes int) { - if len(b.blocks) == 0 { - out := memory.NewResizableBuffer(b.mem) - if nbytes < b.blockSize { - nbytes = b.blockSize - } - out.Reserve(nbytes) - b.currentOutBuffer = 0 - b.blocks = []*memory.Buffer{out} - return - } - - curBuf := b.blocks[b.currentOutBuffer] - remain := curBuf.Cap() - curBuf.Len() - if nbytes <= remain { - return - } - - // search for underfull block that has enough bytes - for i, block := range b.blocks { - remaining := block.Cap() - block.Len() - if nbytes <= remaining { - b.currentOutBuffer = i - return - } - } - - // current buffer doesn't have enough space, no underfull buffers - // make new buffer and set that as our current. 
- newBuf := memory.NewResizableBuffer(b.mem) - if nbytes < b.blockSize { - nbytes = b.blockSize - } - - newBuf.Reserve(nbytes) - b.currentOutBuffer = len(b.blocks) - b.blocks = append(b.blocks, newBuf) -} - -func (b *multiBufferBuilder) RemainingBytes() int { - if len(b.blocks) == 0 { - return 0 - } - - buf := b.blocks[b.currentOutBuffer] - return buf.Cap() - buf.Len() -} - -func (b *multiBufferBuilder) Reset() { - b.currentOutBuffer = 0 - for _, block := range b.Finish() { - block.Release() - } -} - -func (b *multiBufferBuilder) UnsafeAppend(hdr *arrow.ViewHeader, val []byte) { - buf := b.blocks[b.currentOutBuffer] - idx, offset := b.currentOutBuffer, buf.Len() - hdr.SetIndexOffset(int32(idx), int32(offset)) - - n := copy(buf.Buf()[offset:], val) - buf.ResizeNoShrink(offset + n) -} - -func (b *multiBufferBuilder) UnsafeAppendString(hdr *arrow.ViewHeader, val string) { - // create a byte slice with zero-copies - // in go1.20 this would be equivalent to unsafe.StringData - v := *(*[]byte)(unsafe.Pointer(&struct { - string - int - }{val, len(val)})) - b.UnsafeAppend(hdr, v) -} - -func (b *multiBufferBuilder) Finish() (out []*memory.Buffer) { - b.currentOutBuffer = 0 - out, b.blocks = b.blocks, nil - return -} diff --git a/go/arrow/array/bufferbuilder_byte.go b/go/arrow/array/bufferbuilder_byte.go deleted file mode 100644 index 2ac7ec703b579..0000000000000 --- a/go/arrow/array/bufferbuilder_byte.go +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import "github.com/apache/arrow/go/v18/arrow/memory" - -type byteBufferBuilder struct { - bufferBuilder -} - -func newByteBufferBuilder(mem memory.Allocator) *byteBufferBuilder { - return &byteBufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -func (b *byteBufferBuilder) Values() []byte { return b.Bytes() } -func (b *byteBufferBuilder) Value(i int) byte { return b.bytes[i] } diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go b/go/arrow/array/bufferbuilder_numeric.gen.go deleted file mode 100644 index 5215ecf65a312..0000000000000 --- a/go/arrow/array/bufferbuilder_numeric.gen.go +++ /dev/null @@ -1,124 +0,0 @@ -// Code generated by array/bufferbuilder_numeric.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type int64BufferBuilder struct { - bufferBuilder -} - -func newInt64BufferBuilder(mem memory.Allocator) *int64BufferBuilder { - return &int64BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. -func (b *int64BufferBuilder) AppendValues(v []int64) { b.Append(arrow.Int64Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *int64BufferBuilder) Values() []int64 { return arrow.Int64Traits.CastFromBytes(b.Bytes()) } - -// Value returns the int64 element at the index i. Value will panic if i is negative or ≥ Len. -func (b *int64BufferBuilder) Value(i int) int64 { return b.Values()[i] } - -// Len returns the number of int64 elements in the buffer. -func (b *int64BufferBuilder) Len() int { return b.length / arrow.Int64SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. -func (b *int64BufferBuilder) AppendValue(v int64) { - if b.capacity < b.length+arrow.Int64SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int64SizeBytes) - b.resize(newCapacity) - } - arrow.Int64Traits.PutValue(b.bytes[b.length:], v) - b.length += arrow.Int64SizeBytes -} - -type int32BufferBuilder struct { - bufferBuilder -} - -func newInt32BufferBuilder(mem memory.Allocator) *int32BufferBuilder { - return &int32BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. 
-func (b *int32BufferBuilder) AppendValues(v []int32) { b.Append(arrow.Int32Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *int32BufferBuilder) Values() []int32 { return arrow.Int32Traits.CastFromBytes(b.Bytes()) } - -// Value returns the int32 element at the index i. Value will panic if i is negative or ≥ Len. -func (b *int32BufferBuilder) Value(i int) int32 { return b.Values()[i] } - -// Len returns the number of int32 elements in the buffer. -func (b *int32BufferBuilder) Len() int { return b.length / arrow.Int32SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. -func (b *int32BufferBuilder) AppendValue(v int32) { - if b.capacity < b.length+arrow.Int32SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int32SizeBytes) - b.resize(newCapacity) - } - arrow.Int32Traits.PutValue(b.bytes[b.length:], v) - b.length += arrow.Int32SizeBytes -} - -type int8BufferBuilder struct { - bufferBuilder -} - -func newInt8BufferBuilder(mem memory.Allocator) *int8BufferBuilder { - return &int8BufferBuilder{bufferBuilder: bufferBuilder{refCount: 1, mem: mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. -func (b *int8BufferBuilder) AppendValues(v []int8) { b.Append(arrow.Int8Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. 
-func (b *int8BufferBuilder) Values() []int8 { return arrow.Int8Traits.CastFromBytes(b.Bytes()) } - -// Value returns the int8 element at the index i. Value will panic if i is negative or ≥ Len. -func (b *int8BufferBuilder) Value(i int) int8 { return b.Values()[i] } - -// Len returns the number of int8 elements in the buffer. -func (b *int8BufferBuilder) Len() int { return b.length / arrow.Int8SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. -func (b *int8BufferBuilder) AppendValue(v int8) { - if b.capacity < b.length+arrow.Int8SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.Int8SizeBytes) - b.resize(newCapacity) - } - arrow.Int8Traits.PutValue(b.bytes[b.length:], v) - b.length += arrow.Int8SizeBytes -} diff --git a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl b/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl deleted file mode 100644 index 2b7fcaefcdeb2..0000000000000 --- a/go/arrow/array/bufferbuilder_numeric.gen.go.tmpl +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -{{range .In}} -{{$TypeNamePrefix := .name}} -{{if .Opt.BufferBuilder}} -type {{$TypeNamePrefix}}BufferBuilder struct { - bufferBuilder -} - -func new{{.Name}}BufferBuilder(mem memory.Allocator) *{{$TypeNamePrefix}}BufferBuilder { - return &{{$TypeNamePrefix}}BufferBuilder{bufferBuilder:bufferBuilder{refCount: 1, mem:mem}} -} - -// AppendValues appends the contents of v to the buffer, growing the buffer as needed. -func (b *{{$TypeNamePrefix}}BufferBuilder) AppendValues(v []{{.Type}}) { b.Append(arrow.{{.Name}}Traits.CastToBytes(v)) } - -// Values returns a slice of length b.Len(). -// The slice is only valid for use until the next buffer modification. That is, until the next call -// to Advance, Reset, Finish or any Append function. The slice aliases the buffer content at least until the next -// buffer modification. -func (b *{{$TypeNamePrefix}}BufferBuilder) Values() []{{.Type}} { return arrow.{{.Name}}Traits.CastFromBytes(b.Bytes()) } - -// Value returns the {{.Type}} element at the index i. Value will panic if i is negative or ≥ Len. -func (b *{{$TypeNamePrefix}}BufferBuilder) Value(i int) {{.Type}} { return b.Values()[i] } - -// Len returns the number of {{.Type}} elements in the buffer. -func (b *{{$TypeNamePrefix}}BufferBuilder) Len() int { return b.length/arrow.{{.Name}}SizeBytes } - -// AppendValue appends v to the buffer, growing the buffer as needed. 
-func (b *{{$TypeNamePrefix}}BufferBuilder) AppendValue(v {{.Type}}) { - if b.capacity < b.length+arrow.{{.Name}}SizeBytes { - newCapacity := bitutil.NextPowerOf2(b.length + arrow.{{.Name}}SizeBytes) - b.resize(newCapacity) - } - arrow.{{.Name}}Traits.PutValue(b.bytes[b.length:], v) - b.length+=arrow.{{.Name}}SizeBytes -} -{{end}} -{{end}} diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go deleted file mode 100644 index 3c947c87eeaac..0000000000000 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "testing" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestInt32BufferBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bb := newInt32BufferBuilder(mem) - exp := []int32{0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01, 0x02030405, 0x06070809} - bb.AppendValues(exp[:3]) - bb.AppendValues(exp[3:]) - - var expBuf []byte - if endian.IsBigEndian { - expBuf = []byte{ - 0x01, 0x02, 0x03, 0x04, - 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, - 0x0d, 0x0e, 0x0f, 0x01, - 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, - } - } else { - expBuf = []byte{ - 0x04, 0x03, 0x02, 0x01, - 0x08, 0x07, 0x06, 0x05, - 0x0c, 0x0b, 0x0a, 0x09, - 0x01, 0x0f, 0x0e, 0x0d, - 0x05, 0x04, 0x03, 0x02, - 0x09, 0x08, 0x07, 0x06, - } - } - assert.Equal(t, expBuf, bb.Bytes(), "unexpected byte values") - assert.Equal(t, exp, bb.Values(), "unexpected int32 values") - assert.Equal(t, len(exp), bb.Len(), "unexpected Len()") - - buflen := bb.Len() - bfr := bb.Finish() - assert.Equal(t, buflen*int(unsafe.Sizeof(int32(0))), bfr.Len(), "Buffer was not resized") - assert.Len(t, bfr.Bytes(), bfr.Len(), "Buffer.Bytes() != Buffer.Len()") - bfr.Release() - - assert.Len(t, bb.Bytes(), 0, "BufferBuilder was not reset after Finish") - assert.Zero(t, bb.Len(), "BufferBuilder was not reset after Finish") - bb.Release() -} - -func TestInt32BufferBuilder_AppendValue(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bb := newInt32BufferBuilder(mem) - exp := []int32{0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f01, 0x02030405, 0x06070809} - for _, v := range exp { - bb.AppendValue(v) - } - - var expBuf []byte - if endian.IsBigEndian { - expBuf = []byte{ - 0x01, 0x02, 0x03, 0x04, - 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 
0x0c, - 0x0d, 0x0e, 0x0f, 0x01, - 0x02, 0x03, 0x04, 0x05, - 0x06, 0x07, 0x08, 0x09, - } - } else { - expBuf = []byte{ - 0x04, 0x03, 0x02, 0x01, - 0x08, 0x07, 0x06, 0x05, - 0x0c, 0x0b, 0x0a, 0x09, - 0x01, 0x0f, 0x0e, 0x0d, - 0x05, 0x04, 0x03, 0x02, - 0x09, 0x08, 0x07, 0x06, - } - } - assert.Equal(t, expBuf, bb.Bytes(), "unexpected byte values") - assert.Equal(t, exp, bb.Values(), "unexpected int32 values") - assert.Equal(t, len(exp), bb.Len(), "unexpected Len()") - bb.Release() -} diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go deleted file mode 100644 index 1f4d0ea963509..0000000000000 --- a/go/arrow/array/builder.go +++ /dev/null @@ -1,374 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -const ( - minBuilderCapacity = 1 << 5 -) - -// Builder provides an interface to build arrow arrays. 
-type Builder interface { - // you can unmarshal a json array to add the values to a builder - json.Unmarshaler - - // Type returns the datatype that this is building - Type() arrow.DataType - - // Retain increases the reference count by 1. - // Retain may be called simultaneously from multiple goroutines. - Retain() - - // Release decreases the reference count by 1. - Release() - - // Len returns the number of elements in the array builder. - Len() int - - // Cap returns the total number of elements that can be stored - // without allocating additional memory. - Cap() int - - // NullN returns the number of null values in the array builder. - NullN() int - - // AppendNull adds a new null value to the array being built. - AppendNull() - - // AppendNulls adds new n null values to the array being built. - AppendNulls(n int) - - // AppendEmptyValue adds a new zero value of the appropriate type - AppendEmptyValue() - - // AppendEmptyValues adds new n zero values of the appropriate type - AppendEmptyValues(n int) - - // AppendValueFromString adds a new value from a string. Inverse of array.ValueStr(i int) string - AppendValueFromString(string) error - - // Reserve ensures there is enough space for appending n elements - // by checking the capacity and calling Resize if necessary. - Reserve(n int) - - // Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), - // additional memory will be allocated. If n is smaller, the allocated memory may reduced. - Resize(n int) - - // NewArray creates a new array from the memory buffers used - // by the builder and resets the Builder so it can be used to build - // a new array. - NewArray() arrow.Array - - // IsNull returns if a previously appended value at a given index is null or not. - IsNull(i int) bool - - // SetNull sets the value at index i to null. 
- SetNull(i int) - - UnsafeAppendBoolToBitmap(bool) - - init(capacity int) - resize(newBits int, init func(int)) - - UnmarshalOne(*json.Decoder) error - Unmarshal(*json.Decoder) error - - newData() *Data -} - -// builder provides common functionality for managing the validity bitmap (nulls) when building arrays. -type builder struct { - refCount int64 - mem memory.Allocator - nullBitmap *memory.Buffer - nulls int - length int - capacity int -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *builder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Len returns the number of elements in the array builder. -func (b *builder) Len() int { return b.length } - -// Cap returns the total number of elements that can be stored without allocating additional memory. -func (b *builder) Cap() int { return b.capacity } - -// NullN returns the number of null values in the array builder. -func (b *builder) NullN() int { return b.nulls } - -func (b *builder) IsNull(i int) bool { - return b.nullBitmap.Len() != 0 && bitutil.BitIsNotSet(b.nullBitmap.Bytes(), i) -} - -func (b *builder) SetNull(i int) { - if i < 0 || i >= b.length { - panic("arrow/array: index out of range") - } - bitutil.ClearBit(b.nullBitmap.Bytes(), i) -} - -func (b *builder) init(capacity int) { - toAlloc := bitutil.CeilByte(capacity) / 8 - b.nullBitmap = memory.NewResizableBuffer(b.mem) - b.nullBitmap.Resize(toAlloc) - b.capacity = capacity - memory.Set(b.nullBitmap.Buf(), 0) -} - -func (b *builder) reset() { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - - b.nulls = 0 - b.length = 0 - b.capacity = 0 -} - -func (b *builder) resize(newBits int, init func(int)) { - if b.nullBitmap == nil { - init(newBits) - return - } - - newBytesN := bitutil.CeilByte(newBits) / 8 - oldBytesN := b.nullBitmap.Len() - b.nullBitmap.Resize(newBytesN) - b.capacity = newBits - if oldBytesN < newBytesN { - // TODO(sgc): necessary? 
- memory.Set(b.nullBitmap.Buf()[oldBytesN:], 0) - } - if newBits < b.length { - b.length = newBits - b.nulls = newBits - bitutil.CountSetBits(b.nullBitmap.Buf(), 0, newBits) - } -} - -func (b *builder) reserve(elements int, resize func(int)) { - if b.nullBitmap == nil { - b.nullBitmap = memory.NewResizableBuffer(b.mem) - } - if b.length+elements > b.capacity { - newCap := bitutil.NextPowerOf2(b.length + elements) - resize(newCap) - } -} - -// unsafeAppendBoolsToBitmap appends the contents of valid to the validity bitmap. -// As an optimization, if the valid slice is empty, the next length bits will be set to valid (not null). -func (b *builder) unsafeAppendBoolsToBitmap(valid []bool, length int) { - if len(valid) == 0 { - b.unsafeSetValid(length) - return - } - - byteOffset := b.length / 8 - bitOffset := byte(b.length % 8) - nullBitmap := b.nullBitmap.Bytes() - bitSet := nullBitmap[byteOffset] - - for _, v := range valid { - if bitOffset == 8 { - bitOffset = 0 - nullBitmap[byteOffset] = bitSet - byteOffset++ - bitSet = nullBitmap[byteOffset] - } - - if v { - bitSet |= bitutil.BitMask[bitOffset] - } else { - bitSet &= bitutil.FlippedBitMask[bitOffset] - b.nulls++ - } - bitOffset++ - } - - if bitOffset != 0 { - nullBitmap[byteOffset] = bitSet - } - b.length += len(valid) -} - -// unsafeSetValid sets the next length bits to valid in the validity bitmap. 
-func (b *builder) unsafeSetValid(length int) { - padToByte := min(8-(b.length%8), length) - if padToByte == 8 { - padToByte = 0 - } - bits := b.nullBitmap.Bytes() - for i := b.length; i < b.length+padToByte; i++ { - bitutil.SetBit(bits, i) - } - - start := (b.length + padToByte) / 8 - fastLength := (length - padToByte) / 8 - memory.Set(bits[start:start+fastLength], 0xff) - - newLength := b.length + length - // trailing bytes - for i := b.length + padToByte + (fastLength * 8); i < newLength; i++ { - bitutil.SetBit(bits, i) - } - - b.length = newLength -} - -func (b *builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder { - // FIXME(sbinet): use a type switch on dtype instead? - switch dtype.ID() { - case arrow.NULL: - return NewNullBuilder(mem) - case arrow.BOOL: - return NewBooleanBuilder(mem) - case arrow.UINT8: - return NewUint8Builder(mem) - case arrow.INT8: - return NewInt8Builder(mem) - case arrow.UINT16: - return NewUint16Builder(mem) - case arrow.INT16: - return NewInt16Builder(mem) - case arrow.UINT32: - return NewUint32Builder(mem) - case arrow.INT32: - return NewInt32Builder(mem) - case arrow.UINT64: - return NewUint64Builder(mem) - case arrow.INT64: - return NewInt64Builder(mem) - case arrow.FLOAT16: - return NewFloat16Builder(mem) - case arrow.FLOAT32: - return NewFloat32Builder(mem) - case arrow.FLOAT64: - return NewFloat64Builder(mem) - case arrow.STRING: - return NewStringBuilder(mem) - case arrow.LARGE_STRING: - return NewLargeStringBuilder(mem) - case arrow.BINARY: - return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - case arrow.LARGE_BINARY: - return NewBinaryBuilder(mem, arrow.BinaryTypes.LargeBinary) - case arrow.FIXED_SIZE_BINARY: - typ := dtype.(*arrow.FixedSizeBinaryType) - return NewFixedSizeBinaryBuilder(mem, typ) - case arrow.DATE32: - return 
NewDate32Builder(mem) - case arrow.DATE64: - return NewDate64Builder(mem) - case arrow.TIMESTAMP: - typ := dtype.(*arrow.TimestampType) - return NewTimestampBuilder(mem, typ) - case arrow.TIME32: - typ := dtype.(*arrow.Time32Type) - return NewTime32Builder(mem, typ) - case arrow.TIME64: - typ := dtype.(*arrow.Time64Type) - return NewTime64Builder(mem, typ) - case arrow.INTERVAL_MONTHS: - return NewMonthIntervalBuilder(mem) - case arrow.INTERVAL_DAY_TIME: - return NewDayTimeIntervalBuilder(mem) - case arrow.INTERVAL_MONTH_DAY_NANO: - return NewMonthDayNanoIntervalBuilder(mem) - case arrow.DECIMAL128: - if typ, ok := dtype.(*arrow.Decimal128Type); ok { - return NewDecimal128Builder(mem, typ) - } - case arrow.DECIMAL256: - if typ, ok := dtype.(*arrow.Decimal256Type); ok { - return NewDecimal256Builder(mem, typ) - } - case arrow.LIST: - typ := dtype.(*arrow.ListType) - return NewListBuilderWithField(mem, typ.ElemField()) - case arrow.STRUCT: - typ := dtype.(*arrow.StructType) - return NewStructBuilder(mem, typ) - case arrow.SPARSE_UNION: - typ := dtype.(*arrow.SparseUnionType) - return NewSparseUnionBuilder(mem, typ) - case arrow.DENSE_UNION: - typ := dtype.(*arrow.DenseUnionType) - return NewDenseUnionBuilder(mem, typ) - case arrow.DICTIONARY: - typ := dtype.(*arrow.DictionaryType) - return NewDictionaryBuilder(mem, typ) - case arrow.LARGE_LIST: - typ := dtype.(*arrow.LargeListType) - return NewLargeListBuilderWithField(mem, typ.ElemField()) - case arrow.MAP: - typ := dtype.(*arrow.MapType) - return NewMapBuilderWithType(mem, typ) - case arrow.LIST_VIEW: - typ := dtype.(*arrow.ListViewType) - return NewListViewBuilderWithField(mem, typ.ElemField()) - case arrow.LARGE_LIST_VIEW: - typ := dtype.(*arrow.LargeListViewType) - return NewLargeListViewBuilderWithField(mem, typ.ElemField()) - case arrow.EXTENSION: - if custom, ok := dtype.(CustomExtensionBuilder); ok { - return custom.NewBuilder(mem) - } - if typ, ok := dtype.(arrow.ExtensionType); ok { - return 
NewExtensionBuilder(mem, typ) - } - panic(fmt.Errorf("arrow/array: invalid extension type: %T", dtype)) - case arrow.FIXED_SIZE_LIST: - typ := dtype.(*arrow.FixedSizeListType) - return NewFixedSizeListBuilderWithField(mem, typ.Len(), typ.ElemField()) - case arrow.DURATION: - typ := dtype.(*arrow.DurationType) - return NewDurationBuilder(mem, typ) - case arrow.RUN_END_ENCODED: - typ := dtype.(*arrow.RunEndEncodedType) - return NewRunEndEncodedBuilder(mem, typ.RunEnds(), typ.Encoded()) - case arrow.BINARY_VIEW: - return NewBinaryViewBuilder(mem) - case arrow.STRING_VIEW: - return NewStringViewBuilder(mem) - } - panic(fmt.Errorf("arrow/array: unsupported builder for %T", dtype)) -} diff --git a/go/arrow/array/builder_test.go b/go/arrow/array/builder_test.go deleted file mode 100644 index 7eb2b3f7cf9e3..0000000000000 --- a/go/arrow/array/builder_test.go +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestBuilder_Init(t *testing.T) { - type exp struct{ size int } - tests := []struct { - name string - cap int - - exp exp - }{ - {"07 bits", 07, exp{size: 1}}, - {"19 bits", 19, exp{size: 3}}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - ab := &builder{mem: memory.NewGoAllocator()} - ab.init(test.cap) - assert.Equal(t, test.cap, ab.Cap(), "invalid capacity") - assert.Equal(t, test.exp.size, ab.nullBitmap.Len(), "invalid length") - }) - } -} - -func TestBuilder_UnsafeSetValid(t *testing.T) { - ab := &builder{mem: memory.NewGoAllocator()} - ab.init(32) - ab.unsafeAppendBoolsToBitmap(tools.Bools(0, 0, 0, 0, 0), 5) - assert.Equal(t, 5, ab.Len()) - assert.Equal(t, []byte{0, 0, 0, 0}, ab.nullBitmap.Bytes()) - - ab.unsafeSetValid(17) - assert.Equal(t, []byte{0xe0, 0xff, 0x3f, 0}, ab.nullBitmap.Bytes()) -} - -func TestBuilder_resize(t *testing.T) { - b := &builder{mem: memory.NewGoAllocator()} - n := 64 - - b.init(n) - assert.Equal(t, n, b.Cap()) - assert.Equal(t, 0, b.Len()) - - b.UnsafeAppendBoolToBitmap(true) - for i := 1; i < n; i++ { - b.UnsafeAppendBoolToBitmap(false) - } - assert.Equal(t, n, b.Cap()) - assert.Equal(t, n, b.Len()) - assert.Equal(t, n-1, b.NullN()) - - n = 5 - b.resize(n, b.init) - assert.Equal(t, n, b.Len()) - assert.Equal(t, n-1, b.NullN()) - - b.resize(32, b.init) - assert.Equal(t, n, b.Len()) - assert.Equal(t, n-1, b.NullN()) -} - -func TestBuilder_IsNull(t *testing.T) { - b := &builder{mem: memory.NewGoAllocator()} - n := 32 - b.init(n) - - assert.True(t, b.IsNull(0)) - assert.True(t, b.IsNull(1)) - - for i := 0; i < n; i++ { - b.UnsafeAppendBoolToBitmap(i%2 == 0) - } - for i := 0; i < n; i++ { - assert.Equal(t, i%2 != 0, b.IsNull(i)) - } -} - -func TestBuilder_SetNull(t *testing.T) { - b := 
&builder{mem: memory.NewGoAllocator()} - n := 32 - b.init(n) - - for i := 0; i < n; i++ { - // Set everything to true - b.UnsafeAppendBoolToBitmap(true) - } - for i := 0; i < n; i++ { - if i%2 == 0 { // Set all even numbers to null - b.SetNull(i) - } - } - - for i := 0; i < n; i++ { - if i%2 == 0 { - assert.True(t, b.IsNull(i)) - } else { - assert.False(t, b.IsNull(i)) - } - } -} diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go deleted file mode 100644 index a54c1e23c1e1c..0000000000000 --- a/go/arrow/array/compare.go +++ /dev/null @@ -1,854 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "math" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/internal/bitutils" -) - -// RecordEqual reports whether the two provided records are equal. 
-func RecordEqual(left, right arrow.Record) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - for i := range left.Columns() { - lc := left.Column(i) - rc := right.Column(i) - if !Equal(lc, rc) { - return false - } - } - return true -} - -// RecordApproxEqual reports whether the two provided records are approximately equal. -// For non-floating point columns, it is equivalent to RecordEqual. -func RecordApproxEqual(left, right arrow.Record, opts ...EqualOption) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - opt := newEqualOption(opts...) - - for i := range left.Columns() { - lc := left.Column(i) - rc := right.Column(i) - if !arrayApproxEqual(lc, rc, opt) { - return false - } - } - return true -} - -// helper function to evaluate a function on two chunked object having possibly different -// chunk layouts. the function passed in will be called for each corresponding slice of the -// two chunked arrays and if the function returns false it will end the loop early. 
-func chunkedBinaryApply(left, right *arrow.Chunked, fn func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool) { - var ( - pos int64 - length int64 = int64(left.Len()) - leftIdx, rightIdx int - leftPos, rightPos int64 - ) - - for pos < length { - var cleft, cright arrow.Array - for { - cleft, cright = left.Chunk(leftIdx), right.Chunk(rightIdx) - if leftPos == int64(cleft.Len()) { - leftPos = 0 - leftIdx++ - continue - } - if rightPos == int64(cright.Len()) { - rightPos = 0 - rightIdx++ - continue - } - break - } - - sz := int64(min(cleft.Len()-int(leftPos), cright.Len()-int(rightPos))) - pos += sz - if !fn(cleft, leftPos, leftPos+sz, cright, rightPos, rightPos+sz) { - return - } - - leftPos += sz - rightPos += sz - } -} - -// ChunkedEqual reports whether two chunked arrays are equal regardless of their chunkings -func ChunkedEqual(left, right *arrow.Chunked) bool { - switch { - case left == right: - return true - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): - return false - } - - var isequal bool = true - chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { - isequal = SliceEqual(left, lbeg, lend, right, rbeg, rend) - return isequal - }) - - return isequal -} - -// ChunkedApproxEqual reports whether two chunked arrays are approximately equal regardless of their chunkings -// for non-floating point arrays, this is equivalent to ChunkedEqual -func ChunkedApproxEqual(left, right *arrow.Chunked, opts ...EqualOption) bool { - switch { - case left == right: - return true - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): - return false - } - - var isequal bool - chunkedBinaryApply(left, right, func(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, 
rend int64) bool { - isequal = SliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opts...) - return isequal - }) - - return isequal -} - -// TableEqual returns if the two tables have the same data in the same schema -func TableEqual(left, right arrow.Table) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - for i := 0; int64(i) < left.NumCols(); i++ { - lc := left.Column(i) - rc := right.Column(i) - if !lc.Field().Equal(rc.Field()) { - return false - } - - if !ChunkedEqual(lc.Data(), rc.Data()) { - return false - } - } - return true -} - -// TableEqual returns if the two tables have the approximately equal data in the same schema -func TableApproxEqual(left, right arrow.Table, opts ...EqualOption) bool { - switch { - case left.NumCols() != right.NumCols(): - return false - case left.NumRows() != right.NumRows(): - return false - } - - for i := 0; int64(i) < left.NumCols(); i++ { - lc := left.Column(i) - rc := right.Column(i) - if !lc.Field().Equal(rc.Field()) { - return false - } - - if !ChunkedApproxEqual(lc.Data(), rc.Data(), opts...) { - return false - } - } - return true -} - -// Equal reports whether the two provided arrays are equal. -func Equal(left, right arrow.Array) bool { - switch { - case !baseArrayEqual(left, right): - return false - case left.Len() == 0: - return true - case left.NullN() == left.Len(): - return true - } - - // at this point, we know both arrays have same type, same length, same number of nulls - // and nulls at the same place. - // compare the values. 
- - switch l := left.(type) { - case *Null: - return true - case *Boolean: - r := right.(*Boolean) - return arrayEqualBoolean(l, r) - case *FixedSizeBinary: - r := right.(*FixedSizeBinary) - return arrayEqualFixedSizeBinary(l, r) - case *Binary: - r := right.(*Binary) - return arrayEqualBinary(l, r) - case *String: - r := right.(*String) - return arrayEqualString(l, r) - case *LargeBinary: - r := right.(*LargeBinary) - return arrayEqualLargeBinary(l, r) - case *LargeString: - r := right.(*LargeString) - return arrayEqualLargeString(l, r) - case *BinaryView: - r := right.(*BinaryView) - return arrayEqualBinaryView(l, r) - case *StringView: - r := right.(*StringView) - return arrayEqualStringView(l, r) - case *Int8: - r := right.(*Int8) - return arrayEqualInt8(l, r) - case *Int16: - r := right.(*Int16) - return arrayEqualInt16(l, r) - case *Int32: - r := right.(*Int32) - return arrayEqualInt32(l, r) - case *Int64: - r := right.(*Int64) - return arrayEqualInt64(l, r) - case *Uint8: - r := right.(*Uint8) - return arrayEqualUint8(l, r) - case *Uint16: - r := right.(*Uint16) - return arrayEqualUint16(l, r) - case *Uint32: - r := right.(*Uint32) - return arrayEqualUint32(l, r) - case *Uint64: - r := right.(*Uint64) - return arrayEqualUint64(l, r) - case *Float16: - r := right.(*Float16) - return arrayEqualFloat16(l, r) - case *Float32: - r := right.(*Float32) - return arrayEqualFloat32(l, r) - case *Float64: - r := right.(*Float64) - return arrayEqualFloat64(l, r) - case *Decimal128: - r := right.(*Decimal128) - return arrayEqualDecimal128(l, r) - case *Decimal256: - r := right.(*Decimal256) - return arrayEqualDecimal256(l, r) - case *Date32: - r := right.(*Date32) - return arrayEqualDate32(l, r) - case *Date64: - r := right.(*Date64) - return arrayEqualDate64(l, r) - case *Time32: - r := right.(*Time32) - return arrayEqualTime32(l, r) - case *Time64: - r := right.(*Time64) - return arrayEqualTime64(l, r) - case *Timestamp: - r := right.(*Timestamp) - return 
arrayEqualTimestamp(l, r) - case *List: - r := right.(*List) - return arrayEqualList(l, r) - case *LargeList: - r := right.(*LargeList) - return arrayEqualLargeList(l, r) - case *ListView: - r := right.(*ListView) - return arrayEqualListView(l, r) - case *LargeListView: - r := right.(*LargeListView) - return arrayEqualLargeListView(l, r) - case *FixedSizeList: - r := right.(*FixedSizeList) - return arrayEqualFixedSizeList(l, r) - case *Struct: - r := right.(*Struct) - return arrayEqualStruct(l, r) - case *MonthInterval: - r := right.(*MonthInterval) - return arrayEqualMonthInterval(l, r) - case *DayTimeInterval: - r := right.(*DayTimeInterval) - return arrayEqualDayTimeInterval(l, r) - case *MonthDayNanoInterval: - r := right.(*MonthDayNanoInterval) - return arrayEqualMonthDayNanoInterval(l, r) - case *Duration: - r := right.(*Duration) - return arrayEqualDuration(l, r) - case *Map: - r := right.(*Map) - return arrayEqualMap(l, r) - case ExtensionArray: - r := right.(ExtensionArray) - return arrayEqualExtension(l, r) - case *Dictionary: - r := right.(*Dictionary) - return arrayEqualDict(l, r) - case *SparseUnion: - r := right.(*SparseUnion) - return arraySparseUnionEqual(l, r) - case *DenseUnion: - r := right.(*DenseUnion) - return arrayDenseUnionEqual(l, r) - case *RunEndEncoded: - r := right.(*RunEndEncoded) - return arrayRunEndEncodedEqual(l, r) - default: - panic(fmt.Errorf("arrow/array: unknown array type %T", l)) - } -} - -// SliceEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are equal. -func SliceEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64) bool { - l := NewSlice(left, lbeg, lend) - defer l.Release() - r := NewSlice(right, rbeg, rend) - defer r.Release() - - return Equal(l, r) -} - -// SliceApproxEqual reports whether slices left[lbeg:lend] and right[rbeg:rend] are approximately equal. 
-func SliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opts ...EqualOption) bool { - opt := newEqualOption(opts...) - return sliceApproxEqual(left, lbeg, lend, right, rbeg, rend, opt) -} - -func sliceApproxEqual(left arrow.Array, lbeg, lend int64, right arrow.Array, rbeg, rend int64, opt equalOption) bool { - l := NewSlice(left, lbeg, lend) - defer l.Release() - r := NewSlice(right, rbeg, rend) - defer r.Release() - - return arrayApproxEqual(l, r, opt) -} - -const defaultAbsoluteTolerance = 1e-5 - -type equalOption struct { - atol float64 // absolute tolerance - nansEq bool // whether NaNs are considered equal. - unorderedMapKeys bool // whether maps are allowed to have different entries order -} - -func (eq equalOption) f16(f1, f2 float16.Num) bool { - v1 := float64(f1.Float32()) - v2 := float64(f2.Float32()) - switch { - case eq.nansEq: - return math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) - default: - return math.Abs(v1-v2) <= eq.atol - } -} - -func (eq equalOption) f32(f1, f2 float32) bool { - v1 := float64(f1) - v2 := float64(f2) - switch { - case eq.nansEq: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) - default: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol - } -} - -func (eq equalOption) f64(v1, v2 float64) bool { - switch { - case eq.nansEq: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol || (math.IsNaN(v1) && math.IsNaN(v2)) - default: - return v1 == v2 || math.Abs(v1-v2) <= eq.atol - } -} - -func newEqualOption(opts ...EqualOption) equalOption { - eq := equalOption{ - atol: defaultAbsoluteTolerance, - nansEq: false, - } - for _, opt := range opts { - opt(&eq) - } - - return eq -} - -// EqualOption is a functional option type used to configure how Records and Arrays are compared. -type EqualOption func(*equalOption) - -// WithNaNsEqual configures the comparison functions so that NaNs are considered equal. 
-func WithNaNsEqual(v bool) EqualOption { - return func(o *equalOption) { - o.nansEq = v - } -} - -// WithAbsTolerance configures the comparison functions so that 2 floating point values -// v1 and v2 are considered equal if |v1-v2| <= atol. -func WithAbsTolerance(atol float64) EqualOption { - return func(o *equalOption) { - o.atol = atol - } -} - -// WithUnorderedMapKeys configures the comparison functions so that Map with different entries order are considered equal. -func WithUnorderedMapKeys(v bool) EqualOption { - return func(o *equalOption) { - o.unorderedMapKeys = v - } -} - -// ApproxEqual reports whether the two provided arrays are approximately equal. -// For non-floating point arrays, it is equivalent to Equal. -func ApproxEqual(left, right arrow.Array, opts ...EqualOption) bool { - opt := newEqualOption(opts...) - return arrayApproxEqual(left, right, opt) -} - -func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool { - switch { - case !baseArrayEqual(left, right): - return false - case left.Len() == 0: - return true - case left.NullN() == left.Len(): - return true - } - - // at this point, we know both arrays have same type, same length, same number of nulls - // and nulls at the same place. - // compare the values. 
- - switch l := left.(type) { - case *Null: - return true - case *Boolean: - r := right.(*Boolean) - return arrayEqualBoolean(l, r) - case *FixedSizeBinary: - r := right.(*FixedSizeBinary) - return arrayEqualFixedSizeBinary(l, r) - case *Binary: - r := right.(*Binary) - return arrayEqualBinary(l, r) - case *String: - r := right.(*String) - return arrayEqualString(l, r) - case *LargeBinary: - r := right.(*LargeBinary) - return arrayEqualLargeBinary(l, r) - case *LargeString: - r := right.(*LargeString) - return arrayEqualLargeString(l, r) - case *BinaryView: - r := right.(*BinaryView) - return arrayEqualBinaryView(l, r) - case *StringView: - r := right.(*StringView) - return arrayEqualStringView(l, r) - case *Int8: - r := right.(*Int8) - return arrayEqualInt8(l, r) - case *Int16: - r := right.(*Int16) - return arrayEqualInt16(l, r) - case *Int32: - r := right.(*Int32) - return arrayEqualInt32(l, r) - case *Int64: - r := right.(*Int64) - return arrayEqualInt64(l, r) - case *Uint8: - r := right.(*Uint8) - return arrayEqualUint8(l, r) - case *Uint16: - r := right.(*Uint16) - return arrayEqualUint16(l, r) - case *Uint32: - r := right.(*Uint32) - return arrayEqualUint32(l, r) - case *Uint64: - r := right.(*Uint64) - return arrayEqualUint64(l, r) - case *Float16: - r := right.(*Float16) - return arrayApproxEqualFloat16(l, r, opt) - case *Float32: - r := right.(*Float32) - return arrayApproxEqualFloat32(l, r, opt) - case *Float64: - r := right.(*Float64) - return arrayApproxEqualFloat64(l, r, opt) - case *Decimal128: - r := right.(*Decimal128) - return arrayEqualDecimal128(l, r) - case *Decimal256: - r := right.(*Decimal256) - return arrayEqualDecimal256(l, r) - case *Date32: - r := right.(*Date32) - return arrayEqualDate32(l, r) - case *Date64: - r := right.(*Date64) - return arrayEqualDate64(l, r) - case *Time32: - r := right.(*Time32) - return arrayEqualTime32(l, r) - case *Time64: - r := right.(*Time64) - return arrayEqualTime64(l, r) - case *Timestamp: - r := 
right.(*Timestamp) - return arrayEqualTimestamp(l, r) - case *List: - r := right.(*List) - return arrayApproxEqualList(l, r, opt) - case *LargeList: - r := right.(*LargeList) - return arrayApproxEqualLargeList(l, r, opt) - case *ListView: - r := right.(*ListView) - return arrayApproxEqualListView(l, r, opt) - case *LargeListView: - r := right.(*LargeListView) - return arrayApproxEqualLargeListView(l, r, opt) - case *FixedSizeList: - r := right.(*FixedSizeList) - return arrayApproxEqualFixedSizeList(l, r, opt) - case *Struct: - r := right.(*Struct) - return arrayApproxEqualStruct(l, r, opt) - case *MonthInterval: - r := right.(*MonthInterval) - return arrayEqualMonthInterval(l, r) - case *DayTimeInterval: - r := right.(*DayTimeInterval) - return arrayEqualDayTimeInterval(l, r) - case *MonthDayNanoInterval: - r := right.(*MonthDayNanoInterval) - return arrayEqualMonthDayNanoInterval(l, r) - case *Duration: - r := right.(*Duration) - return arrayEqualDuration(l, r) - case *Map: - r := right.(*Map) - if opt.unorderedMapKeys { - return arrayApproxEqualMap(l, r, opt) - } - return arrayApproxEqualList(l.List, r.List, opt) - case *Dictionary: - r := right.(*Dictionary) - return arrayApproxEqualDict(l, r, opt) - case ExtensionArray: - r := right.(ExtensionArray) - return arrayApproxEqualExtension(l, r, opt) - case *SparseUnion: - r := right.(*SparseUnion) - return arraySparseUnionApproxEqual(l, r, opt) - case *DenseUnion: - r := right.(*DenseUnion) - return arrayDenseUnionApproxEqual(l, r, opt) - case *RunEndEncoded: - r := right.(*RunEndEncoded) - return arrayRunEndEncodedApproxEqual(l, r, opt) - default: - panic(fmt.Errorf("arrow/array: unknown array type %T", l)) - } -} - -func baseArrayEqual(left, right arrow.Array) bool { - switch { - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not check for metadata as in the C++ implementation. 
- return false - case !validityBitmapEqual(left, right): - return false - } - return true -} - -func validityBitmapEqual(left, right arrow.Array) bool { - // TODO(alexandreyc): make it faster by comparing byte slices of the validity bitmap? - n := left.Len() - if n != right.Len() { - return false - } - for i := 0; i < n; i++ { - if left.IsNull(i) != right.IsNull(i) { - return false - } - } - return true -} - -func arrayApproxEqualFloat16(left, right *Float16, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !opt.f16(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -func arrayApproxEqualFloat32(left, right *Float32, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !opt.f32(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -func arrayApproxEqualFloat64(left, right *Float64, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !opt.f64(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -func arrayApproxEqualList(left, right *List, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualLargeList(left, right *LargeList, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualListView(left, right *ListView, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } 
- o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualLargeListView(left, right *LargeListView, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return arrayApproxEqual(l, r, opt) - }() - if !o { - return false - } - } - return true -} - -func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool { - return bitutils.VisitSetBitRuns( - left.NullBitmapBytes(), - int64(left.Offset()), int64(left.Len()), - approxEqualStructRun(left, right, opt), - ) == nil -} - -func approxEqualStructRun(left, right *Struct, opt equalOption) bitutils.VisitFn { - return func(pos int64, length int64) error { - for i := range left.fields { - if !sliceApproxEqual(left.fields[i], pos, pos+length, right.fields[i], pos, pos+length, opt) { - return arrow.ErrInvalid - } - } - return nil - } -} - -// arrayApproxEqualMap doesn't care about the order of keys (in Go map traversal order is undefined) -func arrayApproxEqualMap(left, right *Map, opt equalOption) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), right.newListValue(i).(*Struct), opt) { - return false - } - } - return true -} - -// arrayApproxEqualSingleMapEntry is a helper function that checks if a single entry pair is 
approx equal. -// Basically, it doesn't care about key order. -// structs passed will be released -func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool { - defer left.Release() - defer right.Release() - - // we don't compare the validity bitmap, but we want other checks from baseArrayEqual - switch { - case left.Len() != right.Len(): - return false - case left.NullN() != right.NullN(): - return false - case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not check for metadata as in the C++ implementation. - return false - case left.NullN() == left.Len(): - return true - } - - used := make(map[int]bool, right.Len()) - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - - found := false - lBeg, lEnd := int64(i), int64(i+1) - for j := 0; j < right.Len(); j++ { - if used[j] { - continue - } - if right.IsNull(j) { - used[j] = true - continue - } - - rBeg, rEnd := int64(j), int64(j+1) - - // check keys (field 0) - if !sliceApproxEqual(left.Field(0), lBeg, lEnd, right.Field(0), rBeg, rEnd, opt) { - continue - } - - // only now check the values - if sliceApproxEqual(left.Field(1), lBeg, lEnd, right.Field(1), rBeg, rEnd, opt) { - found = true - used[j] = true - break - } - } - if !found { - return false - } - } - - return len(used) == right.Len() -} diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go deleted file mode 100644 index f757ab9f25f07..0000000000000 --- a/go/arrow/array/compare_test.go +++ /dev/null @@ -1,728 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math" - "sort" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestArrayEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec := recs[0] - schema := rec.Schema() - for i, col := range rec.Columns() { - t.Run(schema.Field(i).Name, func(t *testing.T) { - arr := col - if !array.Equal(arr, arr) { - t.Fatalf("identical arrays should compare equal:\narray=%v", arr) - } - sub1 := array.NewSlice(arr, 1, int64(arr.Len())) - defer sub1.Release() - - sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) - defer sub2.Release() - - if array.Equal(sub1, sub2) && name != "nulls" { - t.Fatalf("non-identical arrays should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) - } - }) - } - }) - } -} - -func TestArraySliceEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec := recs[0] - schema := rec.Schema() - for i, col := range rec.Columns() { - t.Run(schema.Field(i).Name, func(t *testing.T) { - arr := col - if !array.SliceEqual( - arr, 0, int64(arr.Len()), - arr, 0, int64(arr.Len()), - ) { - t.Fatalf("identical slices should compare equal:\narray=%v", arr) - } - sub1 := array.NewSlice(arr, 1, 
int64(arr.Len())) - defer sub1.Release() - - sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) - defer sub2.Release() - - if array.SliceEqual(sub1, 0, int64(sub1.Len()), sub2, 0, int64(sub2.Len())) && name != "nulls" { - t.Fatalf("non-identical slices should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) - } - }) - } - }) - } -} - -func TestArrayApproxEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec := recs[0] - schema := rec.Schema() - for i, col := range rec.Columns() { - t.Run(schema.Field(i).Name, func(t *testing.T) { - arr := col - if !array.ApproxEqual(arr, arr) { - t.Fatalf("identical arrays should compare equal:\narray=%v", arr) - } - sub1 := array.NewSlice(arr, 1, int64(arr.Len())) - defer sub1.Release() - - sub2 := array.NewSlice(arr, 0, int64(arr.Len()-1)) - defer sub2.Release() - - if array.ApproxEqual(sub1, sub2) && name != "nulls" { - t.Fatalf("non-identical arrays should not compare equal:\nsub1=%v\nsub2=%v\narrf=%v\n", sub1, sub2, arr) - } - }) - } - }) - } -} - -func TestArrayApproxEqualFloats(t *testing.T) { - f16sFrom := func(vs []float64) []float16.Num { - o := make([]float16.Num, len(vs)) - for i, v := range vs { - o[i] = float16.New(float32(v)) - } - return o - } - - for _, tc := range []struct { - name string - a1 interface{} - a2 interface{} - opts []array.EqualOption - want bool - }{ - { - name: "f16", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - want: true, - }, - { - name: "f16-no-tol", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, 7}), - want: false, - }, - { - name: "f16-tol-ok", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, 7}), - opts: []array.EqualOption{array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f16-nan", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - want: 
false, - }, - { - name: "f16-nan-not", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, 6}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f16-nan-ok", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - a2: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: true, - }, - { - name: "f16-nan-no-tol", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - a2: f16sFrom([]float64{1, 2, 3, 4, 6, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f16-nan-tol", - a1: f16sFrom([]float64{1, 2, 3, 4, 5, math.NaN()}), - a2: f16sFrom([]float64{1, 2, 3, 4, 6, math.NaN()}), - opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f32", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, 6}, - want: true, - }, - { - name: "f32-no-tol", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, 7}, - want: false, - }, - { - name: "f32-tol-ok", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, 7}, - opts: []array.EqualOption{array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f32-nan", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - want: false, - }, - { - name: "f32-nan-not", - a1: []float32{1, 2, 3, 4, 5, 6}, - a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f32-nan-ok", - a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - a2: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: true, - }, - { - name: "f32-nan-no-tol", - a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - a2: []float32{1, 2, 3, 4, 6, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: 
false, - }, - { - name: "f32-nan-tol", - a1: []float32{1, 2, 3, 4, 5, float32(math.NaN())}, - a2: []float32{1, 2, 3, 4, 6, float32(math.NaN())}, - opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f64", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, 6}, - want: true, - }, - { - name: "f64-no-tol", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, 7}, - want: false, - }, - { - name: "f64-tol-ok", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, 7}, - opts: []array.EqualOption{array.WithAbsTolerance(1)}, - want: true, - }, - { - name: "f64-nan", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, math.NaN()}, - want: false, - }, - { - name: "f64-nan-not", - a1: []float64{1, 2, 3, 4, 5, 6}, - a2: []float64{1, 2, 3, 4, 5, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f64-nan-ok", - a1: []float64{1, 2, 3, 4, 5, math.NaN()}, - a2: []float64{1, 2, 3, 4, 5, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: true, - }, - { - name: "f64-nan-no-tol", - a1: []float64{1, 2, 3, 4, 5, math.NaN()}, - a2: []float64{1, 2, 3, 4, 6, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true)}, - want: false, - }, - { - name: "f64-nan-tol", - a1: []float64{1, 2, 3, 4, 5, math.NaN()}, - a2: []float64{1, 2, 3, 4, 6, math.NaN()}, - opts: []array.EqualOption{array.WithNaNsEqual(true), array.WithAbsTolerance(1)}, - want: true, - }, - } { - t.Run(tc.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - a1 := arrayOf(mem, tc.a1, nil) - defer a1.Release() - a2 := arrayOf(mem, tc.a2, nil) - defer a2.Release() - - if got, want := array.ApproxEqual(a1, a2, tc.opts...), tc.want; got != want { - t.Fatalf("invalid comparison: got=%v, want=%v\na1: %v\na2: %v\n", got, want, a1, a2) - } - }) - } -} - -func 
testStringMap(mem memory.Allocator, m map[string]string, keys []string) *array.Map { - dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String) - builder := array.NewMapBuilderWithType(mem, dt) - defer builder.Release() - key, item := builder.KeyBuilder().(*array.StringBuilder), builder.ItemBuilder().(*array.StringBuilder) - - builder.AppendNull() - builder.Append(true) - - for _, k := range keys { - key.Append(k) - - v, ok := m[k] - if !ok { - item.AppendNull() - continue - } - - item.Append(v) - } - - return builder.NewMapArray() -} - -func TestArrayApproxEqualMaps(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - t.Run("different order", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z"} - - keys := []string{"z", "y", "x", "null"} - a := testStringMap(mem, m, keys) - defer a.Release() - - asc := make([]string, len(keys)) - copy(asc, keys) - sort.Strings(asc) - assert.NotEqual(t, keys, asc) - - b := testStringMap(mem, m, asc) - defer b.Release() - - assert.False(t, array.ApproxEqual(a, b)) - assert.True(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("extra left value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} - - aKeys := []string{"z", "y", "x", "extra"} - a := testStringMap(mem, m, aKeys) - defer a.Release() - - bKeys := []string{"z", "y", "x"} - b := testStringMap(mem, m, bKeys) - defer b.Release() - - assert.NotEqual(t, aKeys, bKeys) - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("extra right value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} - - aKeys := []string{"z", "y", "x"} - a := testStringMap(mem, m, aKeys) - defer a.Release() - - bKeys := []string{"z", "y", "x", "extra"} - b := testStringMap(mem, m, 
bKeys) - defer b.Release() - - assert.NotEqual(t, aKeys, bKeys) - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("unmatched value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra", "extra2": "extra"} - - aKeys := []string{"z", "y", "x", "extra"} - a := testStringMap(mem, m, aKeys) - defer a.Release() - - bKeys := []string{"z", "y", "x", "extra2"} - b := testStringMap(mem, m, bKeys) - defer b.Release() - - assert.NotEqual(t, aKeys, bKeys) - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) - - t.Run("different value", func(t *testing.T) { - m := map[string]string{"x": "x", "y": "y", "z": "z", "extra": "extra"} - - keys := []string{"z", "y", "x", "extra"} - a := testStringMap(mem, m, keys) - defer a.Release() - - m["extra"] = "different" - b := testStringMap(mem, m, keys) - defer b.Release() - - assert.Equal(t, a.NullN(), b.NullN()) - assert.False(t, array.ApproxEqual(a, b)) - assert.False(t, array.ApproxEqual(a, b, array.WithUnorderedMapKeys(true))) - }) -} - -func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array { - if mem == nil { - mem = memory.NewGoAllocator() - } - - switch a := a.(type) { - case []float16.Num: - bldr := array.NewFloat16Builder(mem) - defer bldr.Release() - - bldr.AppendValues(a, valids) - return bldr.NewFloat16Array() - - case []float32: - bldr := array.NewFloat32Builder(mem) - defer bldr.Release() - - bldr.AppendValues(a, valids) - return bldr.NewFloat32Array() - - case []float64: - bldr := array.NewFloat64Builder(mem) - defer bldr.Release() - - bldr.AppendValues(a, valids) - return bldr.NewFloat64Array() - - default: - panic(fmt.Errorf("arrdata: invalid data slice type %T", a)) - } -} - -func TestArrayEqualBaseArray(t *testing.T) { - mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b1 := array.NewBooleanBuilder(mem) - defer b1.Release() - b1.Append(true) - a1 := b1.NewBooleanArray() - defer a1.Release() - - b2 := array.NewBooleanBuilder(mem) - defer b2.Release() - a2 := b2.NewBooleanArray() - defer a2.Release() - - if array.Equal(a1, a2) { - t.Errorf("two arrays with different lengths must not be equal") - } - - b3 := array.NewBooleanBuilder(mem) - defer b3.Release() - b3.AppendNull() - a3 := b3.NewBooleanArray() - defer a3.Release() - - if array.Equal(a1, a3) { - t.Errorf("two arrays with different number of null values must not be equal") - } - - b4 := array.NewInt32Builder(mem) - defer b4.Release() - b4.Append(0) - a4 := b4.NewInt32Array() - defer a4.Release() - - if array.Equal(a1, a4) { - t.Errorf("two arrays with different types must not be equal") - } - - b5 := array.NewBooleanBuilder(mem) - defer b5.Release() - b5.AppendNull() - b5.Append(true) - a5 := b5.NewBooleanArray() - defer a5.Release() - b1.AppendNull() - - if array.Equal(a1, a5) { - t.Errorf("two arrays with different validity bitmaps must not be equal") - } -} - -func TestArrayEqualNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - null := array.NewNull(0) - defer null.Release() - - if !array.Equal(null, null) { - t.Fatalf("identical arrays should compare equal") - } - - n0 := array.NewNull(10) - defer n0.Release() - - n1 := array.NewNull(10) - defer n1.Release() - - if !array.Equal(n0, n0) { - t.Fatalf("identical arrays should compare equal") - } - if !array.Equal(n1, n1) { - t.Fatalf("identical arrays should compare equal") - } - if !array.Equal(n0, n1) || !array.Equal(n1, n0) { - t.Fatalf("n0 and n1 should compare equal") - } - - sub07 := array.NewSlice(n0, 0, 7) - defer sub07.Release() - sub08 := array.NewSlice(n0, 0, 8) - defer sub08.Release() - sub19 := array.NewSlice(n0, 1, 9) - defer sub19.Release() - - if 
!array.Equal(sub08, sub19) { - t.Fatalf("sub08 and sub19 should compare equal") - } - - if array.Equal(sub08, sub07) { - t.Fatalf("sub08 and sub07 should not compare equal") - } -} - -func TestArrayEqualMaskedArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - valids := []bool{false, false, false, false} - ab.AppendValues([]int32{1, 2, 0, 4}, valids) - - a1 := ab.NewInt32Array() - defer a1.Release() - - ab.AppendValues([]int32{1, 2, 3, 4}, valids) - a2 := ab.NewInt32Array() - defer a2.Release() - - if !array.Equal(a1, a1) || !array.Equal(a2, a2) { - t.Errorf("an array must be equal to itself") - } - - if !array.Equal(a1, a2) { - t.Errorf("%v must be equal to %v", a1, a2) - } -} - -func TestArrayEqualDifferentMaskedValues(t *testing.T) { - // test 2 int32 arrays, with same nulls (but different masked values) compare equal. - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - valids := []bool{true, true, false, true} - ab.AppendValues([]int32{1, 2, 0, 4}, valids) - - a1 := ab.NewInt32Array() - defer a1.Release() - - ab.AppendValues([]int32{1, 2, 3, 4}, valids) - a2 := ab.NewInt32Array() - defer a2.Release() - - if !array.Equal(a1, a1) || !array.Equal(a2, a2) { - t.Errorf("an array must be equal to itself") - } - - if !array.Equal(a1, a2) { - t.Errorf("%v must be equal to %v", a1, a2) - } -} - -func TestRecordEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec0 := recs[0] - rec1 := recs[1] - if !array.RecordEqual(rec0, rec0) { - t.Fatalf("identical records should compare equal:\nrecord:\n%v", rec0) - } - - if array.RecordEqual(rec0, rec1) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nrec0:\n%v\nrec1:\n%v", rec0, rec1) - } - - sub00 := rec0.NewSlice(0, 
recs[0].NumRows()-1) - defer sub00.Release() - sub01 := rec0.NewSlice(1, recs[0].NumRows()) - defer sub01.Release() - - if array.RecordEqual(sub00, sub01) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nsub0:\n%v\nsub1:\n%v", sub00, sub01) - } - }) - } -} - -func TestRecordApproxEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - rec0 := recs[0] - rec1 := recs[1] - if !array.RecordApproxEqual(rec0, rec0) { - t.Fatalf("identical records should compare equal:\nrecord:\n%v", rec0) - } - - if array.RecordApproxEqual(rec0, rec1) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nrec0:\n%v\nrec1:\n%v", rec0, rec1) - } - - sub00 := rec0.NewSlice(0, recs[0].NumRows()-1) - defer sub00.Release() - sub01 := rec0.NewSlice(1, recs[0].NumRows()) - defer sub01.Release() - - if array.RecordApproxEqual(sub00, sub01) && name != "nulls" { - t.Fatalf("non-identical records should not compare equal:\nsub0:\n%v\nsub1:\n%v", sub00, sub01) - } - }) - } -} - -func TestChunkedEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - tbl := array.NewTableFromRecords(recs[0].Schema(), recs) - defer tbl.Release() - - for i := 0; i < int(tbl.NumCols()); i++ { - if !array.ChunkedEqual(tbl.Column(i).Data(), tbl.Column(i).Data()) && name != "nulls" { - t.Fatalf("identical chunked arrays should compare as equal:\narr:%v\n", tbl.Column(i).Data()) - } - } - }) - } -} - -func TestChunkedApproxEqual(t *testing.T) { - fb := array.NewFloat64Builder(memory.DefaultAllocator) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c1 := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - 
[]arrow.Array{f1, f2, f3}, - ) - defer c1.Release() - - fb.AppendValues([]float64{1, 2, 3}, nil) - f4 := fb.NewFloat64Array() - defer f4.Release() - - fb.AppendValues([]float64{4, 5}, nil) - f5 := fb.NewFloat64Array() - defer f5.Release() - - fb.AppendValues([]float64{6, 7, 8, 9}, nil) - f6 := fb.NewFloat64Array() - defer f6.Release() - - fb.AppendValues([]float64{10}, nil) - f7 := fb.NewFloat64Array() - defer f7.Release() - - c2 := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f4, f5, f6, f7}, - ) - defer c2.Release() - - assert.True(t, array.ChunkedEqual(c1, c2)) - assert.True(t, array.ChunkedApproxEqual(c1, c2)) -} - -func TestTableEqual(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - tbl := array.NewTableFromRecords(recs[0].Schema(), recs) - defer tbl.Release() - - if !array.TableEqual(tbl, tbl) { - t.Fatalf("identical tables should compare as equal:\tbl:%v\n", tbl) - } - if !array.TableApproxEqual(tbl, tbl) { - t.Fatalf("identical tables should compare as approx equal:\tbl:%v\n", tbl) - } - }) - } -} diff --git a/go/arrow/array/concat.go b/go/arrow/array/concat.go deleted file mode 100644 index 3d2b4b4b83167..0000000000000 --- a/go/arrow/array/concat.go +++ /dev/null @@ -1,933 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "math" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/encoded" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/bitutils" - "github.com/apache/arrow/go/v18/internal/utils" -) - -// Concatenate creates a new arrow.Array which is the concatenation of the -// passed in arrays. Returns nil if an error is encountered. -// -// The passed in arrays still need to be released manually, and will not be -// released by this function. -func Concatenate(arrs []arrow.Array, mem memory.Allocator) (result arrow.Array, err error) { - if len(arrs) == 0 { - return nil, errors.New("array/concat: must pass at least one array") - } - - // gather Data of inputs - data := make([]arrow.ArrayData, len(arrs)) - for i, ar := range arrs { - if !arrow.TypeEqual(ar.DataType(), arrs[0].DataType()) { - return nil, fmt.Errorf("arrays to be concatenated must be identically typed, but %s and %s were encountered", - arrs[0].DataType(), ar.DataType()) - } - data[i] = ar.Data() - } - - out, err := concat(data, mem) - if err != nil { - return nil, err - } - - defer out.Release() - return MakeFromData(out), nil -} - -// simple struct to hold ranges -type rng struct { - offset, len int -} - -// simple bitmap struct to reference a specific slice of a bitmap where the range -// offset and length are in bits -type bitmap struct { - data []byte - rng rng -} - -// gather up the bitmaps from the passed in data objects -func gatherBitmaps(data []arrow.ArrayData, idx int) []bitmap { - out := make([]bitmap, len(data)) - for i, d := range data { - if d.Buffers()[idx] != nil { - out[i].data = 
d.Buffers()[idx].Bytes() - } - out[i].rng.offset = d.Offset() - out[i].rng.len = d.Len() - } - return out -} - -// gatherFixedBuffers gathers up the buffer objects of the given index, specifically -// returning only the slices of the buffers which are relevant to the passed in arrays -// in case they are themselves slices of other arrays. nil buffers are ignored and not -// in the output slice. -func gatherFixedBuffers(data []arrow.ArrayData, idx, byteWidth int) []*memory.Buffer { - out := make([]*memory.Buffer, 0, len(data)) - for _, d := range data { - buf := d.Buffers()[idx] - if buf == nil { - continue - } - - out = append(out, memory.NewBufferBytes(buf.Bytes()[d.Offset()*byteWidth:(d.Offset()+d.Len())*byteWidth])) - } - return out -} - -// gatherBuffersFixedWidthType is like gatherFixedBuffers, but uses a datatype to determine the size -// to use for determining the byte slice rather than a passed in bytewidth. -func gatherBuffersFixedWidthType(data []arrow.ArrayData, idx int, fixed arrow.FixedWidthDataType) []*memory.Buffer { - return gatherFixedBuffers(data, idx, fixed.BitWidth()/8) -} - -// gatherBufferRanges requires that len(ranges) == len(data) and returns a list of buffers -// which represent the corresponding range of each buffer in the specified index of each -// data object. -func gatherBufferRanges(data []arrow.ArrayData, idx int, ranges []rng) []*memory.Buffer { - out := make([]*memory.Buffer, 0, len(data)) - for i, d := range data { - buf := d.Buffers()[idx] - if buf == nil { - debug.Assert(ranges[i].len == 0, "misaligned buffer value ranges") - continue - } - - out = append(out, memory.NewBufferBytes(buf.Bytes()[ranges[i].offset:ranges[i].offset+ranges[i].len])) - } - return out -} - -// gatherChildren gathers the children data objects for child of index idx for all of the data objects. 
-func gatherChildren(data []arrow.ArrayData, idx int) []arrow.ArrayData { - return gatherChildrenMultiplier(data, idx, 1) -} - -// gatherChildrenMultiplier gathers the full data slice of the underlying values from the children data objects -// such as the values data for a list array so that it can return a slice of the buffer for a given -// index into the children. -func gatherChildrenMultiplier(data []arrow.ArrayData, idx, multiplier int) []arrow.ArrayData { - out := make([]arrow.ArrayData, len(data)) - for i, d := range data { - out[i] = NewSliceData(d.Children()[idx], int64(d.Offset()*multiplier), int64(d.Offset()+d.Len())*int64(multiplier)) - } - return out -} - -// gatherChildrenRanges returns a slice of Data objects which each represent slices of the given ranges from the -// child in the specified index from each data object. -func gatherChildrenRanges(data []arrow.ArrayData, idx int, ranges []rng) []arrow.ArrayData { - debug.Assert(len(data) == len(ranges), "mismatched children ranges for concat") - out := make([]arrow.ArrayData, len(data)) - for i, d := range data { - out[i] = NewSliceData(d.Children()[idx], int64(ranges[i].offset), int64(ranges[i].offset+ranges[i].len)) - } - return out -} - -// creates a single contiguous buffer which contains the concatenation of all of the passed -// in buffer objects. 
-func concatBuffers(bufs []*memory.Buffer, mem memory.Allocator) *memory.Buffer { - outLen := 0 - for _, b := range bufs { - outLen += b.Len() - } - out := memory.NewResizableBuffer(mem) - out.Resize(outLen) - - data := out.Bytes() - for _, b := range bufs { - copy(data, b.Bytes()) - data = data[b.Len():] - } - return out -} - -func handle32BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { - dst := arrow.Int32Traits.CastFromBytes(out.Bytes()) - valuesRanges := make([]rng, len(buffers)) - nextOffset := int32(0) - nextElem := int(0) - for i, b := range buffers { - if b.Len() == 0 { - valuesRanges[i].offset = 0 - valuesRanges[i].len = 0 - continue - } - - // when we gather our buffers, we sliced off the last offset from the buffer - // so that we could count the lengths accurately - src := arrow.Int32Traits.CastFromBytes(b.Bytes()) - valuesRanges[i].offset = int(src[0]) - // expand our slice to see that final offset - expand := src[:len(src)+1] - // compute the length of this range by taking the final offset and subtracting where we started. 
- valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset - - if nextOffset > math.MaxInt32-int32(valuesRanges[i].len) { - return nil, nil, errors.New("offset overflow while concatenating arrays") - } - - // adjust each offset by the difference between our last ending point and our starting point - adj := nextOffset - src[0] - for j, o := range src { - dst[nextElem+j] = adj + o - } - - // the next index for an element in the output buffer - nextElem += b.Len() / arrow.Int32SizeBytes - // update our offset counter to be the total current length of our output - nextOffset += int32(valuesRanges[i].len) - } - - // final offset should point to the end of the data - dst[outLen] = nextOffset - return out, valuesRanges, nil -} - -func unifyDictionaries(mem memory.Allocator, data []arrow.ArrayData, dt *arrow.DictionaryType) ([]*memory.Buffer, arrow.Array, error) { - unifier, err := NewDictionaryUnifier(mem, dt.ValueType) - if err != nil { - return nil, nil, err - } - defer unifier.Release() - - newLookup := make([]*memory.Buffer, len(data)) - for i, d := range data { - dictArr := MakeFromData(d.Dictionary()) - defer dictArr.Release() - newLookup[i], err = unifier.UnifyAndTranspose(dictArr) - if err != nil { - return nil, nil, err - } - } - - unified, err := unifier.GetResultWithIndexType(dt.IndexType) - if err != nil { - for _, b := range newLookup { - b.Release() - } - return nil, nil, err - } - return newLookup, unified, nil -} - -func concatDictIndices(mem memory.Allocator, data []arrow.ArrayData, idxType arrow.FixedWidthDataType, transpositions []*memory.Buffer) (out *memory.Buffer, err error) { - defer func() { - if err != nil && out != nil { - out.Release() - out = nil - } - }() - - idxWidth := idxType.BitWidth() / 8 - outLen := 0 - for i, d := range data { - outLen += d.Len() - defer transpositions[i].Release() - } - - out = memory.NewResizableBuffer(mem) - out.Resize(outLen * idxWidth) - - outData := out.Bytes() - for i, d := range data { - transposeMap 
:= arrow.Int32Traits.CastFromBytes(transpositions[i].Bytes()) - src := d.Buffers()[1].Bytes() - if d.Buffers()[0] == nil { - if err = utils.TransposeIntsBuffers(idxType, idxType, src, outData, d.Offset(), 0, d.Len(), transposeMap); err != nil { - return - } - } else { - rdr := bitutils.NewBitRunReader(d.Buffers()[0].Bytes(), int64(d.Offset()), int64(d.Len())) - pos := 0 - for { - run := rdr.NextRun() - if run.Len == 0 { - break - } - - if run.Set { - err = utils.TransposeIntsBuffers(idxType, idxType, src, outData, d.Offset()+pos, pos, int(run.Len), transposeMap) - if err != nil { - return - } - } else { - memory.Set(outData[pos:pos+(int(run.Len)*idxWidth)], 0x00) - } - - pos += int(run.Len) - } - } - outData = outData[d.Len()*idxWidth:] - } - return -} - -func handle64BitOffsets(outLen int, buffers []*memory.Buffer, out *memory.Buffer) (*memory.Buffer, []rng, error) { - dst := arrow.Int64Traits.CastFromBytes(out.Bytes()) - valuesRanges := make([]rng, len(buffers)) - nextOffset := int64(0) - nextElem := int(0) - for i, b := range buffers { - if b.Len() == 0 { - valuesRanges[i].offset = 0 - valuesRanges[i].len = 0 - continue - } - - // when we gather our buffers, we sliced off the last offset from the buffer - // so that we could count the lengths accurately - src := arrow.Int64Traits.CastFromBytes(b.Bytes()) - valuesRanges[i].offset = int(src[0]) - // expand our slice to see that final offset - expand := src[:len(src)+1] - // compute the length of this range by taking the final offset and subtracting where we started. 
- valuesRanges[i].len = int(expand[len(src)]) - valuesRanges[i].offset - - if nextOffset > math.MaxInt64-int64(valuesRanges[i].len) { - return nil, nil, errors.New("offset overflow while concatenating arrays") - } - - // adjust each offset by the difference between our last ending point and our starting point - adj := nextOffset - src[0] - for j, o := range src { - dst[nextElem+j] = adj + o - } - - // the next index for an element in the output buffer - nextElem += b.Len() / arrow.Int64SizeBytes - // update our offset counter to be the total current length of our output - nextOffset += int64(valuesRanges[i].len) - } - - // final offset should point to the end of the data - dst[outLen] = nextOffset - return out, valuesRanges, nil -} - -// concatOffsets creates a single offset buffer which represents the concatenation of all of the -// offsets buffers, adjusting the offsets appropriately to their new relative locations. -// -// It also returns the list of ranges that need to be fetched for the corresponding value buffers -// to construct the final concatenated value buffer. 
-func concatOffsets(buffers []*memory.Buffer, byteWidth int, mem memory.Allocator) (*memory.Buffer, []rng, error) { - outLen := 0 - for _, b := range buffers { - outLen += b.Len() / byteWidth - } - - out := memory.NewResizableBuffer(mem) - out.Resize(byteWidth * (outLen + 1)) - - switch byteWidth { - case arrow.Int64SizeBytes: - return handle64BitOffsets(outLen, buffers, out) - default: - return handle32BitOffsets(outLen, buffers, out) - } -} - -func sumArraySizes(data []arrow.ArrayData) int { - outSize := 0 - for _, arr := range data { - outSize += arr.Len() - } - return outSize -} - -func getListViewBufferValues[T int32 | int64](data arrow.ArrayData, i int) []T { - bytes := data.Buffers()[i].Bytes() - base := (*T)(unsafe.Pointer(&bytes[0])) - ret := unsafe.Slice(base, data.Offset()+data.Len()) - return ret[data.Offset():] -} - -func putListViewOffsets32(in arrow.ArrayData, displacement int32, out *memory.Buffer, outOff int) { - debug.Assert(in.DataType().ID() == arrow.LIST_VIEW, "putListViewOffsets32: expected LIST_VIEW data") - inOff, inLen := in.Offset(), in.Len() - if inLen == 0 { - return - } - bitmap := in.Buffers()[0] - srcOffsets := getListViewBufferValues[int32](in, 1) - srcSizes := getListViewBufferValues[int32](in, 2) - isValidAndNonEmpty := func(i int) bool { - return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 - } - - dstOffsets := arrow.Int32Traits.CastFromBytes(out.Bytes()) - for i, offset := range srcOffsets { - if isValidAndNonEmpty(i) { - // This is guaranteed by RangeOfValuesUsed returning the smallest offset - // of valid and non-empty list-views. 
- debug.Assert(offset+displacement >= 0, "putListViewOffsets32: offset underflow while concatenating arrays") - dstOffsets[outOff+i] = offset + displacement - } else { - dstOffsets[outOff+i] = 0 - } - } -} - -func putListViewOffsets64(in arrow.ArrayData, displacement int64, out *memory.Buffer, outOff int) { - debug.Assert(in.DataType().ID() == arrow.LARGE_LIST_VIEW, "putListViewOffsets64: expected LARGE_LIST_VIEW data") - inOff, inLen := in.Offset(), in.Len() - if inLen == 0 { - return - } - bitmap := in.Buffers()[0] - srcOffsets := getListViewBufferValues[int64](in, 1) - srcSizes := getListViewBufferValues[int64](in, 2) - isValidAndNonEmpty := func(i int) bool { - return (bitmap == nil || bitutil.BitIsSet(bitmap.Bytes(), inOff+i)) && srcSizes[i] > 0 - } - - dstOffsets := arrow.Int64Traits.CastFromBytes(out.Bytes()) - for i, offset := range srcOffsets { - if isValidAndNonEmpty(i) { - // This is guaranteed by RangeOfValuesUsed returning the smallest offset - // of valid and non-empty list-views. - debug.Assert(offset+displacement >= 0, "putListViewOffsets64: offset underflow while concatenating arrays") - dstOffsets[outOff+i] = offset + displacement - } else { - dstOffsets[outOff+i] = 0 - } - } -} - -// Concatenate buffers holding list-view offsets into a single buffer of offsets -// -// valueRanges contains the relevant ranges of values in the child array actually -// referenced to by the views. Most commonly, these ranges will start from 0, -// but when that is not the case, we need to adjust the displacement of offsets. -// The concatenated child array does not contain values from the beginning -// if they are not referenced to by any view. 
-func concatListViewOffsets(data []arrow.ArrayData, byteWidth int, valueRanges []rng, mem memory.Allocator) (*memory.Buffer, error) { - outSize := sumArraySizes(data) - if byteWidth == 4 && outSize > math.MaxInt32 { - return nil, fmt.Errorf("%w: offset overflow while concatenating arrays", arrow.ErrInvalid) - } - out := memory.NewResizableBuffer(mem) - out.Resize(byteWidth * outSize) - - numChildValues, elementsLength := 0, 0 - for i, arr := range data { - displacement := numChildValues - valueRanges[i].offset - if byteWidth == 4 { - putListViewOffsets32(arr, int32(displacement), out, elementsLength) - } else { - putListViewOffsets64(arr, int64(displacement), out, elementsLength) - } - elementsLength += arr.Len() - numChildValues += valueRanges[i].len - } - debug.Assert(elementsLength == outSize, "implementation error") - - return out, nil -} - -func zeroNullListViewSizes[T int32 | int64](data arrow.ArrayData) { - if data.Len() == 0 || data.Buffers()[0] == nil { - return - } - validity := data.Buffers()[0].Bytes() - sizes := getListViewBufferValues[T](data, 2) - - for i := 0; i < data.Len(); i++ { - if !bitutil.BitIsSet(validity, data.Offset()+i) { - sizes[i] = 0 - } - } -} - -func concatListView(data []arrow.ArrayData, offsetType arrow.FixedWidthDataType, out *Data, mem memory.Allocator) (err error) { - // Calculate the ranges of values that each list-view array uses - valueRanges := make([]rng, len(data)) - for i, input := range data { - offset, len := rangeOfValuesUsed(input) - valueRanges[i].offset = offset - valueRanges[i].len = len - } - - // Gather the children ranges of each input array - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - // Concatenate the values - values, err := concat(childData, mem) - if err != nil { - return err - } - - // Concatenate the offsets - offsetBuffer, err := concatListViewOffsets(data, offsetType.Bytes(), valueRanges, mem) - if err != nil { - return err - } - - 
// Concatenate the sizes - sizeBuffers := gatherBuffersFixedWidthType(data, 2, offsetType) - sizeBuffer := concatBuffers(sizeBuffers, mem) - - out.childData = []arrow.ArrayData{values} - out.buffers[1] = offsetBuffer - out.buffers[2] = sizeBuffer - - // To make sure the sizes don't reference values that are not in the new - // concatenated values array, we zero the sizes of null list-view values. - if offsetType.ID() == arrow.INT32 { - zeroNullListViewSizes[int32](out) - } else { - zeroNullListViewSizes[int64](out) - } - - return nil -} - -// concat is the implementation for actually performing the concatenation of the arrow.ArrayData -// objects that we can call internally for nested types. -func concat(data []arrow.ArrayData, mem memory.Allocator) (arr arrow.ArrayData, err error) { - out := &Data{refCount: 1, dtype: data[0].DataType(), nulls: 0} - defer func() { - if pErr := recover(); pErr != nil { - err = utils.FormatRecoveredError("arrow/concat", pErr) - } - if err != nil { - out.Release() - } - }() - for _, d := range data { - out.length += d.Len() - if out.nulls == UnknownNullCount || d.NullN() == UnknownNullCount { - out.nulls = UnknownNullCount - continue - } - out.nulls += d.NullN() - } - - out.buffers = make([]*memory.Buffer, len(data[0].Buffers())) - if out.nulls != 0 && out.dtype.ID() != arrow.NULL { - bm, err := concatBitmaps(gatherBitmaps(data, 0), mem) - if err != nil { - return nil, err - } - out.buffers[0] = bm - } - - dt := out.dtype - if dt.ID() == arrow.EXTENSION { - dt = dt.(arrow.ExtensionType).StorageType() - } - - switch dt := dt.(type) { - case *arrow.NullType: - case *arrow.BooleanType: - bm, err := concatBitmaps(gatherBitmaps(data, 1), mem) - if err != nil { - return nil, err - } - out.buffers[1] = bm - case *arrow.DictionaryType: - idxType := dt.IndexType.(arrow.FixedWidthDataType) - // two cases: all dictionaries are the same or we need to unify them - dictsSame := true - dict0 := MakeFromData(data[0].Dictionary()) - defer 
dict0.Release() - for _, d := range data { - dict := MakeFromData(d.Dictionary()) - if !Equal(dict0, dict) { - dict.Release() - dictsSame = false - break - } - dict.Release() - } - - indexBuffers := gatherBuffersFixedWidthType(data, 1, idxType) - if dictsSame { - out.dictionary = dict0.Data().(*Data) - out.dictionary.Retain() - out.buffers[1] = concatBuffers(indexBuffers, mem) - break - } - - indexLookup, unifiedDict, err := unifyDictionaries(mem, data, dt) - if err != nil { - return nil, err - } - defer unifiedDict.Release() - out.dictionary = unifiedDict.Data().(*Data) - out.dictionary.Retain() - - out.buffers[1], err = concatDictIndices(mem, data, idxType, indexLookup) - if err != nil { - return nil, err - } - case arrow.FixedWidthDataType: - out.buffers[1] = concatBuffers(gatherBuffersFixedWidthType(data, 1, dt), mem) - case arrow.BinaryViewDataType: - out.buffers = out.buffers[:2] - for _, d := range data { - for _, buf := range d.Buffers()[2:] { - buf.Retain() - out.buffers = append(out.buffers, buf) - } - } - - out.buffers[1] = concatBuffers(gatherFixedBuffers(data, 1, arrow.ViewHeaderSizeBytes), mem) - - var ( - s = arrow.ViewHeaderTraits.CastFromBytes(out.buffers[1].Bytes()) - i = data[0].Len() - precedingBufsCount int - ) - - for idx := 1; idx < len(data); idx++ { - precedingBufsCount += len(data[idx-1].Buffers()) - 2 - - for end := i + data[idx].Len(); i < end; i++ { - if s[i].IsInline() { - continue - } - - bufIndex := s[i].BufferIndex() + int32(precedingBufsCount) - s[i].SetIndexOffset(bufIndex, s[i].BufferOffset()) - } - } - case arrow.BinaryDataType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - out.buffers[1] = offsetBuffer - out.buffers[2] = concatBuffers(gatherBufferRanges(data, 2, valueRanges), mem) - case *arrow.ListType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - offsetBuffer, 
valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - out.buffers[1] = offsetBuffer - out.childData = make([]arrow.ArrayData, 1) - out.childData[0], err = concat(childData, mem) - if err != nil { - return nil, err - } - case *arrow.LargeListType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - out.buffers[1] = offsetBuffer - out.childData = make([]arrow.ArrayData, 1) - out.childData[0], err = concat(childData, mem) - if err != nil { - return nil, err - } - case *arrow.ListViewType: - offsetType := arrow.PrimitiveTypes.Int32.(arrow.FixedWidthDataType) - err := concatListView(data, offsetType, out, mem) - if err != nil { - return nil, err - } - case *arrow.LargeListViewType: - offsetType := arrow.PrimitiveTypes.Int64.(arrow.FixedWidthDataType) - err := concatListView(data, offsetType, out, mem) - if err != nil { - return nil, err - } - case *arrow.FixedSizeListType: - childData := gatherChildrenMultiplier(data, 0, int(dt.Len())) - for _, c := range childData { - defer c.Release() - } - - children, err := concat(childData, mem) - if err != nil { - return nil, err - } - out.childData = []arrow.ArrayData{children} - case *arrow.StructType: - out.childData = make([]arrow.ArrayData, dt.NumFields()) - for i := range dt.Fields() { - children := gatherChildren(data, i) - for _, c := range children { - defer c.Release() - } - - childData, err := concat(children, mem) - if err != nil { - return nil, err - } - out.childData[i] = childData - } - case *arrow.MapType: - offsetWidth := dt.Layout().Buffers[1].ByteWidth - 
offsetBuffer, valueRanges, err := concatOffsets(gatherFixedBuffers(data, 1, offsetWidth), offsetWidth, mem) - if err != nil { - return nil, err - } - childData := gatherChildrenRanges(data, 0, valueRanges) - for _, c := range childData { - defer c.Release() - } - - out.buffers[1] = offsetBuffer - out.childData = make([]arrow.ArrayData, 1) - out.childData[0], err = concat(childData, mem) - if err != nil { - return nil, err - } - case *arrow.RunEndEncodedType: - physicalLength, overflow := int(0), false - // we can't use gatherChildren because the Offset and Len of - // data doesn't correspond to the physical length or offset - runs := make([]arrow.ArrayData, len(data)) - values := make([]arrow.ArrayData, len(data)) - for i, d := range data { - plen := encoded.GetPhysicalLength(d) - off := encoded.FindPhysicalOffset(d) - - runs[i] = NewSliceData(d.Children()[0], int64(off), int64(off+plen)) - defer runs[i].Release() - values[i] = NewSliceData(d.Children()[1], int64(off), int64(off+plen)) - defer values[i].Release() - - physicalLength, overflow = addOvf(physicalLength, plen) - if overflow { - return nil, fmt.Errorf("%w: run end encoded array length must fit into a 32-bit signed integer", - arrow.ErrInvalid) - } - } - - runEndsByteWidth := runs[0].DataType().(arrow.FixedWidthDataType).Bytes() - runEndsBuffers := gatherFixedBuffers(runs, 1, runEndsByteWidth) - outRunEndsLen := physicalLength * runEndsByteWidth - outRunEndsBuf := memory.NewResizableBuffer(mem) - outRunEndsBuf.Resize(outRunEndsLen) - defer outRunEndsBuf.Release() - - if err := updateRunEnds(runEndsByteWidth, data, runEndsBuffers, outRunEndsBuf); err != nil { - return nil, err - } - - out.childData = make([]arrow.ArrayData, 2) - out.childData[0] = NewData(data[0].Children()[0].DataType(), int(physicalLength), - []*memory.Buffer{nil, outRunEndsBuf}, nil, 0, 0) - - var err error - out.childData[1], err = concat(values, mem) - if err != nil { - out.childData[0].Release() - return nil, err - } - default: - 
return nil, fmt.Errorf("concatenate not implemented for type %s", dt) - } - - return out, nil -} - -// check overflow in the addition, taken from bits.Add but adapted for signed integers -// rather than unsigned integers. bits.UintSize will be either 32 or 64 based on -// whether our architecture is 32 bit or 64. The operation is the same for both cases, -// the only difference is how much we need to shift by 30 for 32 bit and 62 for 64 bit. -// Thus, bits.UintSize - 2 is how much we shift right by to check if we had an overflow -// in the signed addition. -// -// First return is the result of the sum, the second return is true if there was an overflow -func addOvf(x, y int) (int, bool) { - sum := x + y - return sum, ((x&y)|((x|y)&^sum))>>(bits.UintSize-2) == 1 -} - -// concatenate bitmaps together and return a buffer with the combined bitmaps -func concatBitmaps(bitmaps []bitmap, mem memory.Allocator) (*memory.Buffer, error) { - var ( - outlen int - overflow bool - ) - - for _, bm := range bitmaps { - if outlen, overflow = addOvf(outlen, bm.rng.len); overflow { - return nil, errors.New("length overflow when concatenating arrays") - } - } - - out := memory.NewResizableBuffer(mem) - out.Resize(int(bitutil.BytesForBits(int64(outlen)))) - dst := out.Bytes() - - offset := 0 - for _, bm := range bitmaps { - if bm.data == nil { // if the bitmap is nil, that implies that the value is true for all elements - bitutil.SetBitsTo(out.Bytes(), int64(offset), int64(bm.rng.len), true) - } else { - bitutil.CopyBitmap(bm.data, bm.rng.offset, bm.rng.len, dst, offset) - } - offset += bm.rng.len - } - return out, nil -} - -func updateRunEnds(byteWidth int, inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, outputBuffer *memory.Buffer) error { - switch byteWidth { - case 2: - out := arrow.Int16Traits.CastFromBytes(outputBuffer.Bytes()) - return updateRunsInt16(inputData, inputBuffers, out) - case 4: - out := arrow.Int32Traits.CastFromBytes(outputBuffer.Bytes()) - return 
updateRunsInt32(inputData, inputBuffers, out) - case 8: - out := arrow.Int64Traits.CastFromBytes(outputBuffer.Bytes()) - return updateRunsInt64(inputData, inputBuffers, out) - } - return fmt.Errorf("%w: invalid dataType for RLE runEnds", arrow.ErrInvalid) -} - -func updateRunsInt16(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int16) error { - // for now we will not attempt to optimize by checking if we - // can fold the end and beginning of each array we're concatenating - // into a single run - pos := 0 - for i, buf := range inputBuffers { - if buf.Len() == 0 { - continue - } - src := arrow.Int16Traits.CastFromBytes(buf.Bytes()) - if pos == 0 { - pos += copy(output, src) - continue - } - - lastEnd := output[pos-1] - // we can check the last runEnd in the src and add it to the - // last value that we're adjusting them all by to see if we - // are going to overflow - if int64(lastEnd)+int64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt16 { - return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) - } - - // adjust all of the run ends by first normalizing them (e - data[i].offset) - // then adding the previous value we ended on. Since the offset - // is a logical length offset it should be accurate to just subtract - // it from each value. 
- for j, e := range src { - output[pos+j] = lastEnd + int16(int(e)-inputData[i].Offset()) - } - pos += len(src) - } - return nil -} - -func updateRunsInt32(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int32) error { - // for now we will not attempt to optimize by checking if we - // can fold the end and beginning of each array we're concatenating - // into a single run - pos := 0 - for i, buf := range inputBuffers { - if buf.Len() == 0 { - continue - } - src := arrow.Int32Traits.CastFromBytes(buf.Bytes()) - if pos == 0 { - pos += copy(output, src) - continue - } - - lastEnd := output[pos-1] - // we can check the last runEnd in the src and add it to the - // last value that we're adjusting them all by to see if we - // are going to overflow - if int64(lastEnd)+int64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt32 { - return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) - } - - // adjust all of the run ends by first normalizing them (e - data[i].offset) - // then adding the previous value we ended on. Since the offset - // is a logical length offset it should be accurate to just subtract - // it from each value. 
- for j, e := range src { - output[pos+j] = lastEnd + int32(int(e)-inputData[i].Offset()) - } - pos += len(src) - } - return nil -} - -func updateRunsInt64(inputData []arrow.ArrayData, inputBuffers []*memory.Buffer, output []int64) error { - // for now we will not attempt to optimize by checking if we - // can fold the end and beginning of each array we're concatenating - // into a single run - pos := 0 - for i, buf := range inputBuffers { - if buf.Len() == 0 { - continue - } - src := arrow.Int64Traits.CastFromBytes(buf.Bytes()) - if pos == 0 { - pos += copy(output, src) - continue - } - - lastEnd := output[pos-1] - // we can check the last runEnd in the src and add it to the - // last value that we're adjusting them all by to see if we - // are going to overflow - if uint64(lastEnd)+uint64(int(src[len(src)-1])-inputData[i].Offset()) > math.MaxInt64 { - return fmt.Errorf("%w: overflow in run-length-encoded run ends concat", arrow.ErrInvalid) - } - - // adjust all of the run ends by first normalizing them (e - data[i].offset) - // then adding the previous value we ended on. Since the offset - // is a logical length offset it should be accurate to just subtract - // it from each value. - for j, e := range src { - output[pos+j] = lastEnd + e - int64(inputData[i].Offset()) - } - pos += len(src) - } - return nil -} diff --git a/go/arrow/array/concat_test.go b/go/arrow/array/concat_test.go deleted file mode 100644 index 7e6a3c08efd5c..0000000000000 --- a/go/arrow/array/concat_test.go +++ /dev/null @@ -1,789 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math" - "sort" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" - "golang.org/x/exp/rand" -) - -func TestConcatenateValueBuffersNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - inputs := make([]arrow.Array, 0) - - bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - inputs = append(inputs, arr) - - bldr.AppendNull() - arr = bldr.NewArray() - defer arr.Release() - inputs = append(inputs, arr) - - actual, err := array.Concatenate(inputs, mem) - assert.NoError(t, err) - defer actual.Release() - - assert.True(t, array.Equal(actual, inputs[1])) -} - -func TestConcatenate(t *testing.T) { - tests := []struct { - dt arrow.DataType - }{ - {arrow.FixedWidthTypes.Boolean}, - {arrow.PrimitiveTypes.Int8}, - {arrow.PrimitiveTypes.Uint8}, - {arrow.PrimitiveTypes.Int16}, - {arrow.PrimitiveTypes.Uint16}, - {arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Uint32}, - {arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Uint64}, - {arrow.PrimitiveTypes.Float32}, - 
{arrow.PrimitiveTypes.Float64}, - {arrow.BinaryTypes.String}, - {arrow.BinaryTypes.LargeString}, - {arrow.ListOf(arrow.PrimitiveTypes.Int8)}, - {arrow.LargeListOf(arrow.PrimitiveTypes.Int8)}, - {arrow.ListViewOf(arrow.PrimitiveTypes.Int8)}, - {arrow.LargeListViewOf(arrow.PrimitiveTypes.Int8)}, - {arrow.FixedSizeListOf(3, arrow.PrimitiveTypes.Int8)}, - {arrow.StructOf()}, - {arrow.MapOf(arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8)}, - {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.PrimitiveTypes.Float64}}, - {arrow.BinaryTypes.StringView}, - } - - for _, tt := range tests { - t.Run(tt.dt.Name(), func(t *testing.T) { - suite.Run(t, &ConcatTestSuite{ - seed: 0xdeadbeef, - dt: tt.dt, - nullProbs: []float64{0.0, 0.1, 0.5, 0.9, 1.0}, - sizes: []int32{0, 1, 2, 4, 16, 31, 1234}, - }) - }) - } -} - -type ConcatTestSuite struct { - suite.Suite - - seed uint64 - rng gen.RandomArrayGenerator - dt arrow.DataType - - nullProbs []float64 - sizes []int32 - - mem *memory.CheckedAllocator -} - -func (cts *ConcatTestSuite) SetupSuite() { - cts.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - cts.rng = gen.NewRandomArrayGenerator(cts.seed, cts.mem) -} - -func (cts *ConcatTestSuite) TearDownSuite() { - cts.mem.AssertSize(cts.T(), 0) -} - -func (cts *ConcatTestSuite) generateArr(size int64, nullprob float64) arrow.Array { - switch cts.dt.ID() { - case arrow.BOOL: - return cts.rng.Boolean(size, 0.5, nullprob) - case arrow.INT8: - return cts.rng.Int8(size, 0, 127, nullprob) - case arrow.UINT8: - return cts.rng.Uint8(size, 0, 127, nullprob) - case arrow.INT16: - return cts.rng.Int16(size, 0, 127, nullprob) - case arrow.UINT16: - return cts.rng.Uint16(size, 0, 127, nullprob) - case arrow.INT32: - return cts.rng.Int32(size, 0, 127, nullprob) - case arrow.UINT32: - return cts.rng.Uint32(size, 0, 127, nullprob) - case arrow.INT64: - return cts.rng.Int64(size, 0, 127, nullprob) - case arrow.UINT64: - return cts.rng.Uint64(size, 0, 127, 
nullprob) - case arrow.FLOAT32: - return cts.rng.Float32(size, 0, 127, nullprob) - case arrow.FLOAT64: - return cts.rng.Float64(size, 0, 127, nullprob) - case arrow.NULL: - return array.NewNull(int(size)) - case arrow.STRING: - return cts.rng.String(size, 0, 15, nullprob) - case arrow.LARGE_STRING: - return cts.rng.LargeString(size, 0, 15, nullprob) - case arrow.STRING_VIEW: - return cts.rng.StringView(size, 0, 20, nullprob) - case arrow.LIST: - valuesSize := size * 4 - values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) - defer values.Release() - offsetsVector := cts.offsets(int32(valuesSize), int32(size)) - // ensure the first and last offsets encompass the whole values - offsetsVector[0] = 0 - offsetsVector[len(offsetsVector)-1] = int32(valuesSize) - - bldr := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bldr.Release() - - valid := make([]bool, len(offsetsVector)-1) - for i := range valid { - valid[i] = true - } - bldr.AppendValues(offsetsVector, valid) - vb := bldr.ValueBuilder().(*array.Int8Builder) - for i := 0; i < values.Len(); i++ { - if values.IsValid(i) { - vb.Append(values.Value(i)) - } else { - vb.AppendNull() - } - } - return bldr.NewArray() - case arrow.LARGE_LIST: - valuesSize := size * 8 - values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) - defer values.Release() - offsetsVector := cts.largeoffsets(int64(valuesSize), int32(size)) - // ensure the first and last offsets encompass the whole values - offsetsVector[0] = 0 - offsetsVector[len(offsetsVector)-1] = int64(valuesSize) - - bldr := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bldr.Release() - - valid := make([]bool, len(offsetsVector)-1) - for i := range valid { - valid[i] = true - } - bldr.AppendValues(offsetsVector, valid) - vb := bldr.ValueBuilder().(*array.Int8Builder) - for i := 0; i < values.Len(); i++ { - if values.IsValid(i) { - vb.Append(values.Value(i)) - } else { - 
vb.AppendNull() - } - } - return bldr.NewArray() - case arrow.LIST_VIEW: - arr := cts.rng.ListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) - err := arr.ValidateFull() - cts.NoError(err) - return arr - case arrow.LARGE_LIST_VIEW: - arr := cts.rng.LargeListView(cts.dt.(arrow.VarLenListLikeType), size, 0, 20, nullprob) - err := arr.ValidateFull() - cts.NoError(err) - return arr - case arrow.FIXED_SIZE_LIST: - const listsize = 3 - valuesSize := size * listsize - values := cts.rng.Int8(valuesSize, 0, 127, nullprob) - defer values.Release() - - data := array.NewData(arrow.FixedSizeListOf(listsize, arrow.PrimitiveTypes.Int8), int(size), []*memory.Buffer{nil}, []arrow.ArrayData{values.Data()}, 0, 0) - defer data.Release() - return array.MakeFromData(data) - case arrow.STRUCT: - foo := cts.rng.Int8(size, 0, 127, nullprob) - defer foo.Release() - bar := cts.rng.Float64(size, 0, 127, nullprob) - defer bar.Release() - baz := cts.rng.Boolean(size, 0.5, nullprob) - defer baz.Release() - - data := array.NewData(arrow.StructOf( - arrow.Field{Name: "foo", Type: foo.DataType(), Nullable: true}, - arrow.Field{Name: "bar", Type: bar.DataType(), Nullable: true}, - arrow.Field{Name: "baz", Type: baz.DataType(), Nullable: true}), - int(size), []*memory.Buffer{nil}, []arrow.ArrayData{foo.Data(), bar.Data(), baz.Data()}, 0, 0) - defer data.Release() - return array.NewStructData(data) - case arrow.MAP: - valuesSize := size * 4 - keys := cts.rng.Uint16(valuesSize, 0, 127, 0).(*array.Uint16) - defer keys.Release() - values := cts.rng.Int8(valuesSize, 0, 127, nullprob).(*array.Int8) - defer values.Release() - - offsetsVector := cts.offsets(int32(valuesSize), int32(size)) - offsetsVector[0] = 0 - offsetsVector[len(offsetsVector)-1] = int32(valuesSize) - - bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int8, false) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.Uint16Builder) - vb := 
bldr.ItemBuilder().(*array.Int8Builder) - - valid := make([]bool, len(offsetsVector)-1) - for i := range valid { - valid[i] = true - } - bldr.AppendValues(offsetsVector, valid) - for i := 0; i < int(valuesSize); i++ { - kb.Append(keys.Value(i)) - if values.IsValid(i) { - vb.Append(values.Value(i)) - } else { - vb.AppendNull() - } - } - return bldr.NewArray() - case arrow.DICTIONARY: - indices := cts.rng.Int32(size, 0, 127, nullprob) - defer indices.Release() - dict := cts.rng.Float64(128, 0.0, 127.0, nullprob) - defer dict.Release() - return array.NewDictionaryArray(cts.dt, indices, dict) - default: - return nil - } -} - -func (cts *ConcatTestSuite) slices(arr arrow.Array, offsets []int32) []arrow.Array { - slices := make([]arrow.Array, len(offsets)-1) - for i := 0; i != len(slices); i++ { - slices[i] = array.NewSlice(arr, int64(offsets[i]), int64(offsets[i+1])) - } - return slices -} - -func (cts *ConcatTestSuite) checkTrailingBitsZeroed(bitmap *memory.Buffer, length int64) { - if preceding := bitutil.PrecedingBitmask[length%8]; preceding != 0 { - lastByte := bitmap.Bytes()[length/8] - cts.Equal(lastByte&preceding, lastByte, length, preceding) - } -} - -func (cts *ConcatTestSuite) offsets(length, slicecount int32) []int32 { - offsets := make([]int32, slicecount+1) - dist := rand.New(rand.NewSource(cts.seed)) - for i := range offsets { - offsets[i] = dist.Int31n(length + 1) - } - sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) - return offsets -} - -func (cts *ConcatTestSuite) largeoffsets(length int64, slicecount int32) []int64 { - offsets := make([]int64, slicecount+1) - dist := rand.New(rand.NewSource(cts.seed)) - for i := range offsets { - offsets[i] = dist.Int63n(length + 1) - } - sort.Slice(offsets, func(i, j int) bool { return offsets[i] < offsets[j] }) - return offsets -} - -func (cts *ConcatTestSuite) TestCheckConcat() { - for _, sz := range cts.sizes { - cts.Run(fmt.Sprintf("size %d", sz), func() { - offsets := cts.offsets(sz, 
3) - for _, np := range cts.nullProbs { - cts.Run(fmt.Sprintf("nullprob %0.2f", np), func() { - scopedMem := memory.NewCheckedAllocatorScope(cts.mem) - defer scopedMem.CheckSize(cts.T()) - - arr := cts.generateArr(int64(sz), np) - defer arr.Release() - expected := array.NewSlice(arr, int64(offsets[0]), int64(offsets[len(offsets)-1])) - defer expected.Release() - - slices := cts.slices(arr, offsets) - for _, s := range slices { - if s.DataType().ID() == arrow.LIST_VIEW { - err := s.(*array.ListView).ValidateFull() - cts.NoError(err) - } - defer s.Release() - } - - actual, err := array.Concatenate(slices, cts.mem) - cts.NoError(err) - if arr.DataType().ID() == arrow.LIST_VIEW { - lv := actual.(*array.ListView) - err := lv.ValidateFull() - cts.NoError(err) - } - defer actual.Release() - - cts.Truef(array.Equal(expected, actual), "expected: %s\ngot: %s\n", expected, actual) - if len(actual.Data().Buffers()) > 0 { - if actual.Data().Buffers()[0] != nil { - cts.checkTrailingBitsZeroed(actual.Data().Buffers()[0], int64(actual.Len())) - } - if actual.DataType().ID() == arrow.BOOL { - cts.checkTrailingBitsZeroed(actual.Data().Buffers()[1], int64(actual.Len())) - } - } - }) - } - }) - } -} - -func TestConcatDifferentDicts(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("simple dicts", func(t *testing.T) { - scopedMem := memory.NewCheckedAllocatorScope(mem) - defer scopedMem.CheckSize(t) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - dict1, err := array.DictArrayFromJSON(mem, dictType, `[1, 2, null, 3, 0]`, `["A0", "A1", "A2", "A3"]`) - require.NoError(t, err) - defer dict1.Release() - dict2, err := array.DictArrayFromJSON(mem, dictType, `[null, 4, 2, 1]`, `["B0", "B1", "B2", "B3", "B4"]`) - require.NoError(t, err) - defer dict2.Release() - - expected, err := array.DictArrayFromJSON(mem, dictType, `[1, 2, null, 3, 0, null, 8, 6, 5]`, `["A0", 
"A1", "A2", "A3", "B0", "B1", "B2", "B3", "B4"]`) - require.NoError(t, err) - defer expected.Release() - - concat, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) - assert.NoError(t, err) - defer concat.Release() - assert.Truef(t, array.Equal(concat, expected), "got: %s, expected: %s", concat, expected) - }) - - t.Run("larger", func(t *testing.T) { - scopedMem := memory.NewCheckedAllocatorScope(mem) - defer scopedMem.CheckSize(t) - - const size = 500 - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String} - - idxBuilder, exIdxBldr := array.NewUint16Builder(mem), array.NewUint16Builder(mem) - defer idxBuilder.Release() - defer exIdxBldr.Release() - idxBuilder.Reserve(size) - exIdxBldr.Reserve(size * 2) - - for i := uint16(0); i < size; i++ { - idxBuilder.UnsafeAppend(i) - exIdxBldr.UnsafeAppend(i) - } - for i := uint16(size); i < 2*size; i++ { - exIdxBldr.UnsafeAppend(i) - } - - indices, expIndices := idxBuilder.NewArray(), exIdxBldr.NewArray() - defer indices.Release() - defer expIndices.Release() - - // create three dictionaries. First maps i -> "{i}", second maps i->"{500+i}", - // each for 500 values and the third maps i -> "{i}" but for 1000 values. - // first and second concatenated should end up equaling the third. All strings - // padded to length 8 so we can know the size ahead of time. 
- valuesOneBldr, valuesTwoBldr := array.NewStringBuilder(mem), array.NewStringBuilder(mem) - defer valuesOneBldr.Release() - defer valuesTwoBldr.Release() - - valuesOneBldr.Reserve(size) - valuesTwoBldr.Reserve(size) - valuesOneBldr.ReserveData(size * 8) - valuesTwoBldr.ReserveData(size * 8) - - for i := 0; i < size; i++ { - valuesOneBldr.Append(fmt.Sprintf("%-8d", i)) - valuesTwoBldr.Append(fmt.Sprintf("%-8d", i+size)) - } - - dict1, dict2 := valuesOneBldr.NewArray(), valuesTwoBldr.NewArray() - defer dict1.Release() - defer dict2.Release() - expectedDict, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) - require.NoError(t, err) - defer expectedDict.Release() - - one, two := array.NewDictionaryArray(dictType, indices, dict1), array.NewDictionaryArray(dictType, indices, dict2) - defer one.Release() - defer two.Release() - expected := array.NewDictionaryArray(dictType, expIndices, expectedDict) - defer expected.Release() - - combined, err := array.Concatenate([]arrow.Array{one, two}, mem) - assert.NoError(t, err) - defer combined.Release() - assert.Truef(t, array.Equal(combined, expected), "got: %s, expected: %s", combined, expected) - }) -} - -func TestConcatDictionaryPartialOverlap(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - dictOne, err := array.DictArrayFromJSON(mem, dt, `[1, 2, null, 3, 0]`, `["A0", "A1", "C2", "C3"]`) - require.NoError(t, err) - defer dictOne.Release() - - dictTwo, err := array.DictArrayFromJSON(mem, dt, `[null, 4, 2, 1]`, `["B0", "B1", "C2", "C3", "B4"]`) - require.NoError(t, err) - defer dictTwo.Release() - - expected, err := array.DictArrayFromJSON(mem, dt, `[1, 2, null, 3, 0, null, 6, 2, 5]`, `["A0", "A1", "C2", "C3", "B0", "B1", "B4"]`) - require.NoError(t, err) - defer expected.Release() - - actual, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, 
mem) - assert.NoError(t, err) - defer actual.Release() - - assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) -} - -func TestConcatDictionaryDifferentSizeIndex(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - biggerDt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String} - dictOne, err := array.DictArrayFromJSON(mem, dt, `[0]`, `["A0"]`) - require.NoError(t, err) - defer dictOne.Release() - - dictTwo, err := array.DictArrayFromJSON(mem, biggerDt, `[0]`, `["B0"]`) - require.NoError(t, err) - defer dictTwo.Release() - - arr, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) - assert.Nil(t, arr) - assert.Error(t, err) -} - -func TestConcatDictionaryUnifyNullInDict(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String} - dictOne, err := array.DictArrayFromJSON(mem, dt, `[0, 1]`, `[null, "A"]`) - require.NoError(t, err) - defer dictOne.Release() - - dictTwo, err := array.DictArrayFromJSON(mem, dt, `[0, 1]`, `[null, "B"]`) - require.NoError(t, err) - defer dictTwo.Release() - - expected, err := array.DictArrayFromJSON(mem, dt, `[0, 1, 0, 2]`, `[null, "A", "B"]`) - require.NoError(t, err) - defer expected.Release() - - actual, err := array.Concatenate([]arrow.Array{dictOne, dictTwo}, mem) - assert.NoError(t, err) - defer actual.Release() - - assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) -} - -func TestConcatDictionaryEnlargedIndices(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - const size = math.MaxUint8 + 1 - dt := &arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Uint8, ValueType: arrow.PrimitiveTypes.Uint16} - - idxBuilder := array.NewUint8Builder(mem) - defer idxBuilder.Release() - idxBuilder.Reserve(size) - for i := 0; i < size; i++ { - idxBuilder.UnsafeAppend(uint8(i)) - } - indices := idxBuilder.NewUint8Array() - defer indices.Release() - - valuesBuilder := array.NewUint16Builder(mem) - defer valuesBuilder.Release() - valuesBuilder.Reserve(size) - valuesBuilderTwo := array.NewUint16Builder(mem) - defer valuesBuilderTwo.Release() - valuesBuilderTwo.Reserve(size) - - for i := uint16(0); i < size; i++ { - valuesBuilder.UnsafeAppend(i) - valuesBuilderTwo.UnsafeAppend(i + size) - } - - dict1, dict2 := valuesBuilder.NewUint16Array(), valuesBuilderTwo.NewUint16Array() - defer dict1.Release() - defer dict2.Release() - - d1, d2 := array.NewDictionaryArray(dt, indices, dict1), array.NewDictionaryArray(dt, indices, dict2) - defer d1.Release() - defer d2.Release() - - _, err := array.Concatenate([]arrow.Array{d1, d2}, mem) - assert.Error(t, err) - - biggerDt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.PrimitiveTypes.Uint16} - bigger1, bigger2 := array.NewDictionaryArray(biggerDt, dict1, dict1), array.NewDictionaryArray(biggerDt, dict1, dict2) - defer bigger1.Release() - defer bigger2.Release() - - combined, err := array.Concatenate([]arrow.Array{bigger1, bigger2}, mem) - assert.NoError(t, err) - defer combined.Release() - - assert.EqualValues(t, size*2, combined.Len()) -} - -func TestConcatDictionaryNullSlots(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String} - dict1, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null]`, `[]`) - require.NoError(t, err) - defer dict1.Release() - - dict2, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null, 0, 1]`, `["a", "b"]`) - require.NoError(t, err) 
- defer dict2.Release() - - expected, err := array.DictArrayFromJSON(mem, dt, `[null, null, null, null, null, null, null, null, 0, 1]`, `["a", "b"]`) - require.NoError(t, err) - defer expected.Release() - - actual, err := array.Concatenate([]arrow.Array{dict1, dict2}, mem) - assert.NoError(t, err) - defer actual.Release() - - assert.Truef(t, array.Equal(actual, expected), "got: %s, expected: %s", actual, expected) -} - -func TestConcatRunEndEncoded(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - tests := []struct { - offsetType arrow.DataType - expected interface{} - }{ - {arrow.PrimitiveTypes.Int16, []int16{1, 11, 111, 211, 311, 411, 500, 600}}, - {arrow.PrimitiveTypes.Int32, []int32{1, 11, 111, 211, 311, 411, 500, 600}}, - {arrow.PrimitiveTypes.Int64, []int64{1, 11, 111, 211, 311, 411, 500, 600}}, - } - - for _, tt := range tests { - t.Run(tt.offsetType.String(), func(t *testing.T) { - - arrs := make([]arrow.Array, 0) - bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) - defer bldr.Release() - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - - bldr.Append(1) - valBldr.Append("Hello") - bldr.AppendNull() - bldr.ContinueRun(9) - - bldr.Append(100) - valBldr.Append("World") - arrs = append(arrs, bldr.NewArray()) - - bldr.Append(100) - valBldr.Append("Goku") - bldr.Append(100) - valBldr.Append("Gohan") - bldr.Append(100) - valBldr.Append("Goten") - arrs = append(arrs, bldr.NewArray()) - - bldr.AppendNull() - bldr.ContinueRun(99) - bldr.Append(100) - valBldr.Append("Vegeta") - bldr.Append(100) - valBldr.Append("Trunks") - next := bldr.NewArray() - defer next.Release() - // remove the initial null with an offset and dig into the next run - arrs = append(arrs, array.NewSlice(next, 111, int64(next.Len()))) - - for _, a := range arrs { - defer a.Release() - } - - result, err := array.Concatenate(arrs, mem) - assert.NoError(t, err) - defer result.Release() - - rle := 
result.(*array.RunEndEncoded) - assert.EqualValues(t, 8, rle.GetPhysicalLength()) - assert.EqualValues(t, 0, rle.GetPhysicalOffset()) - - var values interface{} - switch endsArr := rle.RunEndsArr().(type) { - case *array.Int16: - values = endsArr.Int16Values() - case *array.Int32: - values = endsArr.Int32Values() - case *array.Int64: - values = endsArr.Int64Values() - } - assert.Equal(t, tt.expected, values) - - expectedValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, - strings.NewReader(`["Hello", null, "World", "Goku", "Gohan", "Goten", "Vegeta", "Trunks"]`)) - defer expectedValues.Release() - assert.Truef(t, array.Equal(expectedValues, rle.Values()), "expected: %s\ngot: %s", expectedValues, rle.Values()) - }) - } -} - -func TestConcatAlmostOverflowRunEndEncoding(t *testing.T) { - tests := []struct { - offsetType arrow.DataType - max uint64 - }{ - {arrow.PrimitiveTypes.Int16, math.MaxInt16}, - {arrow.PrimitiveTypes.Int32, math.MaxInt32}, - {arrow.PrimitiveTypes.Int64, math.MaxInt64}, - } - - for _, tt := range tests { - t.Run(tt.offsetType.String(), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arrs := make([]arrow.Array, 0) - bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) - defer bldr.Release() - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - - // max is not evenly divisible by 4, so we add one to each - // to account for that so our final concatenate will overflow - bldr.Append((tt.max / 4) + 1) - valBldr.Append("foo") - bldr.Append((tt.max / 4) + 1) - valBldr.Append("bar") - arrs = append(arrs, bldr.NewArray()) - - bldr.Append((tt.max / 4) + 1) - valBldr.Append("baz") - bldr.Append((tt.max / 4)) - valBldr.Append("bop") - arrs = append(arrs, bldr.NewArray()) - - defer func() { - for _, a := range arrs { - a.Release() - } - }() - - arr, err := array.Concatenate(arrs, mem) - assert.NoError(t, err) - defer arr.Release() - }) - } -} - 
-func TestConcatOverflowRunEndEncoding(t *testing.T) { - tests := []struct { - offsetType arrow.DataType - max uint64 - }{ - {arrow.PrimitiveTypes.Int16, math.MaxInt16}, - {arrow.PrimitiveTypes.Int32, math.MaxInt32}, - {arrow.PrimitiveTypes.Int64, math.MaxInt64}, - } - - for _, tt := range tests { - t.Run(tt.offsetType.String(), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arrs := make([]arrow.Array, 0) - bldr := array.NewRunEndEncodedBuilder(mem, tt.offsetType, arrow.BinaryTypes.String) - defer bldr.Release() - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - - // max is not evenly divisible by 4, so we add one to each - // to account for that so our final concatenate will overflow - bldr.Append((tt.max / 4) + 1) - valBldr.Append("foo") - bldr.Append((tt.max / 4) + 1) - valBldr.Append("bar") - arrs = append(arrs, bldr.NewArray()) - - bldr.Append((tt.max / 4) + 1) - valBldr.Append("baz") - bldr.Append((tt.max / 4) + 1) - valBldr.Append("bop") - arrs = append(arrs, bldr.NewArray()) - - defer func() { - for _, a := range arrs { - a.Release() - } - }() - - arr, err := array.Concatenate(arrs, mem) - assert.Nil(t, arr) - assert.ErrorIs(t, err, arrow.ErrInvalid) - }) - } -} - -func TestConcatPanic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - allocator := &panicAllocator{ - n: 400, - Allocator: mem, - } - - g := gen.NewRandomArrayGenerator(0, memory.DefaultAllocator) - ar1 := g.ArrayOf(arrow.STRING, 32, 0) - defer ar1.Release() - ar2 := g.ArrayOf(arrow.STRING, 32, 0) - defer ar2.Release() - - concat, err := array.Concatenate([]arrow.Array{ar1, ar2}, allocator) - assert.Error(t, err) - assert.Nil(t, concat) -} diff --git a/go/arrow/array/data.go b/go/arrow/array/data.go deleted file mode 100644 index 19513ebaacf50..0000000000000 --- a/go/arrow/array/data.go +++ /dev/null @@ -1,277 +0,0 @@ -// Licensed to the Apache Software 
Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "hash/maphash" - "math/bits" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// Data represents the memory and metadata of an Arrow array. -type Data struct { - refCount int64 - dtype arrow.DataType - nulls int - offset int - length int - - // for dictionary arrays: buffers will be the null validity bitmap and the indexes that reference - // values in the dictionary member. childData would be empty in a dictionary array - buffers []*memory.Buffer // TODO(sgc): should this be an interface? - childData []arrow.ArrayData // TODO(sgc): managed by ListArray, StructArray and UnionArray types - dictionary *Data // only populated for dictionary arrays -} - -// NewData creates a new Data. 
-func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) *Data { - for _, b := range buffers { - if b != nil { - b.Retain() - } - } - - for _, child := range childData { - if child != nil { - child.Retain() - } - } - - return &Data{ - refCount: 1, - dtype: dtype, - nulls: nulls, - length: length, - offset: offset, - buffers: buffers, - childData: childData, - } -} - -// NewDataWithDictionary creates a new data object, but also sets the provided dictionary into the data if it's not nil -func NewDataWithDictionary(dtype arrow.DataType, length int, buffers []*memory.Buffer, nulls, offset int, dict *Data) *Data { - data := NewData(dtype, length, buffers, nil, nulls, offset) - if dict != nil { - dict.Retain() - } - data.dictionary = dict - return data -} - -func (d *Data) Copy() *Data { - // don't pass the slices directly, otherwise it retains the connection - // we need to make new slices and populate them with the same pointers - bufs := make([]*memory.Buffer, len(d.buffers)) - copy(bufs, d.buffers) - children := make([]arrow.ArrayData, len(d.childData)) - copy(children, d.childData) - - data := NewData(d.dtype, d.length, bufs, children, d.nulls, d.offset) - data.SetDictionary(d.dictionary) - return data -} - -// Reset sets the Data for re-use. -func (d *Data) Reset(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []arrow.ArrayData, nulls, offset int) { - // Retain new buffers before releasing existing buffers in-case they're the same ones to prevent accidental premature - // release. - for _, b := range buffers { - if b != nil { - b.Retain() - } - } - for _, b := range d.buffers { - if b != nil { - b.Release() - } - } - d.buffers = buffers - - // Retain new children data before releasing existing children data in-case they're the same ones to prevent accidental - // premature release. 
- for _, d := range childData { - if d != nil { - d.Retain() - } - } - for _, d := range d.childData { - if d != nil { - d.Release() - } - } - d.childData = childData - - d.dtype = dtype - d.length = length - d.nulls = nulls - d.offset = offset -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (d *Data) Retain() { - atomic.AddInt64(&d.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (d *Data) Release() { - debug.Assert(atomic.LoadInt64(&d.refCount) > 0, "too many releases") - - if atomic.AddInt64(&d.refCount, -1) == 0 { - for _, b := range d.buffers { - if b != nil { - b.Release() - } - } - - for _, b := range d.childData { - b.Release() - } - - if d.dictionary != nil { - d.dictionary.Release() - } - d.dictionary, d.buffers, d.childData = nil, nil, nil - } -} - -// DataType returns the DataType of the data. -func (d *Data) DataType() arrow.DataType { return d.dtype } - -func (d *Data) SetNullN(n int) { d.nulls = n } - -// NullN returns the number of nulls. -func (d *Data) NullN() int { return d.nulls } - -// Len returns the length. -func (d *Data) Len() int { return d.length } - -// Offset returns the offset. -func (d *Data) Offset() int { return d.offset } - -// Buffers returns the buffers. 
-func (d *Data) Buffers() []*memory.Buffer { return d.buffers } - -func (d *Data) Children() []arrow.ArrayData { return d.childData } - -// Dictionary returns the ArrayData object for the dictionary member, or nil -func (d *Data) Dictionary() arrow.ArrayData { return d.dictionary } - -// SetDictionary allows replacing the dictionary for this particular Data object -func (d *Data) SetDictionary(dict arrow.ArrayData) { - if d.dictionary != nil { - d.dictionary.Release() - d.dictionary = nil - } - if dict.(*Data) != nil { - dict.Retain() - d.dictionary = dict.(*Data) - } -} - -// SizeInBytes returns the size of the Data and any children and/or dictionary in bytes by -// recursively examining the nested structures of children and/or dictionary. -// The value returned is an upper-bound since offset is not taken into account. -func (d *Data) SizeInBytes() uint64 { - var size uint64 - - if d == nil { - return 0 - } - - for _, b := range d.Buffers() { - if b != nil { - size += uint64(b.Len()) - } - } - for _, c := range d.Children() { - size += c.SizeInBytes() - } - if d.dictionary != nil { - size += d.dictionary.SizeInBytes() - } - - return size -} - -// NewSliceData returns a new slice that shares backing data with the input. -// The returned Data slice starts at i and extends j-i elements, such as: -// -// slice := data[i:j] -// -// The returned value must be Release'd after use. -// -// NewSliceData panics if the slice is outside the valid range of the input Data. -// NewSliceData panics if j < i. 
-func NewSliceData(data arrow.ArrayData, i, j int64) arrow.ArrayData { - if j > int64(data.Len()) || i > j || data.Offset()+int(i) > data.Offset()+data.Len() { - panic("arrow/array: index out of range") - } - - for _, b := range data.Buffers() { - if b != nil { - b.Retain() - } - } - - for _, child := range data.Children() { - if child != nil { - child.Retain() - } - } - - if data.(*Data).dictionary != nil { - data.(*Data).dictionary.Retain() - } - - o := &Data{ - refCount: 1, - dtype: data.DataType(), - nulls: UnknownNullCount, - length: int(j - i), - offset: data.Offset() + int(i), - buffers: data.Buffers(), - childData: data.Children(), - dictionary: data.(*Data).dictionary, - } - - if data.NullN() == 0 { - o.nulls = 0 - } - - return o -} - -func Hash(h *maphash.Hash, data arrow.ArrayData) { - a := data.(*Data) - - h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:]) - h.Write((*[bits.UintSize / 8]byte)(unsafe.Pointer(&a.length))[:]) - if len(a.buffers) > 0 && a.buffers[0] != nil { - h.Write(a.buffers[0].Bytes()) - } - for _, c := range a.childData { - Hash(h, c) - } -} diff --git a/go/arrow/array/data_test.go b/go/arrow/array/data_test.go deleted file mode 100644 index 2cfc64fbe2d7e..0000000000000 --- a/go/arrow/array/data_test.go +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "slices" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestDataReset(t *testing.T) { - var ( - buffers1 = make([]*memory.Buffer, 0, 3) - buffers2 = make([]*memory.Buffer, 0, 3) - ) - for i := 0; i < cap(buffers1); i++ { - buffers1 = append(buffers1, memory.NewBufferBytes([]byte("some-bytes1"))) - buffers2 = append(buffers2, memory.NewBufferBytes([]byte("some-bytes2"))) - } - - data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0) - data.Reset(&arrow.Int64Type{}, 5, buffers2, nil, 1, 2) - - for i := 0; i < 2; i++ { - assert.Equal(t, buffers2, data.Buffers()) - assert.Equal(t, &arrow.Int64Type{}, data.DataType()) - assert.Equal(t, 1, data.NullN()) - assert.Equal(t, 2, data.Offset()) - assert.Equal(t, 5, data.Len()) - - // Make sure it works when resetting the data with its own buffers (new buffers are retained - // before old ones are released.) 
- data.Reset(&arrow.Int64Type{}, 5, data.Buffers(), nil, 1, 2) - } -} - -func TestSizeInBytes(t *testing.T) { - var buffers1 = make([]*memory.Buffer, 0, 3) - - for i := 0; i < cap(buffers1); i++ { - buffers1 = append(buffers1, memory.NewBufferBytes([]byte("15-bytes-buffer"))) - } - data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0) - var arrayData arrow.ArrayData = data - dataWithChild := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{arrayData}, 0, 0) - - buffers2 := slices.Clone(buffers1) - buffers2[0] = nil - dataWithNilBuffer := NewData(&arrow.StringType{}, 10, buffers2, nil, 0, 0) - - t.Run("nil buffers", func(t *testing.T) { - expectedSize := uint64(30) - if actualSize := dataWithNilBuffer.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers only", func(t *testing.T) { - expectedSize := uint64(45) - if actualSize := data.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers and child data", func(t *testing.T) { - // 45 bytes in buffers, 45 bytes in child data - expectedSize := uint64(90) - if actualSize := dataWithChild.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers and nested child data", func(t *testing.T) { - var dataWithChildArrayData arrow.ArrayData = dataWithChild - var dataWithNestedChild arrow.ArrayData = NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{dataWithChildArrayData}, 0, 0) - // 45 bytes in buffers, 90 bytes in nested child data - expectedSize := uint64(135) - if actualSize := dataWithNestedChild.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers and dictionary", func(t *testing.T) { - dictData := data - dataWithDict := NewDataWithDictionary(&arrow.StringType{}, 10, 
buffers1, 0, 0, dictData) - // 45 bytes in buffers, 45 bytes in dictionary - expectedSize := uint64(90) - if actualSize := dataWithDict.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("sliced data", func(t *testing.T) { - sliceData := NewSliceData(arrayData, 3, 5) - // offset is not taken into account in SizeInBytes() - expectedSize := uint64(45) - if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("sliced data with children", func(t *testing.T) { - var dataWithChildArrayData arrow.ArrayData = dataWithChild - sliceData := NewSliceData(dataWithChildArrayData, 3, 5) - // offset is not taken into account in SizeInBytes() - expectedSize := uint64(90) - if actualSize := sliceData.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) - - t.Run("buffers with children which are sliced data", func(t *testing.T) { - sliceData := NewSliceData(arrayData, 3, 5) - dataWithSlicedChildren := NewData(&arrow.StringType{}, 10, buffers1, []arrow.ArrayData{sliceData}, 0, 0) - // offset is not taken into account in SizeInBytes() - expectedSize := uint64(90) - if actualSize := dataWithSlicedChildren.SizeInBytes(); actualSize != expectedSize { - t.Errorf("expected size %d, got %d", expectedSize, actualSize) - } - }) -} diff --git a/go/arrow/array/decimal128.go b/go/arrow/array/decimal128.go deleted file mode 100644 index fd9e53f7f4c06..0000000000000 --- a/go/arrow/array/decimal128.go +++ /dev/null @@ -1,368 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "math/big" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of 128-bit decimal values. 
-type Decimal128 struct { - array - - values []decimal128.Num -} - -func NewDecimal128Data(data arrow.ArrayData) *Decimal128 { - a := &Decimal128{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Decimal128) Value(i int) decimal128.Num { return a.values[i] } - -func (a *Decimal128) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.GetOneForMarshal(i).(string) -} - -func (a *Decimal128) Values() []decimal128.Num { return a.values } - -func (a *Decimal128) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Decimal128) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Decimal128Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} -func (a *Decimal128) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - typ := a.DataType().(*arrow.Decimal128Type) - n := a.Value(i) - scale := typ.Scale - f := (&big.Float{}).SetInt(n.BigInt()) - if scale < 0 { - f.SetPrec(128).Mul(f, (&big.Float{}).SetInt(decimal128.GetScaleMultiplier(int(-scale)).BigInt())) - } else { - f.SetPrec(128).Quo(f, (&big.Float{}).SetInt(decimal128.GetScaleMultiplier(int(scale)).BigInt())) - } - return f.Text('g', int(typ.Precision)) -} - -// ["1.23", ] -func (a *Decimal128) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualDecimal128(left, right *Decimal128) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - 
return true -} - -type Decimal128Builder struct { - builder - - dtype *arrow.Decimal128Type - data *memory.Buffer - rawData []decimal128.Num -} - -func NewDecimal128Builder(mem memory.Allocator, dtype *arrow.Decimal128Type) *Decimal128Builder { - return &Decimal128Builder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - } -} - -func (b *Decimal128Builder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Decimal128Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Decimal128Builder) Append(v decimal128.Num) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Decimal128Builder) UnsafeAppend(v decimal128.Num) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Decimal128Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Decimal128Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Decimal128Builder) AppendEmptyValue() { - b.Append(decimal128.Num{}) -} - -func (b *Decimal128Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Decimal128Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Decimal128Builder) AppendValues(v []decimal128.Num, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - if len(v) > 0 { - arrow.Decimal128Traits.Copy(b.rawData[b.length:], v) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Decimal128Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Decimal128Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Decimal128Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Decimal128Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Decimal128Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Decimal128Traits.BytesRequired(n)) - b.rawData = arrow.Decimal128Traits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Decimal128 array from the memory buffers used by the builder and resets the Decimal128Builder -// so it can be used to build a new array. -func (b *Decimal128Builder) NewArray() arrow.Array { - return b.NewDecimal128Array() -} - -// NewDecimal128Array creates a Decimal128 array from the memory buffers used by the builder and resets the Decimal128Builder -// so it can be used to build a new array. 
-func (b *Decimal128Builder) NewDecimal128Array() (a *Decimal128) { - data := b.newData() - a = NewDecimal128Data(data) - data.Release() - return -} - -func (b *Decimal128Builder) newData() (data *Data) { - bytesRequired := arrow.Decimal128Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Decimal128Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := decimal128.FromString(s, b.dtype.Precision, b.dtype.Scale) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Decimal128Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case float64: - val, err := decimal128.FromFloat64(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case string: - val, err := decimal128.FromString(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case json.Number: - val, err := decimal128.FromString(v.String(), b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case nil: - b.AppendNull() - return nil - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(decimal128.Num{}), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Decimal128Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the unmarshalled values to this builder. 
-// -// If the values are strings, they will get parsed with big.ParseFloat using -// a rounding mode of big.ToNearestAway currently. -func (b *Decimal128Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("decimal128 builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Decimal128)(nil) - _ Builder = (*Decimal128Builder)(nil) -) diff --git a/go/arrow/array/decimal128_test.go b/go/arrow/array/decimal128_test.go deleted file mode 100644 index 707a4f1a6c8d5..0000000000000 --- a/go/arrow/array/decimal128_test.go +++ /dev/null @@ -1,283 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestNewDecimal128Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 1}) - defer ab.Release() - - ab.Retain() - ab.Release() - - want := []decimal128.Num{ - decimal128.New(1, 1), - decimal128.New(2, 2), - decimal128.New(3, 3), - {}, - decimal128.FromI64(-5), - decimal128.FromI64(-6), - {}, - decimal128.FromI64(8), - decimal128.FromI64(9), - decimal128.FromI64(10), - } - valids := []bool{true, true, true, false, true, true, false, true, true, true} - - for i, valid := range valids { - switch { - case valid: - ab.Append(want[i]) - default: - ab.AppendNull() - } - } - - // check state of builder before NewDecimal128Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewArray().(*array.Decimal128) - a.Retain() - a.Release() - - // check state of builder after NewDecimal128Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal128Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal128Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal128Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - - assert.Equal(t, want, a.Values(), "unexpected Decimal128Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") - assert.Len(t, 
a.Values(), 10, "unexpected length of Decimal128Values") - assert.Equal(t, 10*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() - ab.Append(decimal128.FromI64(7)) - ab.Append(decimal128.FromI64(8)) - - a = ab.NewDecimal128Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []decimal128.Num{decimal128.FromI64(7), decimal128.FromI64(8)}, a.Values()) - assert.Len(t, a.Values(), 2) - assert.Equal(t, 2*arrow.Decimal128SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() -} - -func TestDecimal128Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal128Builder(mem, &arrow.Decimal128Type{Precision: 10, Scale: 1}) - defer ab.Release() - - want := []decimal128.Num{decimal128.FromI64(3), decimal128.FromI64(4)} - - ab.AppendValues([]decimal128.Num{}, nil) - a := ab.NewDecimal128Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDecimal128Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewDecimal128Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues([]decimal128.Num{}, nil) - ab.AppendValues(want, nil) - a = ab.NewDecimal128Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]decimal128.Num{}, nil) - a = ab.NewDecimal128Array() - assert.Equal(t, want, a.Values()) - a.Release() -} - -func TestDecimal128Slice(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal128Type{Precision: 10, Scale: 1} - b := array.NewDecimal128Builder(mem, dtype) - defer b.Release() - - var data = []decimal128.Num{ - decimal128.FromI64(-1), - decimal128.FromI64(+0), - decimal128.FromI64(+1), - decimal128.New(-4, 4), - } - b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewDecimal128Array() - defer arr.Release() - 
- if got, want := arr.Len(), len(data); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Decimal128) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) {4 -4}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, array.NullValueStr, v.ValueStr(0)) - assert.Equal(t, "-7.378697629e+18", v.ValueStr(1)) - - if got, want := v.NullN(), 1; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if got, want := v.Data().Offset(), 2; got != want { - t.Fatalf("invalid offset: got=%d, want=%d", got, want) - } -} - -func TestDecimal128StringRoundTrip(t *testing.T) { - dt := &arrow.Decimal128Type{Precision: 20, Scale: 5} - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDecimal128Builder(mem, dt) - defer b.Release() - - values := []decimal128.Num{ - decimal128.New(1, 1), - decimal128.New(1, 2), - decimal128.New(1, 3), - {}, - decimal128.FromI64(-5), - decimal128.FromI64(-6), - {}, - decimal128.FromI64(8), - decimal128.FromI64(9), - decimal128.FromI64(10), - } - val1, err := decimal128.FromString("0.99", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - val2, err := decimal128.FromString("1234567890.12345", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - values = append(values, val1, val2) - - valid := []bool{true, true, true, false, true, true, false, true, true, true, true, true} - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.Decimal128) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewDecimal128Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Decimal128) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDecimal128GetOneForMarshal(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal128Type{Precision: 38, Scale: 20} - - b := array.NewDecimal128Builder(mem, dtype) - defer b.Release() - - cases := []struct { - give any - want any - }{ - {"1", "1"}, - {"1.25", "1.25"}, - {"0.99", "0.99"}, - {"1234567890.123456789", "1234567890.123456789"}, - {nil, nil}, - {"-0.99", "-0.99"}, - {"-1234567890.123456789", "-1234567890.123456789"}, - {"0.0000000000000000001", "1e-19"}, - } - for _, v := range cases { - if v.give == nil { - b.AppendNull() - continue - } - - dt, err := decimal128.FromString(v.give.(string), dtype.Precision, dtype.Scale) - if err != nil { - t.Fatal(err) - } - b.Append(dt) - } - - arr := b.NewDecimal128Array() - defer arr.Release() - - if got, want := arr.Len(), len(cases); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - for i := range cases { - assert.Equalf(t, cases[i].want, arr.GetOneForMarshal(i), "unexpected value at index %d", i) - } -} diff --git a/go/arrow/array/decimal256.go b/go/arrow/array/decimal256.go deleted file mode 100644 index 6431306f969c3..0000000000000 --- a/go/arrow/array/decimal256.go +++ /dev/null @@ -1,368 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "math/big" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Decimal256 is a type that represents an immutable sequence of 256-bit decimal values. 
-type Decimal256 struct { - array - - values []decimal256.Num -} - -func NewDecimal256Data(data arrow.ArrayData) *Decimal256 { - a := &Decimal256{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Decimal256) Value(i int) decimal256.Num { return a.values[i] } - -func (a *Decimal256) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.GetOneForMarshal(i).(string) -} - -func (a *Decimal256) Values() []decimal256.Num { return a.values } - -func (a *Decimal256) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Decimal256) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Decimal256Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Decimal256) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - typ := a.DataType().(*arrow.Decimal256Type) - n := a.Value(i) - scale := typ.Scale - f := (&big.Float{}).SetInt(n.BigInt()) - if scale < 0 { - f.SetPrec(256).Mul(f, (&big.Float{}).SetInt(decimal256.GetScaleMultiplier(int(-scale)).BigInt())) - } else { - f.SetPrec(256).Quo(f, (&big.Float{}).SetInt(decimal256.GetScaleMultiplier(int(scale)).BigInt())) - } - return f.Text('g', int(typ.Precision)) -} - -func (a *Decimal256) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualDecimal256(left, right *Decimal256) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} 
- -type Decimal256Builder struct { - builder - - dtype *arrow.Decimal256Type - data *memory.Buffer - rawData []decimal256.Num -} - -func NewDecimal256Builder(mem memory.Allocator, dtype *arrow.Decimal256Type) *Decimal256Builder { - return &Decimal256Builder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - } -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Decimal256Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Decimal256Builder) Append(v decimal256.Num) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Decimal256Builder) UnsafeAppend(v decimal256.Num) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Decimal256Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Decimal256Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Decimal256Builder) AppendEmptyValue() { - b.Append(decimal256.Num{}) -} - -func (b *Decimal256Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Decimal256Builder) Type() arrow.DataType { return b.dtype } - -func (b *Decimal256Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Decimal256Builder) AppendValues(v []decimal256.Num, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("arrow/array: len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - if len(v) > 0 { - arrow.Decimal256Traits.Copy(b.rawData[b.length:], v) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Decimal256Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Decimal256Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Decimal256Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Decimal256Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Decimal256Traits.BytesRequired(n)) - b.rawData = arrow.Decimal256Traits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder -// so it can be used to build a new array. -func (b *Decimal256Builder) NewArray() arrow.Array { - return b.NewDecimal256Array() -} - -// NewDecimal256Array creates a Decimal256 array from the memory buffers used by the builder and resets the Decimal256Builder -// so it can be used to build a new array. 
-func (b *Decimal256Builder) NewDecimal256Array() (a *Decimal256) { - data := b.newData() - a = NewDecimal256Data(data) - data.Release() - return -} - -func (b *Decimal256Builder) newData() (data *Data) { - bytesRequired := arrow.Decimal256Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Decimal256Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := decimal256.FromString(s, b.dtype.Precision, b.dtype.Scale) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Decimal256Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case float64: - val, err := decimal256.FromFloat64(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(val) - case string: - out, err := decimal256.FromString(v, b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(out) - case json.Number: - out, err := decimal256.FromString(v.String(), b.dtype.Precision, b.dtype.Scale) - if err != nil { - return err - } - b.Append(out) - case nil: - b.AppendNull() - return nil - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(decimal256.Num{}), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Decimal256Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the unmarshalled values to this builder. 
-// -// If the values are strings, they will get parsed with big.ParseFloat using -// a rounding mode of big.ToNearestAway currently. -func (b *Decimal256Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("arrow/array: decimal256 builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Decimal256)(nil) - _ Builder = (*Decimal256Builder)(nil) -) diff --git a/go/arrow/array/decimal256_test.go b/go/arrow/array/decimal256_test.go deleted file mode 100644 index 8adb810165430..0000000000000 --- a/go/arrow/array/decimal256_test.go +++ /dev/null @@ -1,293 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestNewDecimal256Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) - defer ab.Release() - - ab.Retain() - ab.Release() - - want := []decimal256.Num{ - decimal256.New(1, 1, 1, 1), - decimal256.New(2, 2, 2, 2), - decimal256.New(3, 3, 3, 3), - {}, - decimal256.FromI64(-5), - decimal256.FromI64(-6), - {}, - decimal256.FromI64(8), - decimal256.FromI64(9), - decimal256.FromI64(10), - } - valids := []bool{true, true, true, false, true, true, false, true, true, true} - - for i, valid := range valids { - switch { - case valid: - ab.Append(want[i]) - default: - ab.AppendNull() - } - } - - // check state of builder before NewDecimal256Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewArray().(*array.Decimal256) - a.Retain() - a.Release() - - // check state of builder after NewDecimal256Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDecimal256Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDecimal256Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDecimal256Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - - assert.Equal(t, want, a.Values(), "unexpected Decimal256Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") 
- assert.Len(t, a.Values(), 10, "unexpected length of Decimal256Values") - assert.Equal(t, 10*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() - ab.Append(decimal256.FromI64(7)) - ab.Append(decimal256.FromI64(8)) - - a = ab.NewDecimal256Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, 4, a.Data().Buffers()[0].Len(), "should be 4 bytes due to minBuilderCapacity") - assert.Equal(t, []decimal256.Num{decimal256.FromI64(7), decimal256.FromI64(8)}, a.Values()) - assert.Len(t, a.Values(), 2) - assert.Equal(t, 2*arrow.Decimal256SizeBytes, a.Data().Buffers()[1].Len()) - - a.Release() -} - -func TestDecimal256Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDecimal256Builder(mem, &arrow.Decimal256Type{Precision: 10, Scale: 1}) - defer ab.Release() - - want := []decimal256.Num{decimal256.FromI64(3), decimal256.FromI64(4)} - - ab.AppendValues([]decimal256.Num{}, nil) - a := ab.NewDecimal256Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDecimal256Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewDecimal256Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues([]decimal256.Num{}, nil) - ab.AppendValues(want, nil) - a = ab.NewDecimal256Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]decimal256.Num{}, nil) - a = ab.NewDecimal256Array() - assert.Equal(t, want, a.Values()) - a.Release() -} - -func TestDecimal256Slice(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal256Type{Precision: 10, Scale: 1} - b := array.NewDecimal256Builder(mem, dtype) - defer b.Release() - - var data = []decimal256.Num{ - decimal256.FromI64(-1), - decimal256.FromI64(+0), - decimal256.FromI64(+1), - decimal256.New(4, 4, 4, 4), - } - 
b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewDecimal256Array() - defer arr.Release() - - if got, want := arr.Len(), len(data); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Decimal256) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) {[4 4 4 4]}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, array.NullValueStr, v.ValueStr(0)) - assert.Equal(t, "2.510840694e+57", v.ValueStr(1)) - - if got, want := v.NullN(), 1; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if got, want := v.Data().Offset(), 2; got != want { - t.Fatalf("invalid offset: got=%d, want=%d", got, want) - } -} - -func TestDecimal256StringRoundTrip(t *testing.T) { - dt := &arrow.Decimal256Type{Precision: 70, Scale: 10} - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDecimal256Builder(mem, dt) - defer b.Release() - - values := []decimal256.Num{ - decimal256.New(1, 1, 1, 1), - decimal256.New(2, 2, 2, 2), - decimal256.New(3, 3, 3, 3), - {}, - decimal256.FromI64(-5), - decimal256.FromI64(-6), - {}, - decimal256.FromI64(8), - decimal256.FromI64(9), - decimal256.FromI64(10), - } - val1, err := decimal256.FromString("0.99", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - val2, err := decimal256.FromString("1234567890.123456789", dt.Precision, dt.Scale) - if err != nil { - t.Fatal(err) - } - values = append(values, val1, val2) - - valid := []bool{true, true, true, false, true, true, false, true, true, true, true, true} - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.Decimal256) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewDecimal256Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - v := arr.ValueStr(i) - assert.NoError(t, b1.AppendValueFromString(v)) - } - - arr1 := b1.NewArray().(*array.Decimal256) - defer arr1.Release() - - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) && arr1.IsNull(i) { - continue - } - if arr.Value(i) != arr1.Value(i) { - t.Fatalf("unexpected value at index %d: got=%v, want=%v", i, arr1.Value(i), arr.Value(i)) - } - } - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDecimal256GetOneForMarshal(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Decimal256Type{Precision: 38, Scale: 20} - - b := array.NewDecimal256Builder(mem, dtype) - defer b.Release() - - cases := []struct { - give any - want any - }{ - {"1", "1"}, - {"1.25", "1.25"}, - {"0.99", "0.99"}, - {"1234567890.123456789", "1234567890.123456789"}, - {nil, nil}, - {"-0.99", "-0.99"}, - {"-1234567890.123456789", "-1234567890.123456789"}, - {"0.0000000000000000001", "1e-19"}, - } - for _, v := range cases { - if v.give == nil { - b.AppendNull() - continue - } - - dt, err := decimal256.FromString(v.give.(string), dtype.Precision, dtype.Scale) - if err != nil { - t.Fatal(err) - } - b.Append(dt) - } - - arr := b.NewDecimal256Array() - defer arr.Release() - - if got, want := arr.Len(), len(cases); got != want { - t.Fatalf("invalid array length: got=%d, want=%d", got, want) - } - - for i := range cases { - assert.Equalf(t, cases[i].want, arr.GetOneForMarshal(i), "unexpected value at index %d", i) - } -} diff --git a/go/arrow/array/decimal_test.go b/go/arrow/array/decimal_test.go deleted file mode 100644 index b321bd7fbbe7b..0000000000000 --- a/go/arrow/array/decimal_test.go +++ /dev/null @@ -1,222 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math/big" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/suite" -) - -type decimalValue interface{} - -func bitmapFromSlice(vals []bool) []byte { - out := make([]byte, int(bitutil.BytesForBits(int64(len(vals))))) - writer := bitutil.NewBitmapWriter(out, 0, len(vals)) - for _, val := range vals { - if val { - writer.Set() - } else { - writer.Clear() - } - writer.Next() - } - writer.Finish() - return out -} - -type DecimalTestSuite struct { - suite.Suite - - dt arrow.DataType - mem *memory.CheckedAllocator -} - -func (d *DecimalTestSuite) SetupTest() { - d.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) -} - -func (d *DecimalTestSuite) TearDownTest() { - d.mem.AssertSize(d.T(), 0) -} - -func (d *DecimalTestSuite) makeData(input []decimalValue, out []byte) { - switch d.dt.ID() { - case arrow.DECIMAL128: - for _, v := range input { - arrow.Decimal128Traits.PutValue(out, v.(decimal128.Num)) - 
out = out[arrow.Decimal128SizeBytes:] - } - case arrow.DECIMAL256: - for _, v := range input { - arrow.Decimal256Traits.PutValue(out, v.(decimal256.Num)) - out = out[arrow.Decimal256SizeBytes:] - } - } -} - -func (d *DecimalTestSuite) testCreate(bitWidth int, prec int32, draw []decimalValue, valids []bool, offset int64) arrow.Array { - switch bitWidth { - case 128: - d.dt = &arrow.Decimal128Type{Precision: prec, Scale: 4} - case 256: - d.dt = &arrow.Decimal256Type{Precision: prec, Scale: 4} - } - - bldr := array.NewBuilder(d.mem, d.dt) - defer bldr.Release() - bldr.Reserve(len(draw)) - - nullCount := 0 - for i, b := range valids { - if b { - switch v := draw[i].(type) { - case decimal128.Num: - bldr.(*array.Decimal128Builder).Append(v) - case decimal256.Num: - bldr.(*array.Decimal256Builder).Append(v) - } - } else { - bldr.AppendNull() - nullCount++ - } - } - - arr := bldr.NewArray() - d.EqualValues(0, bldr.Len()) - - rawBytes := make([]byte, len(draw)*(d.dt.(arrow.FixedWidthDataType).BitWidth()/8)) - d.makeData(draw, rawBytes) - - expectedData := memory.NewBufferBytes(rawBytes) - expectedNullBitmap := bitmapFromSlice(valids) - expectedNullCount := len(draw) - bitutil.CountSetBits(expectedNullBitmap, 0, len(valids)) - - expected := array.NewData(d.dt, len(valids), []*memory.Buffer{memory.NewBufferBytes(expectedNullBitmap), expectedData}, nil, expectedNullCount, 0) - defer expected.Release() - - expectedArr := array.MakeFromData(expected) - defer expectedArr.Release() - - lhs := array.NewSlice(arr, offset, int64(arr.Len())-offset) - rhs := array.NewSlice(expectedArr, offset, int64(expectedArr.Len())-offset) - defer func() { - lhs.Release() - rhs.Release() - }() - - d.Truef(array.Equal(lhs, rhs), "expected: %s, got: %s\n", rhs, lhs) - return arr -} - -type Decimal128TestSuite struct { - DecimalTestSuite -} - -func (d *Decimal128TestSuite) runTest(f func(prec int32)) { - for prec := int32(1); prec <= 38; prec++ { - d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) 
}) - } -} - -func (d *Decimal128TestSuite) TestNoNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal128.FromU64(1), decimal128.FromI64(-2), - decimal128.FromU64(2389), decimal128.FromU64(4), - decimal128.FromI64(-12348)} - valids := []bool{true, true, true, true, true} - arr := d.testCreate(128, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(128, prec, draw, valids, 2) - arr.Release() - }) -} - -func (d *Decimal128TestSuite) TestWithNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal128.FromU64(1), decimal128.FromU64(2), - decimal128.FromI64(-1), decimal128.FromI64(4), decimal128.FromI64(-1), - decimal128.FromI64(1), decimal128.FromI64(2)} - bigVal, _ := (&big.Int{}).SetString("230342903942234234", 10) - draw = append(draw, decimal128.FromBigInt(bigVal)) - - bigNeg, _ := (&big.Int{}).SetString("-23049302932235234", 10) - draw = append(draw, decimal128.FromBigInt(bigNeg)) - - valids := []bool{true, true, false, true, false, true, true, true, true} - arr := d.testCreate(128, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(128, prec, draw, valids, 2) - arr.Release() - }) -} - -type Decimal256TestSuite struct { - DecimalTestSuite -} - -func (d *Decimal256TestSuite) runTest(f func(prec int32)) { - for _, prec := range []int32{1, 2, 5, 10, 38, 39, 40, 75, 76} { - d.Run(fmt.Sprintf("prec=%d", prec), func() { f(prec) }) - } -} - -func (d *Decimal256TestSuite) TestNoNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal256.FromU64(1), decimal256.FromI64(-2), - decimal256.FromU64(2389), decimal256.FromU64(4), - decimal256.FromI64(-12348)} - valids := []bool{true, true, true, true, true} - arr := d.testCreate(256, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(256, prec, draw, valids, 2) - arr.Release() - }) -} - -func (d *Decimal256TestSuite) TestWithNulls() { - d.runTest(func(prec int32) { - draw := []decimalValue{decimal256.FromU64(1), decimal256.FromU64(2), - 
decimal256.FromI64(-1), decimal256.FromI64(4), decimal256.FromI64(-1), - decimal256.FromI64(1), decimal256.FromI64(2)} - - // (pow(2, 255) - 1) - bigVal, _ := (&big.Int{}).SetString("57896044618658097711785492504343953926634992332820282019728792003956564819967", 10) - draw = append(draw, decimal256.FromBigInt(bigVal)) - - draw = append(draw, decimal256.FromBigInt(bigVal.Neg(bigVal))) - - valids := []bool{true, true, false, true, false, true, true, true, true} - arr := d.testCreate(256, prec, draw, valids, 0) - arr.Release() - arr = d.testCreate(256, prec, draw, valids, 2) - arr.Release() - }) -} - -func TestDecimal(t *testing.T) { - suite.Run(t, new(Decimal128TestSuite)) - suite.Run(t, new(Decimal256TestSuite)) -} diff --git a/go/arrow/array/dictionary.go b/go/arrow/array/dictionary.go deleted file mode 100644 index ca7fed5257085..0000000000000 --- a/go/arrow/array/dictionary.go +++ /dev/null @@ -1,1958 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "errors" - "fmt" - "math" - "math/bits" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/hashing" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/apache/arrow/go/v18/internal/utils" -) - -// Dictionary represents the type for dictionary-encoded data with a data -// dependent dictionary. -// -// A dictionary array contains an array of non-negative integers (the "dictionary" -// indices") along with a data type containing a "dictionary" corresponding to -// the distinct values represented in the data. -// -// For example, the array: -// -// ["foo", "bar", "foo", "bar", "foo", "bar"] -// -// with dictionary ["bar", "foo"], would have the representation of: -// -// indices: [1, 0, 1, 0, 1, 0] -// dictionary: ["bar", "foo"] -// -// The indices in principle may be any integer type. -type Dictionary struct { - array - - indices arrow.Array - dict arrow.Array -} - -// NewDictionaryArray constructs a dictionary array with the provided indices -// and dictionary using the given type. 
-func NewDictionaryArray(typ arrow.DataType, indices, dict arrow.Array) *Dictionary { - a := &Dictionary{} - a.array.refCount = 1 - dictdata := NewData(typ, indices.Len(), indices.Data().Buffers(), indices.Data().Children(), indices.NullN(), indices.Data().Offset()) - dictdata.dictionary = dict.Data().(*Data) - dict.Data().Retain() - - defer dictdata.Release() - a.setData(dictdata) - return a -} - -// checkIndexBounds returns an error if any value in the provided integer -// arraydata is >= the passed upperlimit or < 0. otherwise nil -func checkIndexBounds(indices *Data, upperlimit uint64) error { - if indices.length == 0 { - return nil - } - - var maxval uint64 - switch indices.dtype.ID() { - case arrow.UINT8: - maxval = math.MaxUint8 - case arrow.UINT16: - maxval = math.MaxUint16 - case arrow.UINT32: - maxval = math.MaxUint32 - case arrow.UINT64: - maxval = math.MaxUint64 - } - // for unsigned integers, if the values array is larger than the maximum - // index value (especially for UINT8/UINT16), then there's no need to - // boundscheck. for signed integers we still need to bounds check - // because a value could be < 0. - isSigned := maxval == 0 - if !isSigned && upperlimit > maxval { - return nil - } - - start := indices.offset - end := indices.offset + indices.length - - // TODO(ARROW-15950): lift BitSetRunReader from parquet to utils - // and use it here for performance improvement. 
- - switch indices.dtype.ID() { - case arrow.INT8: - data := arrow.Int8Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt8(data[start:end]) - if min < 0 || max >= int8(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT8: - data := arrow.Uint8Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint8(data[start:end]) - if max >= uint8(upperlimit) { - return fmt.Errorf("contains out of bounds index: max: %d", max) - } - case arrow.INT16: - data := arrow.Int16Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt16(data[start:end]) - if min < 0 || max >= int16(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT16: - data := arrow.Uint16Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint16(data[start:end]) - if max >= uint16(upperlimit) { - return fmt.Errorf("contains out of bounds index: max: %d", max) - } - case arrow.INT32: - data := arrow.Int32Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt32(data[start:end]) - if min < 0 || max >= int32(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT32: - data := arrow.Uint32Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint32(data[start:end]) - if max >= uint32(upperlimit) { - return fmt.Errorf("contains out of bounds index: max: %d", max) - } - case arrow.INT64: - data := arrow.Int64Traits.CastFromBytes(indices.buffers[1].Bytes()) - min, max := utils.GetMinMaxInt64(data[start:end]) - if min < 0 || max >= int64(upperlimit) { - return fmt.Errorf("contains out of bounds index: min: %d, max: %d", min, max) - } - case arrow.UINT64: - data := arrow.Uint64Traits.CastFromBytes(indices.buffers[1].Bytes()) - _, max := utils.GetMinMaxUint64(data[indices.offset : 
indices.offset+indices.length]) - if max >= upperlimit { - return fmt.Errorf("contains out of bounds value: max: %d", max) - } - default: - return fmt.Errorf("invalid type for bounds checking: %T", indices.dtype) - } - - return nil -} - -// NewValidatedDictionaryArray constructs a dictionary array from the provided indices -// and dictionary arrays, while also performing validation checks to ensure correctness -// such as bounds checking at are usually skipped for performance. -func NewValidatedDictionaryArray(typ *arrow.DictionaryType, indices, dict arrow.Array) (*Dictionary, error) { - if indices.DataType().ID() != typ.IndexType.ID() { - return nil, fmt.Errorf("dictionary type index (%T) does not match indices array type (%T)", typ.IndexType, indices.DataType()) - } - - if !arrow.TypeEqual(typ.ValueType, dict.DataType()) { - return nil, fmt.Errorf("dictionary value type (%T) does not match dict array type (%T)", typ.ValueType, dict.DataType()) - } - - if err := checkIndexBounds(indices.Data().(*Data), uint64(dict.Len())); err != nil { - return nil, err - } - - return NewDictionaryArray(typ, indices, dict), nil -} - -// NewDictionaryData creates a strongly typed Dictionary array from -// an ArrayData object with a datatype of arrow.Dictionary and a dictionary -func NewDictionaryData(data arrow.ArrayData) *Dictionary { - a := &Dictionary{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (d *Dictionary) Retain() { - atomic.AddInt64(&d.refCount, 1) -} - -func (d *Dictionary) Release() { - debug.Assert(atomic.LoadInt64(&d.refCount) > 0, "too many releases") - - if atomic.AddInt64(&d.refCount, -1) == 0 { - d.data.Release() - d.data, d.nullBitmapBytes = nil, nil - d.indices.Release() - d.indices = nil - if d.dict != nil { - d.dict.Release() - d.dict = nil - } - } -} - -func (d *Dictionary) setData(data *Data) { - d.array.setData(data) - - dictType := data.dtype.(*arrow.DictionaryType) - if data.dictionary == nil { - if data.length > 0 { - 
panic("arrow/array: no dictionary set in Data for Dictionary array") - } - } else { - debug.Assert(arrow.TypeEqual(dictType.ValueType, data.dictionary.DataType()), "mismatched dictionary value types") - } - - indexData := NewData(dictType.IndexType, data.length, data.buffers, data.childData, data.nulls, data.offset) - defer indexData.Release() - d.indices = MakeFromData(indexData) -} - -// Dictionary returns the values array that makes up the dictionary for this -// array. -func (d *Dictionary) Dictionary() arrow.Array { - if d.dict == nil { - d.dict = MakeFromData(d.data.dictionary) - } - return d.dict -} - -// Indices returns the underlying array of indices as it's own array -func (d *Dictionary) Indices() arrow.Array { - return d.indices -} - -// CanCompareIndices returns true if the dictionary arrays can be compared -// without having to unify the dictionaries themselves first. -// This means that the index types are equal too. -func (d *Dictionary) CanCompareIndices(other *Dictionary) bool { - if !arrow.TypeEqual(d.indices.DataType(), other.indices.DataType()) { - return false - } - - minlen := int64(min(d.data.dictionary.length, other.data.dictionary.length)) - return SliceEqual(d.Dictionary(), 0, minlen, other.Dictionary(), 0, minlen) -} - -func (d *Dictionary) ValueStr(i int) string { - if d.IsNull(i) { - return NullValueStr - } - return d.Dictionary().ValueStr(d.GetValueIndex(i)) -} - -func (d *Dictionary) String() string { - return fmt.Sprintf("{ dictionary: %v\n indices: %v }", d.Dictionary(), d.Indices()) -} - -// GetValueIndex returns the dictionary index for the value at index i of the array. -// The actual value can be retrieved by using d.Dictionary().(valuetype).Value(d.GetValueIndex(i)) -func (d *Dictionary) GetValueIndex(i int) int { - indiceData := d.data.buffers[1].Bytes() - // we know the value is non-negative per the spec, so - // we can use the unsigned value regardless. 
- switch d.indices.DataType().ID() { - case arrow.UINT8, arrow.INT8: - return int(uint8(indiceData[d.data.offset+i])) - case arrow.UINT16, arrow.INT16: - return int(arrow.Uint16Traits.CastFromBytes(indiceData)[d.data.offset+i]) - case arrow.UINT32, arrow.INT32: - idx := arrow.Uint32Traits.CastFromBytes(indiceData)[d.data.offset+i] - debug.Assert(bits.UintSize == 64 || idx <= math.MaxInt32, "arrow/dictionary: truncation of index value") - return int(idx) - case arrow.UINT64, arrow.INT64: - idx := arrow.Uint64Traits.CastFromBytes(indiceData)[d.data.offset+i] - debug.Assert((bits.UintSize == 32 && idx <= math.MaxInt32) || (bits.UintSize == 64 && idx <= math.MaxInt64), "arrow/dictionary: truncation of index value") - return int(idx) - } - debug.Assert(false, "unreachable dictionary index") - return -1 -} - -func (d *Dictionary) GetOneForMarshal(i int) interface{} { - if d.IsNull(i) { - return nil - } - vidx := d.GetValueIndex(i) - return d.Dictionary().GetOneForMarshal(vidx) -} - -func (d *Dictionary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, d.Len()) - for i := 0; i < d.Len(); i++ { - vals[i] = d.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualDict(l, r *Dictionary) bool { - return Equal(l.Dictionary(), r.Dictionary()) && Equal(l.indices, r.indices) -} - -func arrayApproxEqualDict(l, r *Dictionary, opt equalOption) bool { - return arrayApproxEqual(l.Dictionary(), r.Dictionary(), opt) && arrayApproxEqual(l.indices, r.indices, opt) -} - -// helper for building the properly typed indices of the dictionary builder -type IndexBuilder struct { - Builder - Append func(int) -} - -func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret IndexBuilder, err error) { - ret = IndexBuilder{Builder: NewBuilder(mem, dt)} - switch dt.ID() { - case arrow.INT8: - ret.Append = func(idx int) { - ret.Builder.(*Int8Builder).Append(int8(idx)) - } - case arrow.UINT8: - ret.Append = func(idx int) { - 
ret.Builder.(*Uint8Builder).Append(uint8(idx)) - } - case arrow.INT16: - ret.Append = func(idx int) { - ret.Builder.(*Int16Builder).Append(int16(idx)) - } - case arrow.UINT16: - ret.Append = func(idx int) { - ret.Builder.(*Uint16Builder).Append(uint16(idx)) - } - case arrow.INT32: - ret.Append = func(idx int) { - ret.Builder.(*Int32Builder).Append(int32(idx)) - } - case arrow.UINT32: - ret.Append = func(idx int) { - ret.Builder.(*Uint32Builder).Append(uint32(idx)) - } - case arrow.INT64: - ret.Append = func(idx int) { - ret.Builder.(*Int64Builder).Append(int64(idx)) - } - case arrow.UINT64: - ret.Append = func(idx int) { - ret.Builder.(*Uint64Builder).Append(uint64(idx)) - } - default: - debug.Assert(false, "dictionary index type must be integral") - err = fmt.Errorf("dictionary index type must be integral, not %s", dt) - } - - return -} - -// helper function to construct an appropriately typed memo table based on -// the value type for the dictionary -func createMemoTable(mem memory.Allocator, dt arrow.DataType) (ret hashing.MemoTable, err error) { - switch dt.ID() { - case arrow.INT8: - ret = hashing.NewInt8MemoTable(0) - case arrow.UINT8: - ret = hashing.NewUint8MemoTable(0) - case arrow.INT16: - ret = hashing.NewInt16MemoTable(0) - case arrow.UINT16: - ret = hashing.NewUint16MemoTable(0) - case arrow.INT32: - ret = hashing.NewInt32MemoTable(0) - case arrow.UINT32: - ret = hashing.NewUint32MemoTable(0) - case arrow.INT64: - ret = hashing.NewInt64MemoTable(0) - case arrow.UINT64: - ret = hashing.NewUint64MemoTable(0) - case arrow.DURATION, arrow.TIMESTAMP, arrow.DATE64, arrow.TIME64: - ret = hashing.NewInt64MemoTable(0) - case arrow.TIME32, arrow.DATE32, arrow.INTERVAL_MONTHS: - ret = hashing.NewInt32MemoTable(0) - case arrow.FLOAT16: - ret = hashing.NewUint16MemoTable(0) - case arrow.FLOAT32: - ret = hashing.NewFloat32MemoTable(0) - case arrow.FLOAT64: - ret = hashing.NewFloat64MemoTable(0) - case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.DECIMAL128, 
arrow.DECIMAL256, arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTH_DAY_NANO: - ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)) - case arrow.STRING: - ret = hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(mem, arrow.BinaryTypes.String)) - case arrow.NULL: - default: - err = fmt.Errorf("unimplemented dictionary value type, %s", dt) - } - - return -} - -type DictionaryBuilder interface { - Builder - - NewDictionaryArray() *Dictionary - NewDelta() (indices, delta arrow.Array, err error) - AppendArray(arrow.Array) error - AppendIndices([]int, []bool) - ResetFull() - DictionarySize() int -} - -type dictionaryBuilder struct { - builder - - dt *arrow.DictionaryType - deltaOffset int - memoTable hashing.MemoTable - idxBuilder IndexBuilder -} - -// NewDictionaryBuilderWithDict initializes a dictionary builder and inserts the values from `init` as the first -// values in the dictionary, but does not insert them as values into the array. -func NewDictionaryBuilderWithDict(mem memory.Allocator, dt *arrow.DictionaryType, init arrow.Array) DictionaryBuilder { - if init != nil && !arrow.TypeEqual(dt.ValueType, init.DataType()) { - panic(fmt.Errorf("arrow/array: cannot initialize dictionary type %T with array of type %T", dt.ValueType, init.DataType())) - } - - idxbldr, err := createIndexBuilder(mem, dt.IndexType.(arrow.FixedWidthDataType)) - if err != nil { - panic(fmt.Errorf("arrow/array: unsupported builder for index type of %T", dt)) - } - - memo, err := createMemoTable(mem, dt.ValueType) - if err != nil { - panic(fmt.Errorf("arrow/array: unsupported builder for value type of %T", dt)) - } - - bldr := dictionaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - idxBuilder: idxbldr, - memoTable: memo, - dt: dt, - } - - switch dt.ValueType.ID() { - case arrow.NULL: - ret := &NullDictionaryBuilder{bldr} - debug.Assert(init == nil, "arrow/array: doesn't make sense to init a null dictionary") - return ret - case arrow.UINT8: - ret := 
&Uint8DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint8)); err != nil { - panic(err) - } - } - return ret - case arrow.INT8: - ret := &Int8DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int8)); err != nil { - panic(err) - } - } - return ret - case arrow.UINT16: - ret := &Uint16DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint16)); err != nil { - panic(err) - } - } - return ret - case arrow.INT16: - ret := &Int16DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int16)); err != nil { - panic(err) - } - } - return ret - case arrow.UINT32: - ret := &Uint32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint32)); err != nil { - panic(err) - } - } - return ret - case arrow.INT32: - ret := &Int32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int32)); err != nil { - panic(err) - } - } - return ret - case arrow.UINT64: - ret := &Uint64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Uint64)); err != nil { - panic(err) - } - } - return ret - case arrow.INT64: - ret := &Int64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Int64)); err != nil { - panic(err) - } - } - return ret - case arrow.FLOAT16: - ret := &Float16DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Float16)); err != nil { - panic(err) - } - } - return ret - case arrow.FLOAT32: - ret := &Float32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Float32)); err != nil { - panic(err) - } - } - return ret - case arrow.FLOAT64: - ret := &Float64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Float64)); err != nil { - panic(err) - } - } - return ret - case arrow.STRING: - ret := &BinaryDictionaryBuilder{bldr} - if init != nil { - if err = 
ret.InsertStringDictValues(init.(*String)); err != nil { - panic(err) - } - } - return ret - case arrow.BINARY: - ret := &BinaryDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Binary)); err != nil { - panic(err) - } - } - return ret - case arrow.FIXED_SIZE_BINARY: - ret := &FixedSizeBinaryDictionaryBuilder{ - bldr, dt.ValueType.(*arrow.FixedSizeBinaryType).ByteWidth, - } - if init != nil { - if err = ret.InsertDictValues(init.(*FixedSizeBinary)); err != nil { - panic(err) - } - } - return ret - case arrow.DATE32: - ret := &Date32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Date32)); err != nil { - panic(err) - } - } - return ret - case arrow.DATE64: - ret := &Date64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Date64)); err != nil { - panic(err) - } - } - return ret - case arrow.TIMESTAMP: - ret := &TimestampDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Timestamp)); err != nil { - panic(err) - } - } - return ret - case arrow.TIME32: - ret := &Time32DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Time32)); err != nil { - panic(err) - } - } - return ret - case arrow.TIME64: - ret := &Time64DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Time64)); err != nil { - panic(err) - } - } - return ret - case arrow.INTERVAL_MONTHS: - ret := &MonthIntervalDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*MonthInterval)); err != nil { - panic(err) - } - } - return ret - case arrow.INTERVAL_DAY_TIME: - ret := &DayTimeDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*DayTimeInterval)); err != nil { - panic(err) - } - } - return ret - case arrow.DECIMAL128: - ret := &Decimal128DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Decimal128)); err != nil { - panic(err) - } - } - 
return ret - case arrow.DECIMAL256: - ret := &Decimal256DictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Decimal256)); err != nil { - panic(err) - } - } - return ret - case arrow.LIST: - case arrow.STRUCT: - case arrow.SPARSE_UNION: - case arrow.DENSE_UNION: - case arrow.DICTIONARY: - case arrow.MAP: - case arrow.EXTENSION: - case arrow.FIXED_SIZE_LIST: - case arrow.DURATION: - ret := &DurationDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*Duration)); err != nil { - panic(err) - } - } - return ret - case arrow.LARGE_STRING: - case arrow.LARGE_BINARY: - case arrow.LARGE_LIST: - case arrow.INTERVAL_MONTH_DAY_NANO: - ret := &MonthDayNanoDictionaryBuilder{bldr} - if init != nil { - if err = ret.InsertDictValues(init.(*MonthDayNanoInterval)); err != nil { - panic(err) - } - } - return ret - } - - panic("arrow/array: unimplemented dictionary key type") -} - -func NewDictionaryBuilder(mem memory.Allocator, dt *arrow.DictionaryType) DictionaryBuilder { - return NewDictionaryBuilderWithDict(mem, dt, nil) -} - -func (b *dictionaryBuilder) Type() arrow.DataType { return b.dt } - -func (b *dictionaryBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - b.idxBuilder.Release() - b.idxBuilder.Builder = nil - if binmemo, ok := b.memoTable.(*hashing.BinaryMemoTable); ok { - binmemo.Release() - } - b.memoTable = nil - } -} - -func (b *dictionaryBuilder) AppendNull() { - b.length += 1 - b.nulls += 1 - b.idxBuilder.AppendNull() -} - -func (b *dictionaryBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *dictionaryBuilder) AppendEmptyValue() { - b.length += 1 - b.idxBuilder.AppendEmptyValue() -} - -func (b *dictionaryBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *dictionaryBuilder) Reserve(n int) { - b.idxBuilder.Reserve(n) -} - -func 
(b *dictionaryBuilder) Resize(n int) { - b.idxBuilder.Resize(n) - b.length = b.idxBuilder.Len() -} - -func (b *dictionaryBuilder) ResetFull() { - b.builder.reset() - b.idxBuilder.NewArray().Release() - b.memoTable.Reset() -} - -func (b *dictionaryBuilder) Cap() int { return b.idxBuilder.Cap() } - -func (b *dictionaryBuilder) IsNull(i int) bool { return b.idxBuilder.IsNull(i) } - -func (b *dictionaryBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("dictionary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *dictionaryBuilder) Unmarshal(dec *json.Decoder) error { - bldr := NewBuilder(b.mem, b.dt.ValueType) - defer bldr.Release() - - if err := bldr.Unmarshal(dec); err != nil { - return err - } - - arr := bldr.NewArray() - defer arr.Release() - return b.AppendArray(arr) -} - -func (b *dictionaryBuilder) AppendValueFromString(s string) error { - bldr := NewBuilder(b.mem, b.dt.ValueType) - defer bldr.Release() - - if err := bldr.AppendValueFromString(s); err != nil { - return err - } - - arr := bldr.NewArray() - defer arr.Release() - return b.AppendArray(arr) -} - -func (b *dictionaryBuilder) UnmarshalOne(dec *json.Decoder) error { - bldr := NewBuilder(b.mem, b.dt.ValueType) - defer bldr.Release() - - if err := bldr.UnmarshalOne(dec); err != nil { - return err - } - - arr := bldr.NewArray() - defer arr.Release() - return b.AppendArray(arr) -} - -func (b *dictionaryBuilder) NewArray() arrow.Array { - return b.NewDictionaryArray() -} - -func (b *dictionaryBuilder) newData() *Data { - indices, dict, err := b.newWithDictOffset(0) - if err != nil { - panic(err) - } - - indices.dtype = b.dt - indices.dictionary = dict - return indices -} - -func (b *dictionaryBuilder) NewDictionaryArray() *Dictionary { - a := &Dictionary{} - a.refCount = 1 - - 
indices := b.newData() - a.setData(indices) - indices.Release() - return a -} - -func (b *dictionaryBuilder) newWithDictOffset(offset int) (indices, dict *Data, err error) { - idxarr := b.idxBuilder.NewArray() - defer idxarr.Release() - - indices = idxarr.Data().(*Data) - - b.deltaOffset = b.memoTable.Size() - dict, err = GetDictArrayData(b.mem, b.dt.ValueType, b.memoTable, offset) - b.reset() - indices.Retain() - return -} - -// NewDelta returns the dictionary indices and a delta dictionary since the -// last time NewArray or NewDictionaryArray were called, and resets the state -// of the builder (except for the dictionary / memotable) -func (b *dictionaryBuilder) NewDelta() (indices, delta arrow.Array, err error) { - indicesData, deltaData, err := b.newWithDictOffset(b.deltaOffset) - if err != nil { - return nil, nil, err - } - - defer indicesData.Release() - defer deltaData.Release() - indices, delta = MakeFromData(indicesData), MakeFromData(deltaData) - return -} - -func (b *dictionaryBuilder) insertDictValue(val interface{}) error { - _, _, err := b.memoTable.GetOrInsert(val) - return err -} - -func (b *dictionaryBuilder) insertDictBytes(val []byte) error { - _, _, err := b.memoTable.GetOrInsertBytes(val) - return err -} - -func (b *dictionaryBuilder) appendValue(val interface{}) error { - idx, _, err := b.memoTable.GetOrInsert(val) - b.idxBuilder.Append(idx) - b.length += 1 - return err -} - -func (b *dictionaryBuilder) appendBytes(val []byte) error { - idx, _, err := b.memoTable.GetOrInsertBytes(val) - b.idxBuilder.Append(idx) - b.length += 1 - return err -} - -func getvalFn(arr arrow.Array) func(i int) interface{} { - switch typedarr := arr.(type) { - case *Int8: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint8: - return func(i int) interface{} { return typedarr.Value(i) } - case *Int16: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint16: - return func(i int) interface{} { return typedarr.Value(i) } - 
case *Int32: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint32: - return func(i int) interface{} { return typedarr.Value(i) } - case *Int64: - return func(i int) interface{} { return typedarr.Value(i) } - case *Uint64: - return func(i int) interface{} { return typedarr.Value(i) } - case *Float16: - return func(i int) interface{} { return typedarr.Value(i).Uint16() } - case *Float32: - return func(i int) interface{} { return typedarr.Value(i) } - case *Float64: - return func(i int) interface{} { return typedarr.Value(i) } - case *Duration: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Timestamp: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Date64: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Time64: - return func(i int) interface{} { return int64(typedarr.Value(i)) } - case *Time32: - return func(i int) interface{} { return int32(typedarr.Value(i)) } - case *Date32: - return func(i int) interface{} { return int32(typedarr.Value(i)) } - case *MonthInterval: - return func(i int) interface{} { return int32(typedarr.Value(i)) } - case *Binary: - return func(i int) interface{} { return typedarr.Value(i) } - case *FixedSizeBinary: - return func(i int) interface{} { return typedarr.Value(i) } - case *String: - return func(i int) interface{} { return typedarr.Value(i) } - case *Decimal128: - return func(i int) interface{} { - val := typedarr.Value(i) - return (*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - case *Decimal256: - return func(i int) interface{} { - val := typedarr.Value(i) - return (*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - case *DayTimeInterval: - return func(i int) interface{} { - val := typedarr.Value(i) - return (*(*[arrow.DayTimeIntervalSizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - case *MonthDayNanoInterval: - return func(i int) interface{} { - val := typedarr.Value(i) - return 
(*(*[arrow.MonthDayNanoIntervalSizeBytes]byte)(unsafe.Pointer(&val)))[:] - } - } - - panic("arrow/array: invalid dictionary value type") -} - -func (b *dictionaryBuilder) AppendArray(arr arrow.Array) error { - debug.Assert(arrow.TypeEqual(b.dt.ValueType, arr.DataType()), "wrong value type of array to append to dict") - - valfn := getvalFn(arr) - for i := 0; i < arr.Len(); i++ { - if arr.IsNull(i) { - b.AppendNull() - } else { - if err := b.appendValue(valfn(i)); err != nil { - return err - } - } - } - return nil -} - -func (b *dictionaryBuilder) IndexBuilder() IndexBuilder { - return b.idxBuilder -} - -func (b *dictionaryBuilder) AppendIndices(indices []int, valid []bool) { - b.length += len(indices) - switch idxbldr := b.idxBuilder.Builder.(type) { - case *Int8Builder: - vals := make([]int8, len(indices)) - for i, v := range indices { - vals[i] = int8(v) - } - idxbldr.AppendValues(vals, valid) - case *Int16Builder: - vals := make([]int16, len(indices)) - for i, v := range indices { - vals[i] = int16(v) - } - idxbldr.AppendValues(vals, valid) - case *Int32Builder: - vals := make([]int32, len(indices)) - for i, v := range indices { - vals[i] = int32(v) - } - idxbldr.AppendValues(vals, valid) - case *Int64Builder: - vals := make([]int64, len(indices)) - for i, v := range indices { - vals[i] = int64(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint8Builder: - vals := make([]uint8, len(indices)) - for i, v := range indices { - vals[i] = uint8(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint16Builder: - vals := make([]uint16, len(indices)) - for i, v := range indices { - vals[i] = uint16(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint32Builder: - vals := make([]uint32, len(indices)) - for i, v := range indices { - vals[i] = uint32(v) - } - idxbldr.AppendValues(vals, valid) - case *Uint64Builder: - vals := make([]uint64, len(indices)) - for i, v := range indices { - vals[i] = uint64(v) - } - idxbldr.AppendValues(vals, valid) - } -} - -func (b 
*dictionaryBuilder) DictionarySize() int { - return b.memoTable.Size() -} - -type NullDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *NullDictionaryBuilder) NewArray() arrow.Array { - return b.NewDictionaryArray() -} - -func (b *NullDictionaryBuilder) NewDictionaryArray() *Dictionary { - idxarr := b.idxBuilder.NewArray() - defer idxarr.Release() - - out := idxarr.Data().(*Data) - dictarr := NewNull(0) - defer dictarr.Release() - - dictarr.data.Retain() - out.dtype = b.dt - out.dictionary = dictarr.data - - return NewDictionaryData(out) -} - -func (b *NullDictionaryBuilder) AppendArray(arr arrow.Array) error { - if arr.DataType().ID() != arrow.NULL { - return fmt.Errorf("cannot append non-null array to null dictionary") - } - - for i := 0; i < arr.(*Null).Len(); i++ { - b.AppendNull() - } - return nil -} - -type Int8DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int8DictionaryBuilder) Append(v int8) error { return b.appendValue(v) } -func (b *Int8DictionaryBuilder) InsertDictValues(arr *Int8) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint8DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint8DictionaryBuilder) Append(v uint8) error { return b.appendValue(v) } -func (b *Uint8DictionaryBuilder) InsertDictValues(arr *Uint8) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Int16DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int16DictionaryBuilder) Append(v int16) error { return b.appendValue(v) } -func (b *Int16DictionaryBuilder) InsertDictValues(arr *Int16) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint16DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint16DictionaryBuilder) Append(v uint16) error { return b.appendValue(v) } -func (b 
*Uint16DictionaryBuilder) InsertDictValues(arr *Uint16) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Int32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int32DictionaryBuilder) Append(v int32) error { return b.appendValue(v) } -func (b *Int32DictionaryBuilder) InsertDictValues(arr *Int32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint32DictionaryBuilder) Append(v uint32) error { return b.appendValue(v) } -func (b *Uint32DictionaryBuilder) InsertDictValues(arr *Uint32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Int64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Int64DictionaryBuilder) Append(v int64) error { return b.appendValue(v) } -func (b *Int64DictionaryBuilder) InsertDictValues(arr *Int64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Uint64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Uint64DictionaryBuilder) Append(v uint64) error { return b.appendValue(v) } -func (b *Uint64DictionaryBuilder) InsertDictValues(arr *Uint64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type DurationDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *DurationDictionaryBuilder) Append(v arrow.Duration) error { return b.appendValue(int64(v)) } -func (b *DurationDictionaryBuilder) InsertDictValues(arr *Duration) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type TimestampDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *TimestampDictionaryBuilder) 
Append(v arrow.Timestamp) error { return b.appendValue(int64(v)) } -func (b *TimestampDictionaryBuilder) InsertDictValues(arr *Timestamp) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type Time32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Time32DictionaryBuilder) Append(v arrow.Time32) error { return b.appendValue(int32(v)) } -func (b *Time32DictionaryBuilder) InsertDictValues(arr *Time32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int32(v)); err != nil { - break - } - } - return -} - -type Time64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Time64DictionaryBuilder) Append(v arrow.Time64) error { return b.appendValue(int64(v)) } -func (b *Time64DictionaryBuilder) InsertDictValues(arr *Time64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type Date32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Date32DictionaryBuilder) Append(v arrow.Date32) error { return b.appendValue(int32(v)) } -func (b *Date32DictionaryBuilder) InsertDictValues(arr *Date32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int32(v)); err != nil { - break - } - } - return -} - -type Date64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Date64DictionaryBuilder) Append(v arrow.Date64) error { return b.appendValue(int64(v)) } -func (b *Date64DictionaryBuilder) InsertDictValues(arr *Date64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(int64(v)); err != nil { - break - } - } - return -} - -type MonthIntervalDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *MonthIntervalDictionaryBuilder) Append(v arrow.MonthInterval) error { - return b.appendValue(int32(v)) -} -func (b *MonthIntervalDictionaryBuilder) InsertDictValues(arr *MonthInterval) (err error) { - for _, v := range 
arr.values { - if err = b.insertDictValue(int32(v)); err != nil { - break - } - } - return -} - -type Float16DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Float16DictionaryBuilder) Append(v float16.Num) error { return b.appendValue(v.Uint16()) } -func (b *Float16DictionaryBuilder) InsertDictValues(arr *Float16) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v.Uint16()); err != nil { - break - } - } - return -} - -type Float32DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Float32DictionaryBuilder) Append(v float32) error { return b.appendValue(v) } -func (b *Float32DictionaryBuilder) InsertDictValues(arr *Float32) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type Float64DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Float64DictionaryBuilder) Append(v float64) error { return b.appendValue(v) } -func (b *Float64DictionaryBuilder) InsertDictValues(arr *Float64) (err error) { - for _, v := range arr.values { - if err = b.insertDictValue(v); err != nil { - break - } - } - return -} - -type BinaryDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *BinaryDictionaryBuilder) Append(v []byte) error { - if v == nil { - b.AppendNull() - return nil - } - - return b.appendBytes(v) -} - -func (b *BinaryDictionaryBuilder) AppendString(v string) error { return b.appendBytes([]byte(v)) } -func (b *BinaryDictionaryBuilder) InsertDictValues(arr *Binary) (err error) { - if !arrow.TypeEqual(arr.DataType(), b.dt.ValueType) { - return fmt.Errorf("dictionary insert type mismatch: cannot insert values of type %T to dictionary type %T", arr.DataType(), b.dt.ValueType) - } - - for i := 0; i < arr.Len(); i++ { - if err = b.insertDictBytes(arr.Value(i)); err != nil { - break - } - } - return -} -func (b *BinaryDictionaryBuilder) InsertStringDictValues(arr *String) (err error) { - if !arrow.TypeEqual(arr.DataType(), b.dt.ValueType) { - 
return fmt.Errorf("dictionary insert type mismatch: cannot insert values of type %T to dictionary type %T", arr.DataType(), b.dt.ValueType) - } - - for i := 0; i < arr.Len(); i++ { - if err = b.insertDictValue(arr.Value(i)); err != nil { - break - } - } - return -} - -func (b *BinaryDictionaryBuilder) GetValueIndex(i int) int { - switch b := b.idxBuilder.Builder.(type) { - case *Uint8Builder: - return int(b.Value(i)) - case *Int8Builder: - return int(b.Value(i)) - case *Uint16Builder: - return int(b.Value(i)) - case *Int16Builder: - return int(b.Value(i)) - case *Uint32Builder: - return int(b.Value(i)) - case *Int32Builder: - return int(b.Value(i)) - case *Uint64Builder: - return int(b.Value(i)) - case *Int64Builder: - return int(b.Value(i)) - default: - return -1 - } -} - -func (b *BinaryDictionaryBuilder) Value(i int) []byte { - switch mt := b.memoTable.(type) { - case *hashing.BinaryMemoTable: - return mt.Value(i) - } - return nil -} - -func (b *BinaryDictionaryBuilder) ValueStr(i int) string { - return string(b.Value(i)) -} - -type FixedSizeBinaryDictionaryBuilder struct { - dictionaryBuilder - byteWidth int -} - -func (b *FixedSizeBinaryDictionaryBuilder) Append(v []byte) error { - return b.appendValue(v[:b.byteWidth]) -} -func (b *FixedSizeBinaryDictionaryBuilder) InsertDictValues(arr *FixedSizeBinary) (err error) { - var ( - beg = arr.array.data.offset * b.byteWidth - end = (arr.array.data.offset + arr.data.length) * b.byteWidth - ) - data := arr.valueBytes[beg:end] - for len(data) > 0 { - if err = b.insertDictValue(data[:b.byteWidth]); err != nil { - break - } - data = data[b.byteWidth:] - } - return -} - -type Decimal128DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Decimal128DictionaryBuilder) Append(v decimal128.Num) error { - return b.appendValue((*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *Decimal128DictionaryBuilder) InsertDictValues(arr *Decimal128) (err error) { - data := 
arrow.Decimal128Traits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.Decimal128SizeBytes]); err != nil { - break - } - data = data[arrow.Decimal128SizeBytes:] - } - return -} - -type Decimal256DictionaryBuilder struct { - dictionaryBuilder -} - -func (b *Decimal256DictionaryBuilder) Append(v decimal256.Num) error { - return b.appendValue((*(*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *Decimal256DictionaryBuilder) InsertDictValues(arr *Decimal256) (err error) { - data := arrow.Decimal256Traits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.Decimal256SizeBytes]); err != nil { - break - } - data = data[arrow.Decimal256SizeBytes:] - } - return -} - -type MonthDayNanoDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *MonthDayNanoDictionaryBuilder) Append(v arrow.MonthDayNanoInterval) error { - return b.appendValue((*(*[arrow.MonthDayNanoIntervalSizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *MonthDayNanoDictionaryBuilder) InsertDictValues(arr *MonthDayNanoInterval) (err error) { - data := arrow.MonthDayNanoIntervalTraits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.MonthDayNanoIntervalSizeBytes]); err != nil { - break - } - data = data[arrow.MonthDayNanoIntervalSizeBytes:] - } - return -} - -type DayTimeDictionaryBuilder struct { - dictionaryBuilder -} - -func (b *DayTimeDictionaryBuilder) Append(v arrow.DayTimeInterval) error { - return b.appendValue((*(*[arrow.DayTimeIntervalSizeBytes]byte)(unsafe.Pointer(&v)))[:]) -} -func (b *DayTimeDictionaryBuilder) InsertDictValues(arr *DayTimeInterval) (err error) { - data := arrow.DayTimeIntervalTraits.CastToBytes(arr.values) - for len(data) > 0 { - if err = b.insertDictValue(data[:arrow.DayTimeIntervalSizeBytes]); err != nil { - break - } - data = data[arrow.DayTimeIntervalSizeBytes:] - } - return -} - -func IsTrivialTransposition(transposeMap []int32) bool { - 
for i, t := range transposeMap { - if t != int32(i) { - return false - } - } - return true -} - -func TransposeDictIndices(mem memory.Allocator, data arrow.ArrayData, inType, outType arrow.DataType, dict arrow.ArrayData, transposeMap []int32) (arrow.ArrayData, error) { - // inType may be different from data->dtype if data is ExtensionType - if inType.ID() != arrow.DICTIONARY || outType.ID() != arrow.DICTIONARY { - return nil, errors.New("arrow/array: expected dictionary type") - } - - var ( - inDictType = inType.(*arrow.DictionaryType) - outDictType = outType.(*arrow.DictionaryType) - inIndexType = inDictType.IndexType - outIndexType = outDictType.IndexType.(arrow.FixedWidthDataType) - ) - - if inIndexType.ID() == outIndexType.ID() && IsTrivialTransposition(transposeMap) { - // index type and values will be identical, we can reuse the existing buffers - return NewDataWithDictionary(outType, data.Len(), []*memory.Buffer{data.Buffers()[0], data.Buffers()[1]}, - data.NullN(), data.Offset(), dict.(*Data)), nil - } - - // default path: compute the transposed indices as a new buffer - outBuf := memory.NewResizableBuffer(mem) - outBuf.Resize(data.Len() * int(bitutil.BytesForBits(int64(outIndexType.BitWidth())))) - defer outBuf.Release() - - // shift null buffer if original offset is non-zero - var nullBitmap *memory.Buffer - if data.Offset() != 0 && data.NullN() != 0 { - nullBitmap = memory.NewResizableBuffer(mem) - nullBitmap.Resize(int(bitutil.BytesForBits(int64(data.Len())))) - bitutil.CopyBitmap(data.Buffers()[0].Bytes(), data.Offset(), data.Len(), nullBitmap.Bytes(), 0) - defer nullBitmap.Release() - } else { - nullBitmap = data.Buffers()[0] - } - - outData := NewDataWithDictionary(outType, data.Len(), - []*memory.Buffer{nullBitmap, outBuf}, data.NullN(), 0, dict.(*Data)) - err := utils.TransposeIntsBuffers(inIndexType, outIndexType, - data.Buffers()[1].Bytes(), outBuf.Bytes(), data.Offset(), outData.offset, data.Len(), transposeMap) - return outData, err -} - -// 
DictionaryUnifier defines the interface used for unifying, and optionally producing -// transposition maps for, multiple dictionary arrays incrementally. -type DictionaryUnifier interface { - // Unify adds the provided array of dictionary values to be unified. - Unify(arrow.Array) error - // UnifyAndTranspose adds the provided array of dictionary values, - // just like Unify but returns an allocated buffer containing a mapping - // to transpose dictionary indices. - UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) - // GetResult returns the dictionary type (choosing the smallest index type - // that can represent all the values) and the new unified dictionary. - // - // Calling GetResult clears the existing dictionary from the unifier so it - // can be reused by calling Unify/UnifyAndTranspose again with new arrays. - GetResult() (outType arrow.DataType, outDict arrow.Array, err error) - // GetResultWithIndexType is like GetResult, but allows specifying the type - // of the dictionary indexes rather than letting the unifier pick. If the - // passed in index type isn't large enough to represent all of the dictionary - // values, an error will be returned instead. The new unified dictionary - // is returned. - GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) - // Release should be called to clean up any allocated scratch memo-table used - // for building the unified dictionary. - Release() -} - -type unifier struct { - mem memory.Allocator - valueType arrow.DataType - memoTable hashing.MemoTable -} - -// NewDictionaryUnifier constructs and returns a new dictionary unifier for dictionaries -// of valueType, using the provided allocator for allocating the unified dictionary -// and the memotable used for building it. -// -// This will only work for non-nested types currently. a nested valueType or dictionary type -// will result in an error. 
-func NewDictionaryUnifier(alloc memory.Allocator, valueType arrow.DataType) (DictionaryUnifier, error) { - memoTable, err := createMemoTable(alloc, valueType) - if err != nil { - return nil, err - } - return &unifier{ - mem: alloc, - valueType: valueType, - memoTable: memoTable, - }, nil -} - -func (u *unifier) Release() { - if bin, ok := u.memoTable.(*hashing.BinaryMemoTable); ok { - bin.Release() - } -} - -func (u *unifier) Unify(dict arrow.Array) (err error) { - if !arrow.TypeEqual(u.valueType, dict.DataType()) { - return fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), u.valueType) - } - - valFn := getvalFn(dict) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - u.memoTable.GetOrInsertNull() - continue - } - - if _, _, err = u.memoTable.GetOrInsert(valFn(i)); err != nil { - return err - } - } - return -} - -func (u *unifier) UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) { - if !arrow.TypeEqual(u.valueType, dict.DataType()) { - return nil, fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), u.valueType) - } - - transposed = memory.NewResizableBuffer(u.mem) - transposed.Resize(arrow.Int32Traits.BytesRequired(dict.Len())) - - newIdxes := arrow.Int32Traits.CastFromBytes(transposed.Bytes()) - valFn := getvalFn(dict) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - idx, _ := u.memoTable.GetOrInsertNull() - newIdxes[i] = int32(idx) - continue - } - - idx, _, err := u.memoTable.GetOrInsert(valFn(i)) - if err != nil { - transposed.Release() - return nil, err - } - newIdxes[i] = int32(idx) - } - return -} - -func (u *unifier) GetResult() (outType arrow.DataType, outDict arrow.Array, err error) { - dictLen := u.memoTable.Size() - var indexType arrow.DataType - switch { - case dictLen <= math.MaxInt8: - indexType = arrow.PrimitiveTypes.Int8 - case dictLen <= math.MaxInt16: - indexType = arrow.PrimitiveTypes.Int16 - case dictLen <= math.MaxInt32: - 
indexType = arrow.PrimitiveTypes.Int32 - default: - indexType = arrow.PrimitiveTypes.Int64 - } - outType = &arrow.DictionaryType{IndexType: indexType, ValueType: u.valueType} - - dictData, err := GetDictArrayData(u.mem, u.valueType, u.memoTable, 0) - if err != nil { - return nil, nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - outDict = MakeFromData(dictData) - return -} - -func (u *unifier) GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) { - dictLen := u.memoTable.Size() - var toobig bool - switch indexType.ID() { - case arrow.UINT8: - toobig = dictLen > math.MaxUint8 - case arrow.INT8: - toobig = dictLen > math.MaxInt8 - case arrow.UINT16: - toobig = dictLen > math.MaxUint16 - case arrow.INT16: - toobig = dictLen > math.MaxInt16 - case arrow.UINT32: - toobig = uint(dictLen) > math.MaxUint32 - case arrow.INT32: - toobig = dictLen > math.MaxInt32 - case arrow.UINT64: - toobig = uint64(dictLen) > uint64(math.MaxUint64) - case arrow.INT64: - default: - return nil, fmt.Errorf("arrow/array: invalid dictionary index type: %s, must be integral", indexType) - } - if toobig { - return nil, errors.New("arrow/array: cannot combine dictionaries. unified dictionary requires a larger index type") - } - - dictData, err := GetDictArrayData(u.mem, u.valueType, u.memoTable, 0) - if err != nil { - return nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - return MakeFromData(dictData), nil -} - -type binaryUnifier struct { - mem memory.Allocator - memoTable *hashing.BinaryMemoTable -} - -// NewBinaryDictionaryUnifier constructs and returns a new dictionary unifier for dictionaries -// of binary values, using the provided allocator for allocating the unified dictionary -// and the memotable used for building it. 
-func NewBinaryDictionaryUnifier(alloc memory.Allocator) DictionaryUnifier { - return &binaryUnifier{ - mem: alloc, - memoTable: hashing.NewBinaryMemoTable(0, 0, NewBinaryBuilder(alloc, arrow.BinaryTypes.Binary)), - } -} - -func (u *binaryUnifier) Release() { - u.memoTable.Release() -} - -func (u *binaryUnifier) Unify(dict arrow.Array) (err error) { - if !arrow.TypeEqual(arrow.BinaryTypes.Binary, dict.DataType()) { - return fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), arrow.BinaryTypes.Binary) - } - - typedDict := dict.(*Binary) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - u.memoTable.GetOrInsertNull() - continue - } - - if _, _, err = u.memoTable.GetOrInsertBytes(typedDict.Value(i)); err != nil { - return err - } - } - return -} - -func (u *binaryUnifier) UnifyAndTranspose(dict arrow.Array) (transposed *memory.Buffer, err error) { - if !arrow.TypeEqual(arrow.BinaryTypes.Binary, dict.DataType()) { - return nil, fmt.Errorf("dictionary type different from unifier: %s, expected: %s", dict.DataType(), arrow.BinaryTypes.Binary) - } - - transposed = memory.NewResizableBuffer(u.mem) - transposed.Resize(arrow.Int32Traits.BytesRequired(dict.Len())) - - newIdxes := arrow.Int32Traits.CastFromBytes(transposed.Bytes()) - typedDict := dict.(*Binary) - for i := 0; i < dict.Len(); i++ { - if dict.IsNull(i) { - idx, _ := u.memoTable.GetOrInsertNull() - newIdxes[i] = int32(idx) - continue - } - - idx, _, err := u.memoTable.GetOrInsertBytes(typedDict.Value(i)) - if err != nil { - transposed.Release() - return nil, err - } - newIdxes[i] = int32(idx) - } - return -} - -func (u *binaryUnifier) GetResult() (outType arrow.DataType, outDict arrow.Array, err error) { - dictLen := u.memoTable.Size() - var indexType arrow.DataType - switch { - case dictLen <= math.MaxInt8: - indexType = arrow.PrimitiveTypes.Int8 - case dictLen <= math.MaxInt16: - indexType = arrow.PrimitiveTypes.Int16 - case dictLen <= math.MaxInt32: - indexType = 
arrow.PrimitiveTypes.Int32 - default: - indexType = arrow.PrimitiveTypes.Int64 - } - outType = &arrow.DictionaryType{IndexType: indexType, ValueType: arrow.BinaryTypes.Binary} - - dictData, err := GetDictArrayData(u.mem, arrow.BinaryTypes.Binary, u.memoTable, 0) - if err != nil { - return nil, nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - outDict = MakeFromData(dictData) - return -} - -func (u *binaryUnifier) GetResultWithIndexType(indexType arrow.DataType) (arrow.Array, error) { - dictLen := u.memoTable.Size() - var toobig bool - switch indexType.ID() { - case arrow.UINT8: - toobig = dictLen > math.MaxUint8 - case arrow.INT8: - toobig = dictLen > math.MaxInt8 - case arrow.UINT16: - toobig = dictLen > math.MaxUint16 - case arrow.INT16: - toobig = dictLen > math.MaxInt16 - case arrow.UINT32: - toobig = uint(dictLen) > math.MaxUint32 - case arrow.INT32: - toobig = dictLen > math.MaxInt32 - case arrow.UINT64: - toobig = uint64(dictLen) > uint64(math.MaxUint64) - case arrow.INT64: - default: - return nil, fmt.Errorf("arrow/array: invalid dictionary index type: %s, must be integral", indexType) - } - if toobig { - return nil, errors.New("arrow/array: cannot combine dictionaries. 
unified dictionary requires a larger index type") - } - - dictData, err := GetDictArrayData(u.mem, arrow.BinaryTypes.Binary, u.memoTable, 0) - if err != nil { - return nil, err - } - - u.memoTable.Reset() - - defer dictData.Release() - return MakeFromData(dictData), nil -} - -func unifyRecursive(mem memory.Allocator, typ arrow.DataType, chunks []*Data) (changed bool, err error) { - debug.Assert(len(chunks) != 0, "must provide non-zero length chunk slice") - var extType arrow.DataType - - if typ.ID() == arrow.EXTENSION { - extType = typ - typ = typ.(arrow.ExtensionType).StorageType() - } - - if nestedTyp, ok := typ.(arrow.NestedType); ok { - children := make([]*Data, len(chunks)) - for i, f := range nestedTyp.Fields() { - for j, c := range chunks { - children[j] = c.childData[i].(*Data) - } - - childChanged, err := unifyRecursive(mem, f.Type, children) - if err != nil { - return false, err - } - if childChanged { - // only when unification actually occurs - for j := range chunks { - chunks[j].childData[i] = children[j] - } - changed = true - } - } - } - - if typ.ID() == arrow.DICTIONARY { - dictType := typ.(*arrow.DictionaryType) - var ( - uni DictionaryUnifier - newDict arrow.Array - ) - // unify any nested dictionaries first, but the unifier doesn't support - // nested dictionaries yet so this would fail. 
- uni, err = NewDictionaryUnifier(mem, dictType.ValueType) - if err != nil { - return changed, err - } - defer uni.Release() - transposeMaps := make([]*memory.Buffer, len(chunks)) - for i, c := range chunks { - debug.Assert(c.dictionary != nil, "missing dictionary data for dictionary array") - arr := MakeFromData(c.dictionary) - defer arr.Release() - if transposeMaps[i], err = uni.UnifyAndTranspose(arr); err != nil { - return - } - defer transposeMaps[i].Release() - } - - if newDict, err = uni.GetResultWithIndexType(dictType.IndexType); err != nil { - return - } - defer newDict.Release() - - for j := range chunks { - chnk, err := TransposeDictIndices(mem, chunks[j], typ, typ, newDict.Data(), arrow.Int32Traits.CastFromBytes(transposeMaps[j].Bytes())) - if err != nil { - return changed, err - } - chunks[j].Release() - chunks[j] = chnk.(*Data) - if extType != nil { - chunks[j].dtype = extType - } - } - changed = true - } - - return -} - -// UnifyChunkedDicts takes a chunked array of dictionary type and will unify -// the dictionary across all of the chunks with the returned chunked array -// having all chunks share the same dictionary. -// -// The return from this *must* have Release called on it unless an error is returned -// in which case the *arrow.Chunked will be nil. -// -// If there is 1 or fewer chunks, then nothing is modified and this function will just -// call Retain on the passed in Chunked array (so Release can safely be called on it). -// The same is true if the type of the array is not a dictionary or if no changes are -// needed for all of the chunks to be using the same dictionary. 
-func UnifyChunkedDicts(alloc memory.Allocator, chnkd *arrow.Chunked) (*arrow.Chunked, error) { - if len(chnkd.Chunks()) <= 1 { - chnkd.Retain() - return chnkd, nil - } - - chunksData := make([]*Data, len(chnkd.Chunks())) - for i, c := range chnkd.Chunks() { - c.Data().Retain() - chunksData[i] = c.Data().(*Data) - } - changed, err := unifyRecursive(alloc, chnkd.DataType(), chunksData) - if err != nil || !changed { - for _, c := range chunksData { - c.Release() - } - if err == nil { - chnkd.Retain() - } else { - chnkd = nil - } - return chnkd, err - } - - chunks := make([]arrow.Array, len(chunksData)) - for i, c := range chunksData { - chunks[i] = MakeFromData(c) - defer chunks[i].Release() - c.Release() - } - - return arrow.NewChunked(chnkd.DataType(), chunks), nil -} - -// UnifyTableDicts performs UnifyChunkedDicts on each column of the table so that -// any dictionary column will have the dictionaries of its chunks unified. -// -// The returned Table should always be Release'd unless a non-nil error was returned, -// in which case the table returned will be nil. -func UnifyTableDicts(alloc memory.Allocator, table arrow.Table) (arrow.Table, error) { - cols := make([]arrow.Column, table.NumCols()) - for i := 0; i < int(table.NumCols()); i++ { - chnkd, err := UnifyChunkedDicts(alloc, table.Column(i).Data()) - if err != nil { - return nil, err - } - defer chnkd.Release() - cols[i] = *arrow.NewColumn(table.Schema().Field(i), chnkd) - defer cols[i].Release() - } - return NewTable(table.Schema(), cols, table.NumRows()), nil -} - -var ( - _ arrow.Array = (*Dictionary)(nil) - _ Builder = (*dictionaryBuilder)(nil) -) diff --git a/go/arrow/array/dictionary_test.go b/go/arrow/array/dictionary_test.go deleted file mode 100644 index ea9587d8dcdf9..0000000000000 --- a/go/arrow/array/dictionary_test.go +++ /dev/null @@ -1,1918 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "math" - "math/rand" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/types" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" -) - -type PrimitiveDictionaryTestSuite struct { - suite.Suite - - mem *memory.CheckedAllocator - typ arrow.DataType - reftyp reflect.Type -} - -func (p *PrimitiveDictionaryTestSuite) SetupTest() { - p.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) -} - -func (p *PrimitiveDictionaryTestSuite) TearDownTest() { - p.mem.AssertSize(p.T(), 0) -} - -func TestPrimitiveDictionaryBuilders(t *testing.T) { - tests := []struct { - name string - typ arrow.DataType - reftyp reflect.Type - }{ - {"int8", arrow.PrimitiveTypes.Int8, reflect.TypeOf(int8(0))}, - {"uint8", arrow.PrimitiveTypes.Uint8, reflect.TypeOf(uint8(0))}, - {"int16", 
arrow.PrimitiveTypes.Int16, reflect.TypeOf(int16(0))}, - {"uint16", arrow.PrimitiveTypes.Uint16, reflect.TypeOf(uint16(0))}, - {"int32", arrow.PrimitiveTypes.Int32, reflect.TypeOf(int32(0))}, - {"uint32", arrow.PrimitiveTypes.Uint32, reflect.TypeOf(uint32(0))}, - {"int64", arrow.PrimitiveTypes.Int64, reflect.TypeOf(int64(0))}, - {"uint64", arrow.PrimitiveTypes.Uint64, reflect.TypeOf(uint64(0))}, - {"float32", arrow.PrimitiveTypes.Float32, reflect.TypeOf(float32(0))}, - {"float64", arrow.PrimitiveTypes.Float64, reflect.TypeOf(float64(0))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - suite.Run(t, &PrimitiveDictionaryTestSuite{typ: tt.typ, reftyp: tt.reftyp}) - }) - } -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderBasic() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - - p.EqualValues(4, bldr.Len()) - p.EqualValues(1, bldr.NullN()) - - p.EqualValues(2, bldr.DictionarySize()) - - arr := bldr.NewArray().(*array.Dictionary) - defer arr.Release() - - p.True(arrow.TypeEqual(expectedType, arr.DataType())) - expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer expectedDict.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, 1, 0, null]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict) - defer expected.Release() - - 
p.True(array.Equal(expected, arr)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() { - valueType := p.typ - dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType} - bldr := array.NewDictionaryBuilderWithDict(p.mem, dictType, dictArr) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - - p.EqualValues(4, bldr.Len()) - p.EqualValues(1, bldr.NullN()) - - arr := bldr.NewDictionaryArray() - defer arr.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(dictType, expectedIndices, dictArr) - defer expected.Release() - - p.True(array.Equal(expected, arr)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() { - valueType := p.typ - dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: valueType} - bldr := array.NewBuilder(p.mem, dictType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - - 
p.EqualValues(4, bldr.Len()) - p.EqualValues(1, bldr.NullN()) - - arr := bldr.NewArray().(*array.Dictionary) - defer arr.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, dictType.IndexType, strings.NewReader("[0, 1, 0, null]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(dictType, expectedIndices, dictArr) - defer expected.Release() - - p.True(array.Equal(expected, arr)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderAppendArr() { - valueType := p.typ - intermediate, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2, 1]")) - p.NoError(err) - defer intermediate.Release() - - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - bldr.AppendArray(intermediate) - result := bldr.NewArray() - defer result.Release() - - expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[1, 2]")) - p.NoError(err) - defer expectedDict.Release() - - expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, 1, 0]")) - p.NoError(err) - defer expectedIndices.Release() - - expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict) - defer expected.Release() - - p.True(array.Equal(expected, result)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDeltaDictionary() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - result := bldr.NewArray() - defer result.Release() - - exdict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]")) - p.NoError(err) - defer exdict.Release() - exindices, _, err := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0, 1]")) - p.NoError(err) - defer exindices.Release() - expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict) - defer expected.Release() - p.True(array.Equal(expected, result)) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - - indices, delta, err := bldr.NewDelta() - p.NoError(err) - defer indices.Release() - defer delta.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 2, 0, 2]")) - defer exindices.Release() - exdelta, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[3]")) - defer exdelta.Release() - - p.True(array.Equal(exindices, indices)) - p.True(array.Equal(exdelta, delta)) -} - -func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderDoubleDeltaDictionary() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - result := bldr.NewArray() - defer result.Release() - - exdict, _, err := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2]")) - p.NoError(err) - defer exdict.Release() - exindices, _, err := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0, 1]")) - p.NoError(err) - defer exindices.Release() - expected := array.NewDictionaryArray(result.DataType().(*arrow.DictionaryType), exindices, exdict) - defer expected.Release() - p.True(array.Equal(expected, result)) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - - indices, delta, err := bldr.NewDelta() - p.NoError(err) - defer indices.Release() - defer delta.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 2, 0, 2]")) - defer exindices.Release() - exdelta, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[3]")) - defer exdelta.Release() - - p.True(array.Equal(exindices, indices)) - p.True(array.Equal(exdelta, delta)) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - 
p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(5).Convert(p.reftyp)})[0].Interface()) - - indices, delta, err = bldr.NewDelta() - p.NoError(err) - defer indices.Release() - defer delta.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 2, 3, 4]")) - defer exindices.Release() - exdelta, _, _ = array.FromJSON(p.mem, p.typ, strings.NewReader("[4, 5]")) - defer exdelta.Release() - - p.True(array.Equal(exindices, indices)) - p.True(array.Equal(exdelta, delta)) -} - -func (p *PrimitiveDictionaryTestSuite) TestNewResetBehavior() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - p.Less(0, bldr.Cap()) - p.Less(0, bldr.NullN()) - p.Equal(4, bldr.Len()) - - result := bldr.NewDictionaryArray() - defer result.Release() - - p.Zero(bldr.Cap()) - p.Zero(bldr.Len()) - p.Zero(bldr.NullN()) - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) - - result = bldr.NewDictionaryArray() - defer result.Release() - - p.Equal(4, result.Dictionary().Len()) -} - -func (p *PrimitiveDictionaryTestSuite) TestResetFull() { - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: p.typ} - bldr := array.NewDictionaryBuilder(p.mem, expectedType) - defer bldr.Release() - - builder := 
reflect.ValueOf(bldr) - appfn := builder.MethodByName("Append") - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - bldr.AppendNull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - - result := bldr.NewDictionaryArray() - defer result.Release() - - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(3).Convert(p.reftyp)})[0].Interface()) - result = bldr.NewDictionaryArray() - defer result.Release() - - exindices, _, _ := array.FromJSON(p.mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[2]")) - exdict, _, _ := array.FromJSON(p.mem, p.typ, strings.NewReader("[1, 2, 3]")) - defer exindices.Release() - defer exdict.Release() - - p.True(array.Equal(exindices, result.Indices())) - p.True(array.Equal(exdict, result.Dictionary())) - - bldr.ResetFull() - p.Nil(appfn.Call([]reflect.Value{reflect.ValueOf(4).Convert(p.reftyp)})[0].Interface()) - result = bldr.NewDictionaryArray() - defer result.Release() - - exindices, _, _ = array.FromJSON(p.mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[0]")) - exdict, _, _ = array.FromJSON(p.mem, p.typ, strings.NewReader("[4]")) - defer exindices.Release() - defer exdict.Release() - - p.True(array.Equal(exindices, result.Indices())) - p.True(array.Equal(exdict, result.Dictionary())) -} - -func (p *PrimitiveDictionaryTestSuite) TestStringRoundTrip() { - dt := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ} - b := array.NewDictionaryBuilder(p.mem, dt) - defer b.Release() - - builder := reflect.ValueOf(b) - fn := builder.MethodByName("Append") - p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})[0].Interface()) - p.Nil(fn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})[0].Interface()) - b.AppendNull() - - p.EqualValues(4, 
b.Len()) - p.EqualValues(1, b.NullN()) - - arr := b.NewArray().(*array.Dictionary) - defer arr.Release() - p.True(arrow.TypeEqual(dt, arr.DataType())) - - b1 := array.NewDictionaryBuilder(p.mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - p.NoError(b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Dictionary) - defer arr1.Release() - - p.Equal(arr.Len(), arr1.Len()) - p.True(array.Equal(arr, arr1)) -} - -func TestBasicStringDictionaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.Append([]byte("test"))) - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(0))) - assert.Equal(t, "test2", builder.ValueStr(builder.GetValueIndex(1))) - assert.Equal(t, "test", builder.ValueStr(builder.GetValueIndex(2))) - - result := bldr.NewDictionaryArray() - defer result.Release() - - exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) - defer exdict.Release() - exint, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer exint.Release() - - assert.True(t, arrow.TypeEqual(dictType, result.DataType())) - expected := array.NewDictionaryArray(dictType, exint, exdict) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryInsertValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c", "a", "b", "d"]`)) - defer 
exdict.Release() - - invalidDict, _, err := array.FromJSON(mem, arrow.BinaryTypes.Binary, strings.NewReader(`["ZQ==", "Zg=="]`)) - assert.NoError(t, err) - defer invalidDict.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String))) - // inserting again should have no effect - assert.NoError(t, builder.InsertStringDictValues(exdict.(*array.String))) - - assert.Error(t, builder.InsertDictValues(invalidDict.(*array.Binary))) - - for i := 0; i < 2; i++ { - builder.AppendString("c") - builder.AppendString("a") - builder.AppendString("b") - builder.AppendNull() - builder.AppendString("d") - } - - assert.Equal(t, 10, bldr.Len()) - - result := bldr.NewDictionaryArray() - defer result.Release() - - exindices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader("[0, 1, 2, null, 3, 0, 1, 2, null, 3]")) - defer exindices.Release() - expected := array.NewDictionaryArray(dictType, exindices, exdict) - defer expected.Release() - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryBuilderInit(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictArr, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) - defer dictArr.Release() - intarr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer intarr.Release() - - dictType := &arrow.DictionaryType{IndexType: intarr.DataType().(arrow.FixedWidthDataType), ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dictArr) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.AppendString("test")) - 
assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - result := bldr.NewDictionaryArray() - defer result.Release() - - expected := array.NewDictionaryArray(dictType, intarr, dictArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryBuilderOnlyNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - bldr.AppendNull() - result := bldr.NewDictionaryArray() - defer result.Release() - - dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader("[]")) - defer dict.Release() - intarr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[null]")) - defer intarr.Release() - - expected := array.NewDictionaryArray(dictType, intarr, dict) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestStringDictionaryBuilderDelta(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.AppendString("test")) - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test")) - - result := bldr.NewDictionaryArray() - defer result.Release() - - exdict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test", "test2"]`)) - defer exdict.Release() - exint, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer exint.Release() - - assert.True(t, arrow.TypeEqual(dictType, 
result.DataType())) - expected := array.NewDictionaryArray(dictType, exint, exdict) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) - - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, builder.AppendString("test3")) - assert.NoError(t, builder.AppendString("test2")) - - indices, delta, err := builder.NewDelta() - assert.NoError(t, err) - defer indices.Release() - defer delta.Release() - - exdelta, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["test3"]`)) - defer exdelta.Release() - exint, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[1, 2, 1]")) - defer exint.Release() - - assert.True(t, array.Equal(exdelta, delta)) - assert.True(t, array.Equal(exint, indices)) -} - -func TestStringDictionaryBuilderBigDelta(t *testing.T) { - const testlen = 2048 - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int16Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - builder := bldr.(*array.BinaryDictionaryBuilder) - - strbldr := array.NewStringBuilder(mem) - defer strbldr.Release() - - intbldr := array.NewInt16Builder(mem) - defer intbldr.Release() - - for idx := int16(0); idx < testlen; idx++ { - var b strings.Builder - b.WriteString("test") - fmt.Fprint(&b, idx) - - val := b.String() - assert.NoError(t, builder.AppendString(val)) - strbldr.Append(val) - intbldr.Append(idx) - } - - result := bldr.NewDictionaryArray() - defer result.Release() - strarr := strbldr.NewStringArray() - defer strarr.Release() - intarr := intbldr.NewInt16Array() - defer intarr.Release() - - expected := array.NewDictionaryArray(dictType, intarr, strarr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) - - strbldr2 := array.NewStringBuilder(mem) - defer strbldr2.Release() - intbldr2 := 
array.NewInt16Builder(mem) - defer intbldr2.Release() - - for idx := int16(0); idx < testlen; idx++ { - builder.AppendString("test1") - intbldr2.Append(1) - } - for idx := int16(0); idx < testlen; idx++ { - builder.AppendString("test_new_value1") - intbldr2.Append(testlen) - } - strbldr2.Append("test_new_value1") - - indices2, delta2, err := bldr.NewDelta() - assert.NoError(t, err) - defer indices2.Release() - defer delta2.Release() - strarr2 := strbldr2.NewStringArray() - defer strarr2.Release() - intarr2 := intbldr2.NewInt16Array() - defer intarr2.Release() - - assert.True(t, array.Equal(intarr2, indices2)) - assert.True(t, array.Equal(strarr2, delta2)) - - strbldr3 := array.NewStringBuilder(mem) - defer strbldr3.Release() - intbldr3 := array.NewInt16Builder(mem) - defer intbldr3.Release() - - for idx := int16(0); idx < testlen; idx++ { - assert.NoError(t, builder.AppendString("test2")) - intbldr3.Append(2) - } - for idx := int16(0); idx < testlen; idx++ { - assert.NoError(t, builder.AppendString("test_new_value2")) - intbldr3.Append(testlen + 1) - } - strbldr3.Append("test_new_value2") - - indices3, delta3, err := bldr.NewDelta() - assert.NoError(t, err) - defer indices3.Release() - defer delta3.Release() - strarr3 := strbldr3.NewStringArray() - defer strarr3.Release() - intarr3 := intbldr3.NewInt16Array() - defer intarr3.Release() - - assert.True(t, array.Equal(intarr3, indices3)) - assert.True(t, array.Equal(strarr3, delta3)) -} - -func TestStringDictionaryBuilderIsNull(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - assert.NoError(t, builder.AppendString("test")) - builder.AppendNull() - assert.NoError(t, builder.AppendString("test2")) - assert.NoError(t, 
builder.AppendString("test")) - - assert.False(t, bldr.IsNull(0)) - assert.True(t, bldr.IsNull(1)) - assert.False(t, bldr.IsNull(2)) - assert.False(t, bldr.IsNull(3)) -} - -func TestFixedSizeBinaryDictionaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - test := []byte{12, 12, 11, 12} - test2 := []byte{12, 12, 11, 11} - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result := builder.NewDictionaryArray() - defer result.Release() - - fsbBldr := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType)) - defer fsbBldr.Release() - - fsbBldr.Append(test) - fsbBldr.Append(test2) - fsbArr := fsbBldr.NewFixedSizeBinaryArray() - defer fsbArr.Release() - - intbldr := array.NewInt8Builder(mem) - defer intbldr.Release() - - intbldr.AppendValues([]int8{0, 1, 0}, nil) - intArr := intbldr.NewInt8Array() - defer intArr.Release() - - expected := array.NewDictionaryArray(dictType, intArr, fsbArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestFixedSizeBinaryDictionaryBuilderInit(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4}) - defer fsbBldr.Release() - - test, test2 := []byte("abcd"), []byte("wxyz") - fsbBldr.AppendValues([][]byte{test, test2}, nil) - dictArr := fsbBldr.NewFixedSizeBinaryArray() - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()} - bldr := 
array.NewDictionaryBuilderWithDict(mem, dictType, dictArr) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result := builder.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer indices.Release() - - expected := array.NewDictionaryArray(dictType, indices, dictArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestFixedSizeBinaryDictionaryBuilderMakeBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - fsbBldr := array.NewFixedSizeBinaryBuilder(mem, &arrow.FixedSizeBinaryType{ByteWidth: 4}) - defer fsbBldr.Release() - - test, test2 := []byte("abcd"), []byte("wxyz") - fsbBldr.AppendValues([][]byte{test, test2}, nil) - dictArr := fsbBldr.NewFixedSizeBinaryArray() - defer dictArr.Release() - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: dictArr.DataType()} - bldr := array.NewBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result := builder.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader("[0, 1, 0]")) - defer indices.Release() - - expected := array.NewDictionaryArray(dictType, indices, dictArr) - defer expected.Release() - - assert.True(t, array.Equal(expected, result)) -} - -func TestFixedSizeBinaryDictionaryBuilderDeltaDictionary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, 
ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.FixedSizeBinaryDictionaryBuilder) - test := []byte{12, 12, 11, 12} - test2 := []byte{12, 12, 11, 11} - test3 := []byte{12, 12, 11, 10} - - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - result1 := bldr.NewDictionaryArray() - defer result1.Release() - - fsbBuilder := array.NewFixedSizeBinaryBuilder(mem, dictType.ValueType.(*arrow.FixedSizeBinaryType)) - defer fsbBuilder.Release() - - fsbBuilder.AppendValues([][]byte{test, test2}, nil) - fsbArr1 := fsbBuilder.NewFixedSizeBinaryArray() - defer fsbArr1.Release() - - intBuilder := array.NewInt8Builder(mem) - defer intBuilder.Release() - intBuilder.AppendValues([]int8{0, 1, 0}, nil) - intArr1 := intBuilder.NewInt8Array() - defer intArr1.Release() - - expected := array.NewDictionaryArray(dictType, intArr1, fsbArr1) - defer expected.Release() - assert.True(t, array.Equal(expected, result1)) - - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test3)) - - indices2, delta2, err := builder.NewDelta() - assert.NoError(t, err) - defer indices2.Release() - defer delta2.Release() - - fsbBuilder.Append(test3) - fsbArr2 := fsbBuilder.NewFixedSizeBinaryArray() - defer fsbArr2.Release() - - intBuilder.AppendValues([]int8{0, 1, 2}, nil) - intArr2 := intBuilder.NewInt8Array() - defer intArr2.Release() - - assert.True(t, array.Equal(intArr2, indices2)) - assert.True(t, array.Equal(fsbArr2, delta2)) -} - -func TestFixedSizeBinaryDictionaryStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.FixedSizeBinaryType{ByteWidth: 4}} - b := array.NewDictionaryBuilder(mem, dictType) - defer b.Release() - - builder := b.(*array.FixedSizeBinaryDictionaryBuilder) - test := []byte{12, 12, 11, 12} - test2 := []byte{12, 12, 11, 11} - assert.NoError(t, builder.Append(test)) - assert.NoError(t, builder.Append(test2)) - assert.NoError(t, builder.Append(test)) - - arr := builder.NewDictionaryArray() - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDictionaryBuilder(mem, dictType) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Dictionary) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDecimal128DictionaryBuilderBasic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - test := []decimal128.Num{decimal128.FromI64(12), decimal128.FromI64(12), decimal128.FromI64(11), decimal128.FromI64(12)} - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal128Type{Precision: 2, Scale: 0}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.Decimal128DictionaryBuilder) - for _, v := range test { - assert.NoError(t, builder.Append(v)) - } - - result := bldr.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader("[0, 0, 1, 0]")) - defer indices.Release() - dict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader("[12, 11]")) - defer dict.Release() - - expected := array.NewDictionaryArray(dictType, indices, dict) - defer expected.Release() - - assert.True(t, array.ApproxEqual(expected, result)) -} - 
-func TestDecimal256DictionaryBuilderBasic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - test := []decimal256.Num{decimal256.FromI64(12), decimal256.FromI64(12), decimal256.FromI64(11), decimal256.FromI64(12)} - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: &arrow.Decimal256Type{Precision: 2, Scale: 0}} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.Decimal256DictionaryBuilder) - for _, v := range test { - assert.NoError(t, builder.Append(v)) - } - - result := bldr.NewDictionaryArray() - defer result.Release() - - indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader("[0, 0, 1, 0]")) - defer indices.Release() - dict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader("[12, 11]")) - defer dict.Release() - - expected := array.NewDictionaryArray(dictType, indices, dict) - defer expected.Release() - - assert.True(t, array.ApproxEqual(expected, result)) -} - -func TestNullDictionaryBuilderBasic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.Null} - bldr := array.NewBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.NullDictionaryBuilder) - builder.AppendNulls(3) - assert.Equal(t, 3, builder.Len()) - assert.Equal(t, 3, builder.NullN()) - - nullarr, _, _ := array.FromJSON(mem, arrow.Null, strings.NewReader("[null, null, null]")) - defer nullarr.Release() - - assert.NoError(t, builder.AppendArray(nullarr)) - assert.Equal(t, 6, bldr.Len()) - assert.Equal(t, 6, bldr.NullN()) - - result := builder.NewDictionaryArray() - defer result.Release() - assert.Equal(t, 6, result.Len()) - assert.Equal(t, 6, result.NullN()) -} - -func TestDictionaryEquals(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer 
mem.AssertSize(t, 0) - - var ( - isValid = []bool{true, true, false, true, true, true} - dict, dict2 arrow.Array - indices, indices2, indices3 arrow.Array - ) - - dict, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`)) - defer dict.Release() - dictType := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String} - - dict2, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz", "qux"]`)) - defer dict2.Release() - dictType2 := &arrow.DictionaryType{IndexType: &arrow.Uint16Type{}, ValueType: arrow.BinaryTypes.String} - - idxbuilder := array.NewUint16Builder(mem) - defer idxbuilder.Release() - - idxbuilder.AppendValues([]uint16{1, 2, math.MaxUint16, 0, 2, 0}, isValid) - indices = idxbuilder.NewArray() - defer indices.Release() - - idxbuilder.AppendValues([]uint16{1, 2, 0, 0, 2, 0}, isValid) - indices2 = idxbuilder.NewArray() - defer indices2.Release() - - idxbuilder.AppendValues([]uint16{1, 1, 0, 0, 2, 0}, isValid) - indices3 = idxbuilder.NewArray() - defer indices3.Release() - - var ( - arr = array.NewDictionaryArray(dictType, indices, dict) - arr2 = array.NewDictionaryArray(dictType, indices2, dict) - arr3 = array.NewDictionaryArray(dictType2, indices, dict2) - arr4 = array.NewDictionaryArray(dictType, indices3, dict) - ) - defer func() { - arr.Release() - arr2.Release() - arr3.Release() - arr4.Release() - }() - - assert.True(t, array.Equal(arr, arr)) - // equal because the unequal index is masked by null - assert.True(t, array.Equal(arr, arr2)) - // unequal dictionaries - assert.False(t, array.Equal(arr, arr3)) - // unequal indices - assert.False(t, array.Equal(arr, arr4)) - assert.True(t, array.SliceEqual(arr, 3, 6, arr4, 3, 6)) - assert.False(t, array.SliceEqual(arr, 1, 3, arr4, 1, 3)) - - sz := arr.Len() - slice := array.NewSlice(arr, 2, int64(sz)) - defer slice.Release() - slice2 := array.NewSlice(arr, 2, int64(sz)) - defer slice2.Release() - - 
assert.Equal(t, sz-2, slice.Len()) - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len()))) - - // chained slice - slice2 = array.NewSlice(arr, 1, int64(arr.Len())) - defer slice2.Release() - slice2 = array.NewSlice(slice2, 1, int64(slice2.Len())) - defer slice2.Release() - - assert.True(t, array.Equal(slice, slice2)) - slice = array.NewSlice(arr, 1, 4) - defer slice.Release() - slice2 = array.NewSlice(arr, 1, 4) - defer slice2.Release() - - assert.Equal(t, 3, slice.Len()) - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 1, 4, slice, 0, int64(slice.Len()))) -} - -func TestDictionaryIndexTypes(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictIndexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, - } - - for _, indextyp := range dictIndexTypes { - t.Run(indextyp.Name(), func(t *testing.T) { - scope := memory.NewCheckedAllocatorScope(mem) - defer scope.CheckSize(t) - - dictType := &arrow.DictionaryType{IndexType: indextyp, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - builder := bldr.(*array.BinaryDictionaryBuilder) - builder.AppendString("foo") - builder.AppendString("bar") - builder.AppendString("foo") - builder.AppendString("baz") - builder.Append(nil) - - assert.Equal(t, 5, builder.Len()) - assert.Equal(t, 1, builder.NullN()) - - result := builder.NewDictionaryArray() - defer result.Release() - - expectedIndices, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[0, 1, 0, 2, null]")) - defer expectedIndices.Release() - - assert.True(t, array.Equal(expectedIndices, result.Indices())) - }) - } -} - -func 
TestDictionaryFromArrays(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["foo", "bar", "baz"]`)) - defer dict.Release() - - dictIndexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, - } - - for _, indextyp := range dictIndexTypes { - t.Run(indextyp.Name(), func(t *testing.T) { - scope := memory.NewCheckedAllocatorScope(mem) - defer scope.CheckSize(t) - - dictType := &arrow.DictionaryType{IndexType: indextyp, ValueType: arrow.BinaryTypes.String} - indices1, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, 0, 2, 0]")) - defer indices1.Release() - - indices2, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, 3, 2, 0]")) - defer indices2.Release() - - arr1, err := array.NewValidatedDictionaryArray(dictType, indices1, dict) - assert.NoError(t, err) - defer arr1.Release() - - _, err = array.NewValidatedDictionaryArray(dictType, indices2, dict) - assert.Error(t, err) - - switch indextyp.ID() { - case arrow.INT8, arrow.INT16, arrow.INT32, arrow.INT64: - indices3, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, 0, null, 2, 0]")) - defer indices3.Release() - bitutil.ClearBit(indices3.Data().Buffers()[0].Bytes(), 2) - arr3, err := array.NewValidatedDictionaryArray(dictType, indices3, dict) - assert.NoError(t, err) - defer arr3.Release() - } - - indices4, _, _ := array.FromJSON(mem, indextyp, strings.NewReader("[1, 2, null, 3, 2, 0]")) - defer indices4.Release() - _, err = array.NewValidatedDictionaryArray(dictType, indices4, dict) - assert.Error(t, err) - - diffIndexType := arrow.PrimitiveTypes.Int8 - if indextyp.ID() == arrow.INT8 { - diffIndexType = arrow.PrimitiveTypes.Uint8 
- } - _, err = array.NewValidatedDictionaryArray(&arrow.DictionaryType{IndexType: diffIndexType, ValueType: arrow.BinaryTypes.String}, indices4, dict) - assert.Error(t, err) - }) - } -} - -func TestListOfDictionary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - rootBuilder := array.NewBuilder(mem, arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String})) - defer rootBuilder.Release() - - listBldr := rootBuilder.(*array.ListBuilder) - dictBldr := listBldr.ValueBuilder().(*array.BinaryDictionaryBuilder) - - listBldr.Append(true) - expected := []string{} - for _, a := range []byte("abc") { - for _, d := range []byte("def") { - for _, g := range []byte("ghi") { - for _, j := range []byte("jkl") { - for _, m := range []byte("mno") { - for _, p := range []byte("pqr") { - if a+d+g+j+m+p%16 == 0 { - listBldr.Append(true) - } - - str := string([]byte{a, d, g, j, m, p}) - dictBldr.AppendString(str) - expected = append(expected, str) - } - } - } - } - } - } - - strbldr := array.NewStringBuilder(mem) - defer strbldr.Release() - strbldr.AppendValues(expected, nil) - - expectedDict := strbldr.NewStringArray() - defer expectedDict.Release() - - arr := rootBuilder.NewArray() - defer arr.Release() - - actualDict := arr.(*array.List).ListValues().(*array.Dictionary) - assert.True(t, array.Equal(expectedDict, actualDict.Dictionary())) -} - -func TestDictionaryCanCompareIndices(t *testing.T) { - makeDict := func(mem memory.Allocator, idxType, valueType arrow.DataType, dictJSON string) *array.Dictionary { - indices, _, _ := array.FromJSON(mem, idxType, strings.NewReader("[]")) - defer indices.Release() - dict, _, _ := array.FromJSON(mem, valueType, strings.NewReader(dictJSON)) - defer dict.Release() - - out, _ := array.NewValidatedDictionaryArray(&arrow.DictionaryType{IndexType: idxType, ValueType: valueType}, indices, dict) - return out - } - - compareSwap := func(t 
*testing.T, l, r *array.Dictionary, expected bool) { - assert.Equalf(t, expected, l.CanCompareIndices(r), "left: %s\nright: %s\n", l, r) - assert.Equalf(t, expected, r.CanCompareIndices(l), "left: %s\nright: %s\n", r, l) - } - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("same", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer arr.Release() - same := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer same.Release() - compareSwap(t, arr, same, true) - }) - - t.Run("prefix dict", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar", "quux"]`) - defer arr.Release() - prefixDict := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer prefixDict.Release() - compareSwap(t, arr, prefixDict, true) - }) - - t.Run("indices need cast", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer arr.Release() - needcast := makeDict(mem, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, `["foo", "bar"]`) - defer needcast.Release() - compareSwap(t, arr, needcast, false) - }) - - t.Run("non prefix", func(t *testing.T) { - arr := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "bar", "quux"]`) - defer arr.Release() - nonPrefix := makeDict(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String, `["foo", "blink"]`) - defer nonPrefix.Release() - compareSwap(t, arr, nonPrefix, false) - }) -} - -func TestDictionaryGetValueIndex(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - indicesJson := "[5, 0, 1, 3, 2, 4]" - indices64, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(indicesJson)) - defer indices64.Release() - dict, _, _ := 
array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader("[10, 20, 30, 40, 50, 60]")) - defer dict.Release() - - dictIndexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Uint64, - } - i64Index := indices64.(*array.Int64) - for _, idxt := range dictIndexTypes { - t.Run(idxt.Name(), func(t *testing.T) { - indices, _, _ := array.FromJSON(mem, idxt, strings.NewReader(indicesJson)) - defer indices.Release() - dictType := &arrow.DictionaryType{IndexType: idxt, ValueType: arrow.PrimitiveTypes.Int32} - - dictArr := array.NewDictionaryArray(dictType, indices, dict) - defer dictArr.Release() - - const offset = 1 - slicedDictArr := array.NewSlice(dictArr, offset, int64(dictArr.Len())) - defer slicedDictArr.Release() - assert.EqualValues(t, "10", slicedDictArr.(*array.Dictionary).ValueStr(0)) - for i := 0; i < indices.Len(); i++ { - assert.EqualValues(t, i64Index.Value(i), dictArr.GetValueIndex(i)) - if i < slicedDictArr.Len() { - assert.EqualValues(t, i64Index.Value(i+offset), slicedDictArr.(*array.Dictionary).GetValueIndex(i)) - } - } - }) - } -} - -func checkTransposeMap(t *testing.T, b *memory.Buffer, exp []int32) bool { - got := arrow.Int32Traits.CastFromBytes(b.Bytes()) - return assert.Equal(t, exp, got) -} - -func TestDictionaryUnifierNumeric(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := arrow.PrimitiveTypes.Int64 - - d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[3, 4, 7]`)) - require.NoError(t, err) - d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[1, 7, 4, 8]`)) - require.NoError(t, err) - d3, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[1, -200]`)) - require.NoError(t, err) - - expected := &arrow.DictionaryType{IndexType: 
arrow.PrimitiveTypes.Int8, ValueType: dictType} - expectedDict, _, err := array.FromJSON(mem, dictType, strings.NewReader(`[3, 4, 7, 1, 8, -200]`)) - require.NoError(t, err) - defer func() { - d1.Release() - d2.Release() - d3.Release() - expectedDict.Release() - }() - - unifier, err := array.NewDictionaryUnifier(mem, dictType) - assert.NoError(t, err) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - assert.NoError(t, unifier.Unify(d3)) - - invalid, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, -200]`)) - defer invalid.Release() - assert.EqualError(t, unifier.Unify(invalid), "dictionary type different from unifier: int32, expected: int64") - - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - b3, err := unifier.UnifyAndTranspose(d3) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - b3.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1, 2}) - checkTransposeMap(t, b2, []int32{3, 2, 1, 4}) - checkTransposeMap(t, b3, []int32{3, 5}) -} - -func TestDictionaryUnifierString(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := arrow.BinaryTypes.String - d1, _, err := array.FromJSON(mem, dictType, 
strings.NewReader(`["foo", "bar"]`)) - require.NoError(t, err) - defer d1.Release() - - d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["quux", "foo"]`)) - require.NoError(t, err) - defer d2.Release() - - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType} - expectedDict, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["foo", "bar", "quux"]`)) - defer expectedDict.Release() - - unifier, err := array.NewDictionaryUnifier(mem, dictType) - assert.NoError(t, err) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1}) - checkTransposeMap(t, b2, []int32{2, 0}) -} - -func TestDictionaryUnifierBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := arrow.BinaryTypes.Binary - d1, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["Zm9vCg==", "YmFyCg=="]`)) // base64("foo\n"), base64("bar\n") - require.NoError(t, err) - defer d1.Release() - - d2, _, err := array.FromJSON(mem, dictType, strings.NewReader(`["cXV1eAo=", "Zm9vCg=="]`)) // base64("quux\n"), 
base64("foo\n") - require.NoError(t, err) - defer d2.Release() - - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType} - expectedDict, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["Zm9vCg==", "YmFyCg==", "cXV1eAo="]`)) - defer expectedDict.Release() - - unifier := array.NewBinaryDictionaryUnifier(mem) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1}) - checkTransposeMap(t, b2, []int32{2, 0}) -} - -func TestDictionaryUnifierFixedSizeBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.FixedSizeBinaryType{ByteWidth: 3} - data := memory.NewBufferBytes([]byte(`foobarbazqux`)) - - fsbData := array.NewData(dictType, 2, []*memory.Buffer{nil, memory.SliceBuffer(data, 0, 6)}, nil, 0, 0) - defer fsbData.Release() - d1 := array.NewFixedSizeBinaryData(fsbData) - fsbData = array.NewData(dictType, 3, []*memory.Buffer{nil, memory.SliceBuffer(data, 3, 9)}, nil, 0, 0) - defer fsbData.Release() - d2 := array.NewFixedSizeBinaryData(fsbData) - - fsbData = array.NewData(dictType, 4, 
[]*memory.Buffer{nil, data}, nil, 0, 0) - defer fsbData.Release() - expectedDict := array.NewFixedSizeBinaryData(fsbData) - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: dictType} - - unifier, err := array.NewDictionaryUnifier(mem, dictType) - assert.NoError(t, err) - - defer func() { - d1.Release() - d2.Release() - expectedDict.Release() - unifier.Release() - }() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - b1, err := unifier.UnifyAndTranspose(d1) - assert.NoError(t, err) - b2, err := unifier.UnifyAndTranspose(d2) - assert.NoError(t, err) - - outType, outDict, err = unifier.GetResult() - assert.NoError(t, err) - defer func() { - outDict.Release() - b1.Release() - b2.Release() - }() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) - - checkTransposeMap(t, b1, []int32{0, 1}) - checkTransposeMap(t, b2, []int32{1, 2, 3}) -} - -func TestDictionaryUnifierLarge(t *testing.T) { - // unifying larger dictionaries should choose the right index type - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - bldr := array.NewInt32Builder(mem) - defer bldr.Release() - bldr.Reserve(120) - for i := int32(0); i < 120; i++ { - bldr.UnsafeAppend(i) - } - - d1 := bldr.NewInt32Array() - defer d1.Release() - assert.EqualValues(t, 120, d1.Len()) - - bldr.Reserve(30) - for i := int32(110); i < 140; i++ { - bldr.UnsafeAppend(i) - } - - d2 := bldr.NewInt32Array() - defer d2.Release() - assert.EqualValues(t, 30, d2.Len()) - - 
bldr.Reserve(140) - for i := int32(0); i < 140; i++ { - bldr.UnsafeAppend(i) - } - - expectedDict := bldr.NewInt32Array() - defer expectedDict.Release() - assert.EqualValues(t, 140, expectedDict.Len()) - - // int8 would be too narrow to hold all the values - expected := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.PrimitiveTypes.Int32} - - unifier, err := array.NewDictionaryUnifier(mem, arrow.PrimitiveTypes.Int32) - assert.NoError(t, err) - defer unifier.Release() - - assert.NoError(t, unifier.Unify(d1)) - assert.NoError(t, unifier.Unify(d2)) - outType, outDict, err := unifier.GetResult() - assert.NoError(t, err) - defer outDict.Release() - - assert.Truef(t, arrow.TypeEqual(expected, outType), "got: %s, expected: %s", outType, expected) - assert.Truef(t, array.Equal(expectedDict, outDict), "got: %s, expected: %s", outDict, expectedDict) -} - -func checkDictionaryArray(t *testing.T, arr, expectedVals, expectedIndices arrow.Array) bool { - require.IsType(t, (*array.Dictionary)(nil), arr) - dictArr := arr.(*array.Dictionary) - ret := true - ret = ret && assert.Truef(t, array.Equal(expectedVals, dictArr.Dictionary()), "got: %s, expected: %s", dictArr.Dictionary(), expectedVals) - return ret && assert.Truef(t, array.Equal(expectedIndices, dictArr.Indices()), "got: %s, expected: %s", dictArr.Indices(), expectedIndices) -} - -func TestDictionaryUnifierSimpleChunkedArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} - chunk1, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ab", "cd", null, "cd"]`)) - chunk2, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ef", "cd", "ef"]`)) - chunk3, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`["ef", "ab", null, "ab"]`)) - chunk4, _, _ := array.FromJSON(mem, dictType, strings.NewReader(`[]`)) - chunked 
:= arrow.NewChunked(dictType, []arrow.Array{chunk1, chunk2, chunk3, chunk4}) - defer func() { - chunk1.Release() - chunk2.Release() - chunk3.Release() - chunk4.Release() - chunked.Release() - }() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - - assert.Len(t, unified.Chunks(), 4) - expectedDict, _, _ := array.FromJSON(mem, dictType.ValueType, strings.NewReader(`["ab", "cd", "ef"]`)) - defer expectedDict.Release() - - c1Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[0, 1, null, 1]`)) - defer c1Indices.Release() - c2Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[2, 1, 2]`)) - defer c2Indices.Release() - c3Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[2, 0, null, 0]`)) - defer c3Indices.Release() - c4Indices, _, _ := array.FromJSON(mem, dictType.IndexType, strings.NewReader(`[]`)) - defer c4Indices.Release() - checkDictionaryArray(t, unified.Chunk(0), expectedDict, c1Indices) - checkDictionaryArray(t, unified.Chunk(1), expectedDict, c2Indices) - checkDictionaryArray(t, unified.Chunk(2), expectedDict, c3Indices) - checkDictionaryArray(t, unified.Chunk(3), expectedDict, c4Indices) -} - -func TestDictionaryUnifierChunkedArrayZeroChunks(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} - chunked := arrow.NewChunked(dictType, []arrow.Array{}) - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - assert.True(t, array.ChunkedEqual(unified, chunked)) -} - -func TestDictionaryUnifierChunkedArrayOneChunk(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dictType := &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String} - chunk1, _, 
_ := array.FromJSON(mem, dictType, strings.NewReader(`["ab", "cd", null, "cd"]`)) - defer chunk1.Release() - - chunked := arrow.NewChunked(dictType, []arrow.Array{chunk1}) - defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - - assert.True(t, array.ChunkedEqual(unified, chunked)) - assert.Same(t, unified, chunked) -} - -func TestDictionaryUnifierChunkedArrayNoDict(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - typ := arrow.PrimitiveTypes.Int8 - chunk1, _, _ := array.FromJSON(mem, typ, strings.NewReader(`[1, 1, 2, 3]`)) - defer chunk1.Release() - - chunk2, _, _ := array.FromJSON(mem, typ, strings.NewReader(`[5, 8, 13]`)) - defer chunk2.Release() - - chunked := arrow.NewChunked(typ, []arrow.Array{chunk1, chunk2}) - defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - - assert.True(t, array.ChunkedEqual(unified, chunked)) - assert.Same(t, unified, chunked) -} - -func TestDictionaryUnifierChunkedArrayNested(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - typ := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: arrow.BinaryTypes.String}) - chunk1, _, err := array.FromJSON(mem, typ, strings.NewReader(`[["ab", "cd"], ["cd"]]`)) - assert.NoError(t, err) - // defer chunk1.Release() - chunk2, _, err := array.FromJSON(mem, typ, strings.NewReader(`[[], ["ef", "cd", "ef"]]`)) - assert.NoError(t, err) - // defer chunk2.Release() - chunked := arrow.NewChunked(typ, []arrow.Array{chunk1, chunk2}) - // defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - // defer unified.Release() - assert.Len(t, unified.Chunks(), 2) - - expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, 
strings.NewReader(`["ab", "cd", "ef"]`)) - // defer expectedDict.Release() - - unified1 := unified.Chunk(0).(*array.List) - assert.Equal(t, []int32{0, 2, 3}, unified1.Offsets()) - expectedIndices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[0, 1, 1]`)) - // defer expectedIndices1.Release() - checkDictionaryArray(t, unified1.ListValues(), expectedDict, expectedIndices1) - - unified2 := unified.Chunk(1).(*array.List) - assert.Equal(t, []int32{0, 0, 3}, unified2.Offsets()) - expectedIndices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int16, strings.NewReader(`[2, 1, 2]`)) - // defer expectedIndices2.Release() - checkDictionaryArray(t, unified2.ListValues(), expectedDict, expectedIndices2) - defer func() { - expectedIndices1.Release() - expectedIndices2.Release() - expectedDict.Release() - unified.Release() - chunked.Release() - chunk2.Release() - chunk1.Release() - }() -} - -func TestDictionaryUnifierChunkedArrayExtension(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dt := types.NewDictExtensionType() - chunk1, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ab", null, "cd", "ab"]`)) - assert.NoError(t, err) - defer chunk1.Release() - - chunk2, _, err := array.FromJSON(mem, dt, strings.NewReader(`["ef", "ab", "ab"]`)) - assert.NoError(t, err) - defer chunk2.Release() - - chunked := arrow.NewChunked(dt, []arrow.Array{chunk1, chunk2}) - defer chunked.Release() - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.NoError(t, err) - defer unified.Release() - assert.Len(t, unified.Chunks(), 2) - - expectedDict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["ab", "cd", "ef"]`)) - defer expectedDict.Release() - - unified1 := unified.Chunk(0).(array.ExtensionArray) - assert.Truef(t, arrow.TypeEqual(dt, unified1.DataType()), "expected: %s, got: %s", dt, unified1.DataType()) - indices, _, _ := array.FromJSON(mem, 
arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, null, 1, 0]`)) - defer indices.Release() - checkDictionaryArray(t, unified1.Storage(), expectedDict, indices) - - unified2 := unified.Chunk(1).(array.ExtensionArray) - assert.Truef(t, arrow.TypeEqual(dt, unified2.DataType()), "expected: %s, got: %s", dt, unified1.DataType()) - indices, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 0, 0]`)) - defer indices.Release() - checkDictionaryArray(t, unified2.Storage(), expectedDict, indices) -} - -func TestDictionaryUnifierChunkedArrayNestedDict(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - innerType := arrow.ListOf(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: arrow.BinaryTypes.String}) - innerDict1, _, err := array.FromJSON(mem, innerType, strings.NewReader(`[["ab", "cd"], [], ["cd", null]]`)) - assert.NoError(t, err) - defer innerDict1.Release() - indices1, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 1, 0, 1, 2]`)) - defer indices1.Release() - - chunk1 := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: innerType}, indices1, innerDict1) - defer chunk1.Release() - - innerDict2, _, err := array.FromJSON(mem, innerType, strings.NewReader(`[["cd", "ef"], ["cd", null], []]`)) - assert.NoError(t, err) - defer innerDict2.Release() - indices2, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 2, 0]`)) - defer indices2.Release() - - chunk2 := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: indices2.DataType(), ValueType: innerType}, indices2, innerDict2) - defer chunk2.Release() - - chunked := arrow.NewChunked(chunk1.DataType(), []arrow.Array{chunk1, chunk2}) - defer chunked.Release() - - unified, err := array.UnifyChunkedDicts(mem, chunked) - assert.Nil(t, unified) - assert.EqualError(t, err, "unimplemented dictionary value type, list, 
nullable>") -} - -func TestDictionaryUnifierTableZeroColumns(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema([]arrow.Field{}, nil) - table := array.NewTable(schema, []arrow.Column{}, 42) - defer table.Release() - - unified, err := array.UnifyTableDicts(mem, table) - assert.NoError(t, err) - assert.True(t, schema.Equal(unified.Schema())) - assert.EqualValues(t, 42, unified.NumRows()) - assert.True(t, array.TableEqual(table, unified)) -} - -func TestDictionaryAppendIndices(t *testing.T) { - indexTypes := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, - arrow.PrimitiveTypes.Uint64, - } - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - dict, _, err := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", "b", "c", "d", "e", "f"]`)) - require.NoError(t, err) - defer dict.Release() - - indices := []int{3, 4, 0, 3, 1, 4, 4, 5} - - for _, typ := range indexTypes { - t.Run(typ.String(), func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - dictType := &arrow.DictionaryType{ - IndexType: typ, ValueType: dict.DataType()} - bldr := array.NewDictionaryBuilderWithDict(mem, dictType, dict) - defer bldr.Release() - - bldr.AppendIndices(indices, nil) - - arr := bldr.NewDictionaryArray() - defer arr.Release() - - arrIndices := arr.Indices() - assert.EqualValues(t, len(indices), arr.Len()) - assert.EqualValues(t, len(indices), arrIndices.Len()) - - assert.Equal(t, fmt.Sprint(indices), arrIndices.String()) - }) - } -} - -type panicAllocator struct { - n int - paniced bool - memory.Allocator -} - -func (p *panicAllocator) Allocate(size int) []byte { - if size > p.n { - p.paniced = true - panic("panic allocator") 
- } - return p.Allocator.Allocate(size) -} - -func (p *panicAllocator) Reallocate(size int, b []byte) []byte { - return p.Allocator.Reallocate(size, b) -} - -func (p *panicAllocator) Free(b []byte) { - p.Allocator.Free(b) -} - -func TestBinaryDictionaryPanic(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - allocator := &panicAllocator{ - n: 400, - Allocator: mem, - } - - expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(allocator, expectedType) - defer bldr.Release() - - bldr.AppendNull() - allocator.n = 0 // force panic - func() { - defer func() { - recover() - }() - bldr.NewArray() - }() - assert.True(t, allocator.paniced) -} - -func BenchmarkBinaryDictionaryBuilder(b *testing.B) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(b, 0) - - dictType := &arrow.DictionaryType{IndexType: &arrow.Int32Type{}, ValueType: arrow.BinaryTypes.String} - bldr := array.NewDictionaryBuilder(mem, dictType) - defer bldr.Release() - - randString := func() string { - return fmt.Sprintf("test-%d", rand.Intn(30)) - } - - builder := bldr.(*array.BinaryDictionaryBuilder) - for i := 0; i < b.N; i++ { - assert.NoError(b, builder.AppendString(randString())) - } -} diff --git a/go/arrow/array/diff.go b/go/arrow/array/diff.go deleted file mode 100644 index e5c1ce1521d95..0000000000000 --- a/go/arrow/array/diff.go +++ /dev/null @@ -1,315 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" -) - -// Edit represents one entry in the edit script to compare two arrays. -type Edit struct { - Insert bool - RunLength int64 -} - -// Edits is a slice of Edit structs that represents an edit script to compare two arrays. -// When applied to the base array, it produces the target array. -// Each element of "insert" determines whether an element was inserted into (true) -// or deleted from (false) base. Each insertion or deletion is followed by a run of -// elements which are unchanged from base to target; the length of this run is stored -// in RunLength. (Note that the edit script begins and ends with a run of shared -// elements but both fields of the struct must have the same length. To accommodate this -// the first element of "insert" should be ignored.) -// -// For example for base "hlloo" and target "hello", the edit script would be -// [ -// -// {"insert": false, "run_length": 1}, // leading run of length 1 ("h") -// {"insert": true, "run_length": 3}, // insert("e") then a run of length 3 ("llo") -// {"insert": false, "run_length": 0} // delete("o") then an empty run -// -// ] -type Edits []Edit - -// String returns a simple string representation of the edit script. -func (e Edits) String() string { - return fmt.Sprintf("%v", []Edit(e)) -} - -// UnifiedDiff returns a string representation of the diff of base and target in Unified Diff format. 
-func (e Edits) UnifiedDiff(base, target arrow.Array) string { - var s strings.Builder - baseIndex := int64(0) - targetIndex := int64(0) - wrotePosition := false - for i := 0; i < len(e); i++ { - if i > 0 { - if !wrotePosition { - s.WriteString(fmt.Sprintf("@@ -%d, +%d @@\n", baseIndex, targetIndex)) - wrotePosition = true - } - if e[i].Insert { - s.WriteString(fmt.Sprintf("+%v\n", stringAt(target, targetIndex))) - targetIndex++ - } else { - s.WriteString(fmt.Sprintf("-%v\n", stringAt(base, baseIndex))) - baseIndex++ - } - } - for j := int64(0); j < e[i].RunLength; j++ { - baseIndex++ - targetIndex++ - wrotePosition = false - } - } - return s.String() -} - -func stringAt(arr arrow.Array, i int64) string { - if arr.IsNull(int(i)) { - return "null" - } - dt := arr.DataType() - switch { - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Float32): - return fmt.Sprintf("%f", arr.(*Float32).Value(int(i))) - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Float64): - return fmt.Sprintf("%f", arr.(*Float64).Value(int(i))) - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Date32): - return arr.(*Date32).Value(int(i)).FormattedString() - case arrow.TypeEqual(dt, arrow.PrimitiveTypes.Date64): - return arr.(*Date64).Value(int(i)).FormattedString() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_s): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Second).String() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_ms): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Millisecond).String() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_us): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Microsecond).String() - case arrow.TypeEqual(dt, arrow.FixedWidthTypes.Timestamp_ns): - return arr.(*Timestamp).Value(int(i)).ToTime(arrow.Nanosecond).String() - } - s := NewSlice(arr, i, i+1) - defer s.Release() - st, _ := s.MarshalJSON() - return strings.Trim(string(st[1:len(st)-1]), "\n") -} - -// Diff compares two arrays, returning an edit script which 
expresses the difference -// between them. The edit script can be applied to the base array to produce the target. -// 'base' is a baseline for comparison. -// 'target' is an array of identical type to base whose elements differ from base's. -func Diff(base, target arrow.Array) (edits Edits, err error) { - if !arrow.TypeEqual(base.DataType(), target.DataType()) { - return nil, fmt.Errorf("%w: only taking the diff of like-typed arrays is supported", arrow.ErrNotImplemented) - } - switch base.DataType().ID() { - case arrow.EXTENSION: - return Diff(base.(ExtensionArray).Storage(), target.(ExtensionArray).Storage()) - case arrow.DICTIONARY: - return nil, fmt.Errorf("%w: diffing arrays of type %s is not implemented", arrow.ErrNotImplemented, base.DataType()) - case arrow.RUN_END_ENCODED: - return nil, fmt.Errorf("%w: diffing arrays of type %s is not implemented", arrow.ErrNotImplemented, base.DataType()) - } - d := newQuadraticSpaceMyersDiff(base, target) - return d.Diff() -} - -// editPoint represents an intermediate state in the comparison of two arrays -type editPoint struct { - base int - target int -} - -type quadraticSpaceMyersDiff struct { - base arrow.Array - target arrow.Array - finishIndex int - editCount int - endpointBase []int - insert []bool - baseBegin int - targetBegin int - baseEnd int - targetEnd int -} - -func newQuadraticSpaceMyersDiff(base, target arrow.Array) *quadraticSpaceMyersDiff { - d := &quadraticSpaceMyersDiff{ - base: base, - target: target, - finishIndex: -1, - editCount: 0, - endpointBase: []int{}, - insert: []bool{}, - baseBegin: 0, - targetBegin: 0, - baseEnd: base.Len(), - targetEnd: target.Len(), - } - d.endpointBase = []int{d.extendFrom(editPoint{d.baseBegin, d.targetBegin}).base} - if d.baseEnd-d.baseBegin == d.targetEnd-d.targetBegin && d.endpointBase[0] == d.baseEnd { - // trivial case: base == target - d.finishIndex = 0 - } - return d -} - -func (d *quadraticSpaceMyersDiff) valuesEqual(baseIndex, targetIndex int) bool { - 
baseNull := d.base.IsNull(baseIndex) - targetNull := d.target.IsNull(targetIndex) - if baseNull || targetNull { - return baseNull && targetNull - } - return SliceEqual(d.base, int64(baseIndex), int64(baseIndex+1), d.target, int64(targetIndex), int64(targetIndex+1)) -} - -// increment the position within base and target (the elements skipped in this way were -// present in both sequences) -func (d *quadraticSpaceMyersDiff) extendFrom(p editPoint) editPoint { - for p.base != d.baseEnd && p.target != d.targetEnd { - if !d.valuesEqual(p.base, p.target) { - break - } - p.base++ - p.target++ - } - return p -} - -// increment the position within base (the element pointed to was deleted) -// then extend maximally -func (d *quadraticSpaceMyersDiff) deleteOne(p editPoint) editPoint { - if p.base != d.baseEnd { - p.base++ - } - return d.extendFrom(p) -} - -// increment the position within target (the element pointed to was inserted) -// then extend maximally -func (d *quadraticSpaceMyersDiff) insertOne(p editPoint) editPoint { - if p.target != d.targetEnd { - p.target++ - } - return d.extendFrom(p) -} - -// beginning of a range for storing per-edit state in endpointBase and insert -func storageOffset(editCount int) int { - return editCount * (editCount + 1) / 2 -} - -// given edit_count and index, augment endpointBase[index] with the corresponding -// position in target (which is only implicitly represented in editCount, index) -func (d *quadraticSpaceMyersDiff) getEditPoint(editCount, index int) editPoint { - insertionsMinusDeletions := 2*(index-storageOffset(editCount)) - editCount - maximalBase := d.endpointBase[index] - maximalTarget := min(d.targetBegin+((maximalBase-d.baseBegin)+insertionsMinusDeletions), d.targetEnd) - return editPoint{maximalBase, maximalTarget} -} - -func (d *quadraticSpaceMyersDiff) Next() { - d.editCount++ - if len(d.endpointBase) < storageOffset(d.editCount+1) { - d.endpointBase = append(d.endpointBase, make([]int, 
storageOffset(d.editCount+1)-len(d.endpointBase))...) - } - if len(d.insert) < storageOffset(d.editCount+1) { - d.insert = append(d.insert, make([]bool, storageOffset(d.editCount+1)-len(d.insert))...) - } - previousOffset := storageOffset(d.editCount - 1) - currentOffset := storageOffset(d.editCount) - - // try deleting from base first - for i, iOut := 0, 0; i < d.editCount; i, iOut = i+1, iOut+1 { - previousEndpoint := d.getEditPoint(d.editCount-1, i+previousOffset) - d.endpointBase[iOut+currentOffset] = d.deleteOne(previousEndpoint).base - } - - // check if inserting from target could do better - for i, iOut := 0, 1; i < d.editCount; i, iOut = i+1, iOut+1 { - // retrieve the previously computed best endpoint for (editCount, iOut) - // for comparison with the best endpoint achievable with an insertion - endpointAfterDeletion := d.getEditPoint(d.editCount, iOut+currentOffset) - - previousEndpoint := d.getEditPoint(d.editCount-1, i+previousOffset) - endpointAfterInsertion := d.insertOne(previousEndpoint) - - if endpointAfterInsertion.base-endpointAfterDeletion.base >= 0 { - // insertion was more efficient; keep it and mark the insertion in insert - d.insert[iOut+currentOffset] = true - d.endpointBase[iOut+currentOffset] = endpointAfterInsertion.base - } - } - - finish := editPoint{d.baseEnd, d.targetEnd} - for iOut := 0; iOut < d.editCount+1; iOut++ { - if d.getEditPoint(d.editCount, iOut+currentOffset) == finish { - d.finishIndex = iOut + currentOffset - return - } - } -} - -func (d *quadraticSpaceMyersDiff) Done() bool { - return d.finishIndex != -1 -} - -func (d *quadraticSpaceMyersDiff) GetEdits() (Edits, error) { - if !d.Done() { - panic("GetEdits called but Done() = false") - } - - length := d.editCount + 1 - edits := make(Edits, length) - index := d.finishIndex - endpoint := d.getEditPoint(d.editCount, d.finishIndex) - - for i := d.editCount; i > 0; i-- { - insert := d.insert[index] - edits[i].Insert = insert - insertionsMinusDeletions := (endpoint.base - 
d.baseBegin) - (endpoint.target - d.targetBegin) - if insert { - insertionsMinusDeletions++ - } else { - insertionsMinusDeletions-- - } - index = (i-1-insertionsMinusDeletions)/2 + storageOffset(i-1) - - // endpoint of previous edit - previous := d.getEditPoint(i-1, index) - in := 0 - if insert { - in = 1 - } - edits[i].RunLength = int64(endpoint.base - previous.base - (1 - in)) - endpoint = previous - } - edits[0].Insert = false - edits[0].RunLength = int64(endpoint.base - d.baseBegin) - - return edits, nil -} - -func (d *quadraticSpaceMyersDiff) Diff() (edits Edits, err error) { - for !d.Done() { - d.Next() - } - return d.GetEdits() -} diff --git a/go/arrow/array/diff_test.go b/go/arrow/array/diff_test.go deleted file mode 100644 index 9c9ce6a53aed0..0000000000000 --- a/go/arrow/array/diff_test.go +++ /dev/null @@ -1,878 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "fmt" - "math/rand" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type diffTestCase struct { - dataType arrow.DataType - - baseJSON string - targetJSON string - wantInsert []bool - wantRunLength []int64 -} - -func (s *diffTestCase) check(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - base, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.baseJSON)) - if err != nil { - t.Fatal(err) - } - defer base.Release() - - target, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.targetJSON)) - if err != nil { - t.Fatal(err) - } - defer target.Release() - - edits, err := array.Diff(base, target) - if err != nil { - t.Fatalf("got unexpected error %v", err) - } - - gotInserts := make([]bool, len(edits)) - gotRunLengths := make([]int64, len(edits)) - for i, edit := range edits { - gotInserts[i] = edit.Insert - gotRunLengths[i] = edit.RunLength - } - if !reflect.DeepEqual(gotInserts, s.wantInsert) { - t.Errorf("Diff(\n base=%v, \ntarget=%v\n) got insert %v, want %v", base, target, gotInserts, s.wantInsert) - } - if !reflect.DeepEqual(gotRunLengths, s.wantRunLength) { - t.Errorf("Diff(\n base=%v, \ntarget=%v\n) got run length %v, want %v", base, target, gotRunLengths, s.wantRunLength) - } -} - -func TestDiff_Trivial(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "empty", - base: `[]`, - target: `[]`, - wantInsert: []bool{false}, - wantRunLength: []int64{0}, - }, - { - name: "nulls", - base: `[null, null]`, - target: `[null, null, null, null]`, - wantInsert: []bool{false, true, true}, - 
wantRunLength: []int64{2, 0, 0}, - }, - { - name: "equal", - base: `[1, 2, 3]`, - target: `[1, 2, 3]`, - wantInsert: []bool{false}, - wantRunLength: []int64{3}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_Basics(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[1, 2, null, 5]`, - target: `[1, 2, 3, null, 5]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{2, 2}, - }, - { - name: "delete one", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, null, 5]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{2, 2}, - }, - { - name: "change one", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, 23, null, 5]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 2}, - }, - { - name: "null out one", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, null, null, 5]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 1, 1}, - }, - { - name: "append some", - base: `[1, 2, 3, null, 5]`, - target: `[1, 2, 3, null, 5, 6, 7, 8, 9]`, - wantInsert: []bool{false, true, true, true, true}, - wantRunLength: []int64{5, 0, 0, 0, 0}, - }, - { - name: "prepend some", - base: `[1, 2, 3, null, 5]`, - target: `[6, 4, 2, 0, 1, 2, 3, null, 5]`, - wantInsert: []bool{false, true, true, true, true}, - wantRunLength: []int64{0, 0, 0, 0, 5}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithBooleans(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert 
[]bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[true, true, true]`, - target: `[true, false, true, true]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `[true, false, true, true]`, - target: `[true, true, true]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `[false, false, true]`, - target: `[true, false, true]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: `[true, false, true]`, - target: `[true, false, null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: &arrow.BooleanType{}, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithStrings(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `["give", "a", "break"]`, - target: `["give", "me", "a", "break"]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `["give", "me", "a", "break"]`, - target: `["give", "a", "break"]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `["give", "a", "break"]`, - target: `["gimme", "a", "break"]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: `["give", "a", "break"]`, - target: `["give", "a", null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: &arrow.StringType{}, - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - 
wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithLists(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[[2, 3, 1], [], [13]]`, - target: `[[2, 3, 1], [5, 9], [], [13]]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `[[2, 3, 1], [5, 9], [], [13]]`, - target: `[[2, 3, 1], [], [13]]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `[[2, 3, 1], [], [13]]`, - target: `[[3, 3, 3], [], [13]]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{0, 0, 2}, - }, - { - name: "null out one", - base: `[[2, 3, 1], [], [13]]`, - target: `[[2, 3, 1], [], null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - d := diffTestCase{ - dataType: arrow.ListOf(arrow.PrimitiveTypes.Int32), - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_BasicsWithStructs(t *testing.T) { - cases := []struct { - name string - base string - target string - wantInsert []bool - wantRunLength []int64 - }{ - { - name: "insert one", - base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}]`, - wantInsert: []bool{false, true}, - wantRunLength: []int64{1, 2}, - }, - { - name: "delete one", - base: `[{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - wantInsert: []bool{false, false}, - wantRunLength: []int64{1, 2}, - }, - { - name: "change one", - base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 2}, {}, {"bar": 13}]`, - wantInsert: []bool{false, false, true}, - wantRunLength: 
[]int64{0, 0, 2}, - }, - { - name: "null out one", - base: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - target: `[{"foo": "!", "bar": 3}, {}, null]`, - wantInsert: []bool{false, false, true}, - wantRunLength: []int64{2, 0, 0}, - }, - } - for _, tc := range cases { - f1 := arrow.Field{Name: "foo", Type: arrow.BinaryTypes.String, Nullable: true} - f2 := arrow.Field{Name: "bar", Type: arrow.PrimitiveTypes.Int32, Nullable: true} - d := diffTestCase{ - dataType: arrow.StructOf(f1, f2), - baseJSON: tc.base, - targetJSON: tc.target, - wantInsert: tc.wantInsert, - wantRunLength: tc.wantRunLength, - } - t.Run(tc.name, d.check) - } -} - -func TestDiff_Random(t *testing.T) { - rng := rand.New(rand.NewSource(0xdeadbeef)) - for i := 0; i < 100; i++ { - t.Run(fmt.Sprintf("case-%d", i), func(t *testing.T) { - testRandomCase(t, rng) - }) - } -} - -func testRandomCase(t *testing.T, rng *rand.Rand) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dataType := arrow.PrimitiveTypes.Int32 - - baseValues := make([]int32, rng.Intn(10)) - for i := range baseValues { - baseValues[i] = rng.Int31() - } - baseJSON, err := json.Marshal(baseValues) - if err != nil { - t.Fatal(err) - } - - targetValues := make([]int32, rng.Intn(10)) - for i := range targetValues { - // create runs with some probability - if rng.Intn(2) == 0 && len(baseValues) > 0 { - targetValues[i] = baseValues[rng.Intn(len(baseValues))] - } else { - targetValues[i] = rng.Int31() - } - } - targetJSON, err := json.Marshal(targetValues) - if err != nil { - t.Fatal(err) - } - - base, _, err := array.FromJSON(mem, dataType, strings.NewReader(string(baseJSON))) - if err != nil { - t.Fatal(err) - } - defer base.Release() - - target, _, err := array.FromJSON(mem, dataType, strings.NewReader(string(targetJSON))) - if err != nil { - t.Fatal(err) - } - defer target.Release() - - edits, err := array.Diff(base, target) - if err != nil { - t.Fatalf("got unexpected error %v", err) - } - - 
validateEditScript(t, edits, base, target) -} - -// validateEditScript checks that the edit script produces target when applied to base. -func validateEditScript(t *testing.T, edits array.Edits, base, target arrow.Array) { - if len(edits) == 0 { - t.Fatalf("edit script has run length of zero") - } - - baseIndex := int64(0) - targetIndex := int64(0) - for i := 0; i < len(edits); i++ { - if i > 0 { - if edits[i].Insert { - targetIndex++ - } else { - baseIndex++ - } - } - for j := int64(0); j < edits[i].RunLength; j++ { - if !array.SliceEqual(base, baseIndex, baseIndex+1, target, targetIndex, targetIndex+1) { - t.Fatalf("edit script (%v) when applied to base %v does not produce target %v", edits, base, target) - } - baseIndex += 1 - targetIndex += 1 - } - } - if baseIndex != int64(base.Len()) || targetIndex != int64(target.Len()) { - t.Fatalf("edit script (%v) when applied to base %v does not produce target %v", edits, base, target) - } -} - -type diffStringTestCase struct { - dataType arrow.DataType - - name string - baseJSON string - targetJSON string - want string -} - -func (s *diffStringTestCase) check(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - base, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.baseJSON)) - if err != nil { - t.Fatal(err) - } - defer base.Release() - - target, _, err := array.FromJSON(mem, s.dataType, strings.NewReader(s.targetJSON)) - if err != nil { - t.Fatal(err) - } - defer target.Release() - - edits, err := array.Diff(base, target) - if err != nil { - t.Fatalf("got unexpected error %v", err) - } - got := edits.UnifiedDiff(base, target) - if got != s.want { - t.Errorf("got:\n%v\n, want:\n%v", got, s.want) - } -} - -func TestEdits_UnifiedDiff(t *testing.T) { - msPerDay := 24 * 60 * 60 * 1000 - cases := []diffStringTestCase{ - { - name: "no changes", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "me", "a", "break"]`, - targetJSON: `["give", "me", 
"a", "break"]`, - want: ``, - }, - { - name: "insert one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "a", "break"]`, - targetJSON: `["give", "me", "a", "break"]`, - want: `@@ -1, +1 @@ -+"me" -`, - }, - { - name: "delete one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "me", "a", "break"]`, - targetJSON: `["give", "a", "break"]`, - want: `@@ -1, +1 @@ --"me" -`, - }, - { - name: "change one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "a", "break"]`, - targetJSON: `["gimme", "a", "break"]`, - want: `@@ -0, +0 @@ --"give" -+"gimme" -`, - }, - { - name: "null out one", - dataType: arrow.BinaryTypes.String, - baseJSON: `["give", "a", "break"]`, - targetJSON: `["give", "a", null]`, - want: `@@ -2, +2 @@ --"break" -+null -`, - }, - { - name: "strings with escaped chars", - dataType: arrow.BinaryTypes.String, - baseJSON: `["newline:\\n", "quote:'", "backslash:\\\\"]`, - targetJSON: `["newline:\\n", "tab:\\t", "quote:\\\"", "backslash:\\\\"]`, - want: `@@ -1, +1 @@ --"quote:'" -+"tab:\\t" -+"quote:\\\"" -`, - }, - { - name: "date32", - dataType: arrow.PrimitiveTypes.Date32, - baseJSON: `[0, 1, 2, 31, 4]`, - targetJSON: `[0, 1, 31, 2, 4]`, - want: `@@ -2, +2 @@ --1970-01-03 -@@ -4, +3 @@ -+1970-01-03 -`, - }, - { - name: "date64", - dataType: arrow.PrimitiveTypes.Date64, - baseJSON: fmt.Sprintf(`[%d, %d, %d, %d, %d]`, 0*msPerDay, 1*msPerDay, 2*msPerDay, 31*msPerDay, 4*msPerDay), - targetJSON: fmt.Sprintf(`[%d, %d, %d, %d, %d]`, 0*msPerDay, 1*msPerDay, 31*msPerDay, 2*msPerDay, 4*msPerDay), - want: `@@ -2, +2 @@ --1970-01-03 -@@ -4, +3 @@ -+1970-01-03 -`, - }, - { - name: "timestamp_s", - dataType: arrow.FixedWidthTypes.Timestamp_s, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:15:23 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:15:23 +0000 UTC -`, - }, - { - name: 
"timestamp_ms", - dataType: arrow.FixedWidthTypes.Timestamp_ms, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000*(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000*(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:04:05.678 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:04:05.678 +0000 UTC -`, - }, - { - name: "timestamp_us", - dataType: arrow.FixedWidthTypes.Timestamp_us, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000000*(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000000*(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:04:05.000678 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:04:05.000678 +0000 UTC -`, - }, - { - name: "timestamp_ns", - dataType: arrow.FixedWidthTypes.Timestamp_ns, - baseJSON: fmt.Sprintf(`[0, 1, %d, 2, 4]`, 678+1000000000*(5+60*(4+60*(3+24*int64(1))))), - targetJSON: fmt.Sprintf(`[0, 1, 2, %d, 4]`, 678+1000000000*(5+60*(4+60*(3+24*int64(1))))), - want: `@@ -2, +2 @@ --1970-01-02 03:04:05.000000678 +0000 UTC -@@ -4, +3 @@ -+1970-01-02 03:04:05.000000678 +0000 UTC -`, - }, - { - name: "lists", - dataType: arrow.ListOf(arrow.PrimitiveTypes.Int32), - baseJSON: `[[2, 3, 1], [], [13], []]`, - targetJSON: `[[2, 3, 1], [5, 9], [], [13]]`, - want: `@@ -1, +1 @@ -+[5,9] -@@ -3, +4 @@ --[] -`, - }, - { - name: "maps", - dataType: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32), - baseJSON: `[ - [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], - [{"key": "quux", "value": 13}], - [] - ]`, - targetJSON: `[ - [{"key": "foo", "value": 2}, {"key": "bar", "value": 3}, {"key": "baz", "value": 1}], - [{"key": "ytho", "value": 11}], - [{"key": "quux", "value": 13}], - [] - ]`, - want: `@@ -1, +1 @@ -+[{"key":"ytho","value":11}] -`, - }, - { - name: "structs", - dataType: arrow.StructOf( - []arrow.Field{ - {Name: "foo", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bar", Type: 
arrow.PrimitiveTypes.Int32, Nullable: true}, - }..., - ), - baseJSON: `[{"foo": "!", "bar": 3}, {}, {"bar": 13}]`, - targetJSON: `[{"foo": null, "bar": 2}, {}, {"bar": 13}]`, - want: `@@ -0, +0 @@ --{"bar":3,"foo":"!"} -+{"bar":2,"foo":null} -`, - }, - { - name: "unions", - dataType: arrow.UnionOf(arrow.SparseMode, - []arrow.Field{ - {Name: "foo", Type: arrow.BinaryTypes.String}, - {Name: "bar", Type: arrow.PrimitiveTypes.Int32}, - }, - []arrow.UnionTypeCode{2, 5}, - ), - baseJSON: `[[2, "!"], [5, 3], [5, 13]]`, - targetJSON: `[[2, "!"], [2, "3"], [5, 13]]`, - want: `@@ -1, +1 @@ --[5,3] -+[2,"3"] -`, - }, - { - name: "string", - dataType: arrow.BinaryTypes.String, - baseJSON: `["h", "l", "l", "o", "o"]`, - targetJSON: `["h", "e", "l", "l", "o", "0"]`, - want: `@@ -1, +1 @@ -+"e" -@@ -4, +5 @@ --"o" -+"0" -`, - }, - { - name: "int8", - dataType: arrow.PrimitiveTypes.Int8, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "int16", - dataType: arrow.PrimitiveTypes.Int16, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "int32", - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "int64", - dataType: arrow.PrimitiveTypes.Int64, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint8", - dataType: arrow.PrimitiveTypes.Uint8, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - 
name: "uint16", - dataType: arrow.PrimitiveTypes.Uint16, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint32", - dataType: arrow.PrimitiveTypes.Uint32, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "uint64", - dataType: arrow.PrimitiveTypes.Uint64, - baseJSON: `[0, 1, 2, 3, 5, 8, 11, 13, 17]`, - targetJSON: `[2, 3, 5, 7, 11, 13, 17, 19]`, - want: `@@ -0, +0 @@ --0 --1 -@@ -5, +3 @@ --8 -+7 -@@ -9, +7 @@ -+19 -`, - }, - { - name: "float32", - dataType: arrow.PrimitiveTypes.Float32, - baseJSON: `[0.1, 0.3, -0.5]`, - targetJSON: `[0.1, -0.5, 0.3]`, - want: `@@ -1, +1 @@ --0.300000 -@@ -3, +2 @@ -+0.300000 -`, - }, - { - name: "float64", - dataType: arrow.PrimitiveTypes.Float64, - baseJSON: `[0.1, 0.3, -0.5]`, - targetJSON: `[0.1, -0.5, 0.3]`, - want: `@@ -1, +1 @@ --0.300000 -@@ -3, +2 @@ -+0.300000 -`, - }, - { - name: "equal nulls", - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: `[null, null]`, - targetJSON: `[null, null]`, - want: ``, - }, - { - name: "nulls", - dataType: arrow.PrimitiveTypes.Int32, - baseJSON: `[1, null, null, null]`, - targetJSON: `[null, 1, null, 2]`, - want: `@@ -0, +0 @@ --1 -@@ -2, +1 @@ --null -+1 -@@ -4, +3 @@ -+2 -`, - }, - { - name: "extensions", - dataType: extensions.NewUUIDType(), - baseJSON: `["00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000001"]`, - targetJSON: `["00000000-0000-0000-0000-000000000001", "00000000-0000-0000-0000-000000000002"]`, - want: `@@ -0, +0 @@ --"00000000-0000-0000-0000-000000000000" -@@ -2, +1 @@ -+"00000000-0000-0000-0000-000000000002" -`, - }, - } - - for _, tc := range cases { - t.Run(tc.name, tc.check) - } -} diff --git a/go/arrow/array/doc.go b/go/arrow/array/doc.go deleted file mode 100644 index 
5cf85408626ac..0000000000000 --- a/go/arrow/array/doc.go +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* -Package array provides implementations of various Arrow array types. -*/ -package array diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go deleted file mode 100644 index 748c4c1fec641..0000000000000 --- a/go/arrow/array/encoded.go +++ /dev/null @@ -1,520 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "math" - "reflect" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/encoded" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/apache/arrow/go/v18/internal/utils" -) - -// RunEndEncoded represents an array containing two children: -// an array of int32 values defining the ends of each run of values -// and an array of values -type RunEndEncoded struct { - array - - ends arrow.Array - values arrow.Array -} - -func NewRunEndEncodedArray(runEnds, values arrow.Array, logicalLength, offset int) *RunEndEncoded { - data := NewData(arrow.RunEndEncodedOf(runEnds.DataType(), values.DataType()), logicalLength, - []*memory.Buffer{nil}, []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, offset) - defer data.Release() - return NewRunEndEncodedData(data) -} - -func NewRunEndEncodedData(data arrow.ArrayData) *RunEndEncoded { - r := &RunEndEncoded{} - r.refCount = 1 - r.setData(data.(*Data)) - return r -} - -func (r *RunEndEncoded) Values() arrow.Array { return r.values } -func (r *RunEndEncoded) RunEndsArr() arrow.Array { return r.ends } - -func (r *RunEndEncoded) Retain() { - r.array.Retain() - r.values.Retain() - r.ends.Retain() -} - -func (r *RunEndEncoded) Release() { - r.array.Release() - r.values.Release() - r.ends.Release() -} - -// LogicalValuesArray returns an array holding the values of each -// run, only over the range of run values inside the logical offset/length -// range of the parent array. 
-// -// # Example -// -// For this array: -// -// RunEndEncoded: { Offset: 150, Length: 1500 } -// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] -// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] -// -// LogicalValuesArray will return the following array: -// -// [ "f", "g" ] -// -// This is because the offset of 150 tells it to skip the values until -// "f" which corresponds with the logical offset (the run from 10 - 1000), -// and stops after "g" because the length + offset goes to 1650 which is -// within the run from 1000 - 1750, corresponding to the "g" value. -// -// # Note -// -// The return from this needs to be Released. -func (r *RunEndEncoded) LogicalValuesArray() arrow.Array { - physOffset := r.GetPhysicalOffset() - physLength := r.GetPhysicalLength() - data := NewSliceData(r.data.Children()[1], int64(physOffset), int64(physOffset+physLength)) - defer data.Release() - return MakeFromData(data) -} - -// LogicalRunEndsArray returns an array holding the logical indexes -// of each run end, only over the range of run end values relative -// to the logical offset/length range of the parent array. -// -// For arrays with an offset, this is not a slice of the existing -// internal run ends array. Instead a new array is created with run-ends -// that are adjusted so the new array can have an offset of 0. As a result -// this method can be expensive to call for an array with a non-zero offset. -// -// # Example -// -// For this array: -// -// RunEndEncoded: { Offset: 150, Length: 1500 } -// RunEnds: [ 1, 2, 4, 6, 10, 1000, 1750, 2000 ] -// Values: [ "a", "b", "c", "d", "e", "f", "g", "h" ] -// -// LogicalRunEndsArray will return the following array: -// -// [ 850, 1500 ] -// -// This is because the offset of 150 tells us to skip all run-ends less -// than 150 (by finding the physical offset), and we adjust the run-ends -// accordingly (1000 - 150 = 850). The logical length of the array is 1500, -// so we know we don't want to go past the 1750 run end. 
Thus the last -// run-end is determined by doing: min(1750 - 150, 1500) = 1500. -// -// # Note -// -// The return from this needs to be Released -func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { - physOffset := r.GetPhysicalOffset() - physLength := r.GetPhysicalLength() - - if r.data.offset == 0 { - data := NewSliceData(r.data.childData[0], 0, int64(physLength)) - defer data.Release() - return MakeFromData(data) - } - - bldr := NewBuilder(mem, r.data.childData[0].DataType()) - defer bldr.Release() - bldr.Resize(physLength) - - switch e := r.ends.(type) { - case *Int16: - for _, v := range e.Int16Values()[physOffset : physOffset+physLength] { - v -= int16(r.data.offset) - v = int16(utils.Min(int(v), r.data.length)) - bldr.(*Int16Builder).Append(v) - } - case *Int32: - for _, v := range e.Int32Values()[physOffset : physOffset+physLength] { - v -= int32(r.data.offset) - v = int32(utils.Min(int(v), r.data.length)) - bldr.(*Int32Builder).Append(v) - } - case *Int64: - for _, v := range e.Int64Values()[physOffset : physOffset+physLength] { - v -= int64(r.data.offset) - v = int64(utils.Min(int(v), r.data.length)) - bldr.(*Int64Builder).Append(v) - } - } - - return bldr.NewArray() -} - -func (r *RunEndEncoded) setData(data *Data) { - if len(data.childData) != 2 { - panic(fmt.Errorf("%w: arrow/array: RLE array must have exactly 2 children", arrow.ErrInvalid)) - } - debug.Assert(data.dtype.ID() == arrow.RUN_END_ENCODED, "invalid type for RunLengthEncoded") - if !data.dtype.(*arrow.RunEndEncodedType).ValidRunEndsType(data.childData[0].DataType()) { - panic(fmt.Errorf("%w: arrow/array: run ends array must be int16, int32, or int64", arrow.ErrInvalid)) - } - if data.childData[0].NullN() > 0 { - panic(fmt.Errorf("%w: arrow/array: run ends array cannot contain nulls", arrow.ErrInvalid)) - } - - r.array.setData(data) - - r.ends = MakeFromData(r.data.childData[0]) - r.values = MakeFromData(r.data.childData[1]) -} - -func (r *RunEndEncoded) 
GetPhysicalOffset() int { - return encoded.FindPhysicalOffset(r.data) -} - -func (r *RunEndEncoded) GetPhysicalLength() int { - return encoded.GetPhysicalLength(r.data) -} - -// GetPhysicalIndex can be used to get the run-encoded value instead of costly LogicalValuesArray -// in the following way: -// -// r.Values().(valuetype).Value(r.GetPhysicalIndex(i)) -func (r *RunEndEncoded) GetPhysicalIndex(i int) int { - return encoded.FindPhysicalIndex(r.data, i+r.data.offset) -} - -// ValueStr will return the str representation of the value at the logical offset i. -func (r *RunEndEncoded) ValueStr(i int) string { - return r.values.ValueStr(r.GetPhysicalIndex(i)) -} - -func (r *RunEndEncoded) String() string { - var buf bytes.Buffer - buf.WriteByte('[') - for i := 0; i < r.ends.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - - value := r.values.GetOneForMarshal(i) - if byts, ok := value.(json.RawMessage); ok { - value = string(byts) - } - fmt.Fprintf(&buf, "{%d -> %v}", r.ends.GetOneForMarshal(i), value) - } - - buf.WriteByte(']') - return buf.String() -} - -func (r *RunEndEncoded) GetOneForMarshal(i int) interface{} { - return r.values.GetOneForMarshal(r.GetPhysicalIndex(i)) -} - -func (r *RunEndEncoded) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - buf.WriteByte('[') - for i := 0; i < r.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(r.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayRunEndEncodedEqual(l, r *RunEndEncoded) bool { - // types were already checked before getting here, so we know - // the encoded types are equal - mr := encoded.NewMergedRuns([2]arrow.Array{l, r}) - for mr.Next() { - lIndex := mr.IndexIntoArray(0) - rIndex := mr.IndexIntoArray(1) - if !SliceEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1) { - return false - } - } - return true -} - -func arrayRunEndEncodedApproxEqual(l, r 
*RunEndEncoded, opt equalOption) bool { - // types were already checked before getting here, so we know - // the encoded types are equal - mr := encoded.NewMergedRuns([2]arrow.Array{l, r}) - for mr.Next() { - lIndex := mr.IndexIntoArray(0) - rIndex := mr.IndexIntoArray(1) - if !sliceApproxEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1, opt) { - return false - } - } - return true -} - -type RunEndEncodedBuilder struct { - builder - - dt arrow.DataType - runEnds Builder - values Builder - maxRunEnd uint64 - - // currently, mixing AppendValueFromString & UnmarshalOne is unsupported - lastUnmarshalled interface{} - unmarshalled bool // tracks if Unmarshal was called (in case lastUnmarshalled is nil) - lastStr *string -} - -func NewRunEndEncodedBuilder(mem memory.Allocator, runEnds, encoded arrow.DataType) *RunEndEncodedBuilder { - dt := arrow.RunEndEncodedOf(runEnds, encoded) - if !dt.ValidRunEndsType(runEnds) { - panic("arrow/ree: invalid runEnds type for run length encoded array") - } - - var maxEnd uint64 - switch runEnds.ID() { - case arrow.INT16: - maxEnd = math.MaxInt16 - case arrow.INT32: - maxEnd = math.MaxInt32 - case arrow.INT64: - maxEnd = math.MaxInt64 - } - return &RunEndEncodedBuilder{ - builder: builder{refCount: 1, mem: mem}, - dt: dt, - runEnds: NewBuilder(mem, runEnds), - values: NewBuilder(mem, encoded), - maxRunEnd: maxEnd, - lastUnmarshalled: nil, - } -} - -func (b *RunEndEncodedBuilder) Type() arrow.DataType { - return b.dt -} - -func (b *RunEndEncodedBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - b.values.Release() - b.runEnds.Release() - } -} - -func (b *RunEndEncodedBuilder) addLength(n uint64) { - if uint64(b.length)+n > b.maxRunEnd { - panic(fmt.Errorf("%w: %s array length must fit be less than %d", arrow.ErrInvalid, b.dt, b.maxRunEnd)) - } - - b.length += int(n) -} - -func (b *RunEndEncodedBuilder) finishRun() { - b.lastUnmarshalled = 
nil - b.lastStr = nil - b.unmarshalled = false - if b.length == 0 { - return - } - - switch bldr := b.runEnds.(type) { - case *Int16Builder: - bldr.Append(int16(b.length)) - case *Int32Builder: - bldr.Append(int32(b.length)) - case *Int64Builder: - bldr.Append(int64(b.length)) - } -} - -func (b *RunEndEncodedBuilder) ValueBuilder() Builder { return b.values } - -func (b *RunEndEncodedBuilder) Append(n uint64) { - b.finishRun() - b.addLength(n) -} - -func (b *RunEndEncodedBuilder) AppendRuns(runs []uint64) { - for _, r := range runs { - b.finishRun() - b.addLength(r) - } -} - -func (b *RunEndEncodedBuilder) ContinueRun(n uint64) { - b.addLength(n) -} - -func (b *RunEndEncodedBuilder) AppendNull() { - b.finishRun() - b.values.AppendNull() - b.addLength(1) -} - -func (b *RunEndEncodedBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *RunEndEncodedBuilder) NullN() int { - return UnknownNullCount -} - -func (b *RunEndEncodedBuilder) AppendEmptyValue() { - b.AppendNull() -} - -func (b *RunEndEncodedBuilder) AppendEmptyValues(n int) { - b.AppendNulls(n) -} - -func (b *RunEndEncodedBuilder) Reserve(n int) { - b.values.Reserve(n) - b.runEnds.Reserve(n) -} - -func (b *RunEndEncodedBuilder) Resize(n int) { - b.values.Resize(n) - b.runEnds.Resize(n) -} - -func (b *RunEndEncodedBuilder) NewRunEndEncodedArray() *RunEndEncoded { - data := b.newData() - defer data.Release() - return NewRunEndEncodedData(data) -} - -func (b *RunEndEncodedBuilder) NewArray() arrow.Array { - return b.NewRunEndEncodedArray() -} - -func (b *RunEndEncodedBuilder) newData() (data *Data) { - b.finishRun() - values := b.values.NewArray() - defer values.Release() - runEnds := b.runEnds.NewArray() - defer runEnds.Release() - - data = NewData( - b.dt, b.length, []*memory.Buffer{}, - []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, 0) - b.reset() - return -} - -// AppendValueFromString can't be used in conjunction with UnmarshalOne -func (b *RunEndEncodedBuilder) 
AppendValueFromString(s string) error { - // we don't support mixing AppendValueFromString & UnmarshalOne - if b.unmarshalled { - return fmt.Errorf("%w: mixing AppendValueFromString & UnmarshalOne not yet implemented", arrow.ErrNotImplemented) - } - - if s == NullValueStr { - b.AppendNull() - return nil - } - - if b.lastStr != nil && s == *b.lastStr { - b.ContinueRun(1) - return nil - } - - b.Append(1) - lastStr := s - b.lastStr = &lastStr - return b.ValueBuilder().AppendValueFromString(s) -} - -// UnmarshalOne can't be used in conjunction with AppendValueFromString -func (b *RunEndEncodedBuilder) UnmarshalOne(dec *json.Decoder) error { - // we don't support mixing AppendValueFromString & UnmarshalOne - if b.lastStr != nil { - return fmt.Errorf("%w: mixing AppendValueFromString & UnmarshalOne not yet implemented", arrow.ErrNotImplemented) - } - - var value interface{} - if err := dec.Decode(&value); err != nil { - return err - } - - // if we unmarshalled the same value as the previous one, we want to - // continue the run. However, there's an edge case. At the start of - // unmarshalling, lastUnmarshalled will be nil, but we might get - // nil as the first value we unmarshal. In that case we want to - // make sure we add a new run instead. 
We can detect that case by - // checking that the number of runEnds matches the number of values - // we have, which means no matter what we have to start a new run - if reflect.DeepEqual(value, b.lastUnmarshalled) && (value != nil || b.runEnds.Len() != b.values.Len()) { - b.ContinueRun(1) - return nil - } - - data, err := json.Marshal(value) - if err != nil { - return err - } - - b.Append(1) - b.lastUnmarshalled = value - b.unmarshalled = true - return b.ValueBuilder().UnmarshalOne(json.NewDecoder(bytes.NewReader(data))) -} - -// Unmarshal can't be used in conjunction with AppendValueFromString (as it calls UnmarshalOne) -func (b *RunEndEncodedBuilder) Unmarshal(dec *json.Decoder) error { - b.finishRun() - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON can't be used in conjunction with AppendValueFromString (as it calls UnmarshalOne) -func (b *RunEndEncodedBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("list builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*RunEndEncoded)(nil) - _ Builder = (*RunEndEncodedBuilder)(nil) -) diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go deleted file mode 100644 index 03352ec44177c..0000000000000 --- a/go/arrow/array/encoded_test.go +++ /dev/null @@ -1,459 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var ( - stringValues, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World", null]`)) - int32Values, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, strings.NewReader(`[10, 20, 30]`)) - int32OnlyNull = array.MakeArrayOfNull(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, 3) -) - -func TestMakeRLEArray(t *testing.T) { - rleArr := array.NewRunEndEncodedArray(int32Values, stringValues, 3, 0) - defer rleArr.Release() - - arrData := rleArr.Data() - newArr := array.MakeFromData(arrData) - defer newArr.Release() - - assert.Same(t, newArr.Data(), arrData) - assert.IsType(t, (*array.RunEndEncoded)(nil), newArr) -} - -func TestRLEFromRunEndsAndValues(t *testing.T) { - rleArray := array.NewRunEndEncodedArray(int32Values, int32Values, 3, 0) - defer rleArray.Release() - - assert.EqualValues(t, 3, rleArray.Len()) - assert.Truef(t, array.Equal(int32Values, rleArray.Values()), "expected: %s\ngot: %s", int32Values, rleArray.Values()) - assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr()) - assert.Zero(t, rleArray.Offset()) - 
assert.Zero(t, rleArray.Data().NullN()) - // one dummy buffer, since code may assume there's at least one nil buffer - assert.Len(t, rleArray.Data().Buffers(), 1) - - // explicit offset - rleArray = array.NewRunEndEncodedArray(int32Values, stringValues, 2, 1) - defer rleArray.Release() - - assert.EqualValues(t, 2, rleArray.Len()) - assert.Truef(t, array.Equal(stringValues, rleArray.Values()), "expected: %s\ngot: %s", stringValues, rleArray.Values()) - assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr()) - assert.EqualValues(t, 1, rleArray.Offset()) - assert.Zero(t, rleArray.Data().NullN()) - - assert.PanicsWithError(t, "invalid: arrow/array: run ends array must be int16, int32, or int64", func() { - array.NewRunEndEncodedArray(stringValues, int32Values, 3, 0) - }) - assert.PanicsWithError(t, "invalid: arrow/array: run ends array cannot contain nulls", func() { - array.NewRunEndEncodedArray(int32OnlyNull, int32Values, 3, 0) - }) -} - -func TestRunLengthEncodedOffsetLength(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[100, 200, 300, 400, 500]`)) - defer runEnds.Release() - - values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "beautiful", "world", "of", "RLE"]`)) - defer values.Release() - - rleArray := array.NewRunEndEncodedArray(runEnds, values, 500, 0) - defer rleArray.Release() - - assert.EqualValues(t, 5, rleArray.GetPhysicalLength()) - assert.EqualValues(t, 0, rleArray.GetPhysicalOffset()) - - slice := array.NewSlice(rleArray, 199, 204).(*array.RunEndEncoded) - defer slice.Release() - - assert.EqualValues(t, 2, slice.GetPhysicalLength()) - assert.EqualValues(t, 1, slice.GetPhysicalOffset()) - - slice2 := array.NewSlice(rleArray, 199, 300).(*array.RunEndEncoded) - defer slice2.Release() - - 
assert.EqualValues(t, 2, slice2.GetPhysicalLength()) - assert.EqualValues(t, 1, slice2.GetPhysicalOffset()) - - slice3 := array.NewSlice(rleArray, 400, 500).(*array.RunEndEncoded) - defer slice3.Release() - - assert.EqualValues(t, 1, slice3.GetPhysicalLength()) - assert.EqualValues(t, 4, slice3.GetPhysicalOffset()) - - slice4 := array.NewSlice(rleArray, 0, 150).(*array.RunEndEncoded) - defer slice4.Release() - - assert.EqualValues(t, 2, slice4.GetPhysicalLength()) - assert.EqualValues(t, 0, slice4.GetPhysicalOffset()) - - zeroLengthAtEnd := array.NewSlice(rleArray, 500, 500).(*array.RunEndEncoded) - defer zeroLengthAtEnd.Release() - - assert.EqualValues(t, 0, zeroLengthAtEnd.GetPhysicalLength()) - assert.EqualValues(t, 5, zeroLengthAtEnd.GetPhysicalOffset()) -} - -func TestRLECompare(t *testing.T) { - rleArray := array.NewRunEndEncodedArray(int32Values, stringValues, 30, 0) - // second that is a copy of the first - standardEquals := array.MakeFromData(rleArray.Data().(*array.Data).Copy()) - - defer rleArray.Release() - defer standardEquals.Release() - - assert.Truef(t, array.Equal(rleArray, standardEquals), "left: %s\nright: %s", rleArray, standardEquals) - assert.False(t, array.Equal(array.NewSlice(rleArray, 0, 29), array.NewSlice(rleArray, 1, 30))) - - // array that is logically the same as our rleArray, but has 2 small - // runs for the first value instead of one large run - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("logical duplicate", func(t *testing.T) { - dupRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[5, 10, 20, 30]`)) - defer dupRunEnds.Release() - strValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, - strings.NewReader(`["Hello", "Hello", "World", null]`)) - defer strValues.Release() - - dupArr := array.NewRunEndEncodedArray(dupRunEnds, strValues, 30, 0) - defer dupArr.Release() - - assert.Truef(t, array.Equal(rleArray, dupArr), "expected: %sgot: 
%s", rleArray, dupArr) - }) - - t.Run("emptyArr", func(t *testing.T) { - emptyRuns, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[]`)) - emptyVals, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) - defer emptyRuns.Release() - defer emptyVals.Release() - - emptyArr := array.NewRunEndEncodedArray(emptyRuns, emptyVals, 0, 0) - defer emptyArr.Release() - - dataCopy := emptyArr.Data().(*array.Data).Copy() - defer dataCopy.Release() - emptyArr2 := array.MakeFromData(dataCopy) - defer emptyArr2.Release() - - assert.Truef(t, array.Equal(emptyArr, emptyArr2), "expected: %sgot: %s", emptyArr, emptyArr2) - }) - - t.Run("different offsets", func(t *testing.T) { - // three different slices that have the value [3, 3, 3, 4, 4, 4, 4] - offsetsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, - strings.NewReader(`[2, 5, 12, 58, 60]`)) - offsetsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, - strings.NewReader(`[81, 86, 99, 100]`)) - offsetsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, - strings.NewReader(`[3, 7]`)) - valsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[1, 2, 3, 4, 5]`)) - valsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[2, 3, 4, 5]`)) - valsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[3, 4]`)) - defer func() { - offsetsa.Release() - offsetsb.Release() - offsetsc.Release() - valsa.Release() - valsb.Release() - valsc.Release() - }() - - differentOffsetsA := array.NewRunEndEncodedArray(offsetsa, valsa, 60, 0) - defer differentOffsetsA.Release() - differentOffsetsB := array.NewRunEndEncodedArray(offsetsb, valsb, 100, 0) - defer differentOffsetsB.Release() - differentOffsetsC := array.NewRunEndEncodedArray(offsetsc, valsc, 7, 0) - defer differentOffsetsC.Release() - - sliceA := array.NewSlice(differentOffsetsA, 9, 16) - defer sliceA.Release() - sliceB := 
array.NewSlice(differentOffsetsB, 83, 90) - defer sliceB.Release() - - assert.True(t, array.Equal(sliceA, sliceB)) - assert.True(t, array.Equal(sliceA, differentOffsetsC)) - assert.True(t, array.Equal(sliceB, differentOffsetsC)) - }) -} - -func TestRunEndEncodedBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - bldr := array.NewBuilder(mem, arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String)) - defer bldr.Release() - - assert.IsType(t, (*array.RunEndEncodedBuilder)(nil), bldr) - reeBldr := bldr.(*array.RunEndEncodedBuilder) - - valBldr := reeBldr.ValueBuilder().(*array.StringBuilder) - - reeBldr.Append(100) - valBldr.Append("Hello") - reeBldr.Append(100) - valBldr.Append("beautiful") - reeBldr.Append(50) - valBldr.Append("world") - reeBldr.ContinueRun(50) - reeBldr.Append(100) - valBldr.Append("of") - reeBldr.Append(100) - valBldr.Append("RLE") - reeBldr.AppendNull() - - rleArray := reeBldr.NewRunEndEncodedArray() - defer rleArray.Release() - - assert.EqualValues(t, 501, rleArray.Len()) - assert.EqualValues(t, 6, rleArray.GetPhysicalLength()) - assert.Equal(t, arrow.INT16, rleArray.RunEndsArr().DataType().ID()) - assert.Equal(t, []int16{100, 200, 300, 400, 500, 501}, rleArray.RunEndsArr().(*array.Int16).Int16Values()) - - strValues := rleArray.Values().(*array.String) - assert.Equal(t, "Hello", strValues.Value(0)) - assert.Equal(t, "beautiful", strValues.Value(1)) - assert.Equal(t, "world", strValues.Value(2)) - assert.Equal(t, "of", strValues.Value(3)) - assert.Equal(t, "RLE", strValues.Value(4)) - assert.True(t, strValues.IsNull(5)) - assert.Equal(t, "Hello", strValues.ValueStr(0)) -} - -func TestRunEndEncodedStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - b := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String) - defer b.Release() - - valBldr := b.ValueBuilder().(*array.StringBuilder) - - b.Append(100) - valBldr.Append("Hello") - b.Append(100) - valBldr.Append("beautiful") - b.Append(50) - valBldr.Append("world") - b.ContinueRun(50) - b.Append(100) - valBldr.Append("of") - b.Append(100) - valBldr.Append("RLE") - b.AppendNull() - - arr := b.NewArray().(*array.RunEndEncoded) - defer arr.Release() - logical := arr.LogicalValuesArray() - defer logical.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewRunEndEncodedBuilder(mem, arrow.PrimitiveTypes.Int16, arrow.BinaryTypes.String) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.RunEndEncoded) - defer arr1.Release() - logical1 := arr1.LogicalValuesArray() - defer logical1.Release() - - assert.True(t, array.Equal(arr, arr1)) - assert.True(t, array.Equal(logical, logical1)) -} - -func TestREEBuilderOverflow(t *testing.T) { - for _, typ := range []arrow.DataType{arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64} { - t.Run("run_ends="+typ.String(), func(t *testing.T) { - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - bldr := array.NewRunEndEncodedBuilder(mem, typ, arrow.BinaryTypes.String) - defer bldr.Release() - - valBldr := bldr.ValueBuilder().(*array.StringBuilder) - assert.Panics(t, func() { - valBldr.Append("Foo") - - maxVal := uint64(1< 0 { - o.WriteString(" ") - } - if !a.IsValid(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *FixedSizeList) newListValue(i int) arrow.Array { - beg, 
end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *FixedSizeList) setData(data *Data) { - a.array.setData(data) - a.n = a.DataType().(*arrow.FixedSizeListType).Len() - a.values = MakeFromData(data.childData[0]) -} - -func arrayEqualFixedSizeList(left, right *FixedSizeList) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *FixedSizeList) Len() int { return a.array.Len() } - -func (a *FixedSizeList) ValueOffsets(i int) (start, end int64) { - n := int64(a.n) - off := int64(a.array.data.offset) - start, end = (off+int64(i))*n, (off+int64(i+1))*n - return -} - -func (a *FixedSizeList) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *FixedSizeList) Release() { - a.array.Release() - a.values.Release() -} - -func (a *FixedSizeList) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - - return json.RawMessage(v) -} - -func (a *FixedSizeList) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if a.IsNull(i) { - enc.Encode(nil) - continue - } - - slice := a.newListValue(i) - if err := enc.Encode(slice); err != nil { - return nil, err - } - slice.Release() - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -type FixedSizeListBuilder struct { - baseListBuilder - n int32 // number of elements in the fixed-size list. -} - -// NewFixedSizeListBuilder returns a builder, using the provided memory allocator. 
-// The created list builder will create a list whose elements will be of type etype. -func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType) *FixedSizeListBuilder { - return &FixedSizeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - dt: arrow.FixedSizeListOf(n, etype), - }, - n, - } -} - -// NewFixedSizeListBuilderWithField returns a builder similarly to -// NewFixedSizeListBuilder, but it accepts a child rather than just a datatype -// to ensure nullability context is preserved. -func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field arrow.Field) *FixedSizeListBuilder { - return &FixedSizeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - dt: arrow.FixedSizeListOfField(n, field), - }, - n, - } -} - -func (b *FixedSizeListBuilder) Type() arrow.DataType { return b.dt } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *FixedSizeListBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.values != nil { - b.values.Release() - b.values = nil - } - } -} - -func (b *FixedSizeListBuilder) Append(v bool) { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(v) -} - -// AppendNull will append null values to the underlying values by itself -func (b *FixedSizeListBuilder) AppendNull() { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(false) - // require to append this due to value indexes - for i := int32(0); i < b.n; i++ { - b.values.AppendNull() - } -} - -// AppendNulls will append n null values to the underlying values by itself -func (b *FixedSizeListBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *FixedSizeListBuilder) AppendEmptyValue() { - b.Append(true) - for i := int32(0); i < b.n; i++ { - b.values.AppendEmptyValue() - } -} - -func (b *FixedSizeListBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *FixedSizeListBuilder) AppendValues(valid []bool) { - b.Reserve(len(valid)) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *FixedSizeListBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *FixedSizeListBuilder) init(capacity int) { - b.builder.init(capacity) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *FixedSizeListBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. 
-func (b *FixedSizeListBuilder) Resize(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *FixedSizeListBuilder) ValueBuilder() Builder { - return b.values -} - -// NewArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder -// so it can be used to build a new array. -func (b *FixedSizeListBuilder) NewArray() arrow.Array { - return b.NewListArray() -} - -// NewListArray creates a List array from the memory buffers used by the builder and resets the FixedSizeListBuilder -// so it can be used to build a new array. -func (b *FixedSizeListBuilder) NewListArray() (a *FixedSizeList) { - data := b.newData() - a = NewFixedSizeListData(data) - data.Release() - return -} - -func (b *FixedSizeListBuilder) newData() (data *Data) { - values := b.values.NewArray() - defer values.Release() - - data = NewData( - b.dt, b.length, - []*memory.Buffer{b.nullBitmap}, - []arrow.ArrayData{values.Data()}, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *FixedSizeListBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - dec := json.NewDecoder(strings.NewReader(s)) - return b.UnmarshalOne(dec) -} - -func (b *FixedSizeListBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - b.Append(true) - if err := b.values.Unmarshal(dec); err != nil { - return err - } - // consume ']' - _, err := dec.Token() - return err - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Struct: b.dt.String(), - } - } - - return nil -} - -func (b *FixedSizeListBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *FixedSizeListBuilder) 
UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("fixed size list builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*FixedSizeList)(nil) - _ Builder = (*FixedSizeListBuilder)(nil) -) diff --git a/go/arrow/array/fixed_size_list_test.go b/go/arrow/array/fixed_size_list_test.go deleted file mode 100644 index e0edb9868cffd..0000000000000 --- a/go/arrow/array/fixed_size_list_test.go +++ /dev/null @@ -1,257 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestFixedSizeListArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32) - defer lb.Release() - - for i := 0; i < 10; i++ { - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - pos := 0 - for i, length := range lengths { - lb.Append(isValid[i]) - for j := 0; j < length; j++ { - vb.Append(vs[pos]) - pos++ - } - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } -} - -func TestFixedSizeListArrayEmpty(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - lb := array.NewFixedSizeListBuilder(pool, 3, arrow.PrimitiveTypes.Int32) - defer lb.Release() - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - if got, want := arr.Len(), 0; got != 
want { - t.Fatalf("got=%d, want=%d", got, want) - } -} - -func TestFixedSizeListArrayBulkAppend(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 4} - isValid = []bool{true, false, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, int32(len(vs)), arrow.PrimitiveTypes.Int32) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - lb.AppendValues(isValid) - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - if got, want := arr.DataType().ID(), arrow.FIXED_SIZE_LIST; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestFixedSizeListArrayStringer(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const N = 3 - var ( - vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer lb.Release() - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - for i, v := range vs { - lb.Append(isValid[i]) - vb.AppendValues(v[:], nil) - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - arr.Retain() - arr.Release() - - want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]` 
- if got, want := arr.String(), want; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) - assert.Equal(t, array.NullValueStr, arr.ValueStr(1)) -} - -func TestFixedSizeListArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const N = 3 - var ( - vs = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer lb.Release() - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - for i, v := range vs { - lb.Append(isValid[i]) - vb.AppendValues(v[:], nil) - } - - arr := lb.NewArray().(*array.FixedSizeList) - defer arr.Release() - - arr.Retain() - arr.Release() - - want := `[[0 1 2] (null) [6 7 8] [9 -9 -8]]` - if got, want := arr.String(), want; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - sub := array.NewSlice(arr, 1, 3).(*array.FixedSizeList) - defer sub.Release() - - want = `[(null) [6 7 8]]` - if got, want := sub.String(), want; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestFixedSizeListStringRoundTrip(t *testing.T) { - // 1. create array - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const N = 3 - var ( - values = [][N]int32{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}, {9, -9, -8}} - valid = []bool{true, false, true, true} - ) - - b := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer b.Release() - - vb := b.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(values)) - - for i, v := range values { - b.Append(valid[i]) - vb.AppendValues(v[:], nil) - } - - arr := b.NewArray().(*array.FixedSizeList) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewFixedSizeListBuilder(pool, N, arrow.PrimitiveTypes.Int32) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.FixedSizeList) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/fixedsize_binary.go b/go/arrow/array/fixedsize_binary.go deleted file mode 100644 index f4d16c6386d60..0000000000000 --- a/go/arrow/array/fixedsize_binary.go +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of fixed-length binary strings. -type FixedSizeBinary struct { - array - - valueBytes []byte - bytewidth int32 -} - -// NewFixedSizeBinaryData constructs a new fixed-size binary array from data. 
-func NewFixedSizeBinaryData(data arrow.ArrayData) *FixedSizeBinary { - a := &FixedSizeBinary{bytewidth: int32(data.DataType().(arrow.FixedWidthDataType).BitWidth() / 8)} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Value returns the fixed-size slice at index i. This value should not be mutated. -func (a *FixedSizeBinary) Value(i int) []byte { - i += a.array.data.offset - var ( - bw = int(a.bytewidth) - beg = i * bw - end = (i + 1) * bw - ) - return a.valueBytes[beg:end] -} -func (a *FixedSizeBinary) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return base64.StdEncoding.EncodeToString(a.Value(i)) -} - -func (a *FixedSizeBinary) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *FixedSizeBinary) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.valueBytes = vals.Bytes() - } - -} - -func (a *FixedSizeBinary) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.Value(i) -} - -func (a *FixedSizeBinary) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.Value(i) - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualFixedSizeBinary(left, right *FixedSizeBinary) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !bytes.Equal(left.Value(i), right.Value(i)) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*FixedSizeBinary)(nil) -) diff --git a/go/arrow/array/fixedsize_binary_test.go b/go/arrow/array/fixedsize_binary_test.go deleted file mode 100644 index 4a32cb9692a06..0000000000000 --- a/go/arrow/array/fixedsize_binary_test.go 
+++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -func TestFixedSizeBinary(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} - b := array.NewFixedSizeBinaryBuilder(mem, &dtype) - - zero := make([]byte, dtype.ByteWidth) - - values := [][]byte{ - []byte("7654321"), - nil, - []byte("AZERTYU"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - // encoded abcdefg base64 - assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) - - b.Retain() - b.Release() - - a := b.NewFixedSizeBinaryArray() - assert.Equal(t, 4, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("7654321"), a.Value(0)) - assert.Equal(t, "YWJjZGVmZw==", a.ValueStr(3)) - assert.Equal(t, zero, a.Value(1)) - assert.Equal(t, true, a.IsNull(1)) - assert.Equal(t, false, a.IsValid(1)) - assert.Equal(t, []byte("AZERTYU"), 
a.Value(2)) - a.Release() - - // Test builder reset and NewArray API. - b.AppendValues(values, valid) - a = b.NewArray().(*array.FixedSizeBinary) - assert.Equal(t, 3, a.Len()) - assert.Equal(t, 1, a.NullN()) - assert.Equal(t, []byte("7654321"), a.Value(0)) - assert.Equal(t, zero, a.Value(1)) - assert.Equal(t, []byte("AZERTYU"), a.Value(2)) - a.Release() - - b.Release() -} - -func TestFixedSizeBinarySlice(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.FixedSizeBinaryType{ByteWidth: 4} - b := array.NewFixedSizeBinaryBuilder(mem, dtype) - defer b.Release() - - var data = [][]byte{ - []byte("ABCD"), - []byte("1234"), - nil, - []byte("AZER"), - } - b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewFixedSizeBinaryArray() - defer arr.Release() - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.FixedSizeBinary) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) "AZER"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if got, want := v.NullN(), 1; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestFixedSizeBinary_MarshalUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.FixedSizeBinaryType{ByteWidth: 4} - b := array.NewFixedSizeBinaryBuilder(mem, dtype) - defer b.Release() - - var data = [][]byte{ - []byte("ABCD"), - []byte("1234"), - nil, - []byte("AZER"), - } - b.AppendValues(data[:2], nil) - b.AppendNull() - b.Append(data[3]) - - arr := b.NewFixedSizeBinaryArray() - defer arr.Release() - - jsonBytes, err := arr.MarshalJSON() - if err != nil { - t.Fatalf("failed to marshal json: %v", err) - } - - err = b.UnmarshalJSON(jsonBytes) - if err != nil { - t.Fatalf("failed to 
unmarshal json: %v", err) - } - gotArr := b.NewFixedSizeBinaryArray() - defer gotArr.Release() - - gotString := gotArr.String() - wantString := arr.String() - if gotString != wantString { - t.Fatalf("got=%q, want=%q", gotString, wantString) - } -} - -func TestFixedSizeBinaryStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.FixedSizeBinaryType{ByteWidth: 7} - b := array.NewFixedSizeBinaryBuilder(mem, dt) - - values := [][]byte{ - []byte("7654321"), - nil, - []byte("AZERTYU"), - } - valid := []bool{true, false, true} - b.AppendValues(values, valid) - // encoded abcdefg base64 - assert.NoError(t, b.AppendValueFromString("YWJjZGVmZw==")) - - arr := b.NewArray().(*array.FixedSizeBinary) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewFixedSizeBinaryBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.FixedSizeBinary) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/fixedsize_binarybuilder.go b/go/arrow/array/fixedsize_binarybuilder.go deleted file mode 100644 index 96d58632ab8c8..0000000000000 --- a/go/arrow/array/fixedsize_binarybuilder.go +++ /dev/null @@ -1,261 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "encoding/base64" - "fmt" - "reflect" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A FixedSizeBinaryBuilder is used to build a FixedSizeBinary array using the Append methods. -type FixedSizeBinaryBuilder struct { - builder - - dtype *arrow.FixedSizeBinaryType - values *byteBufferBuilder -} - -func NewFixedSizeBinaryBuilder(mem memory.Allocator, dtype *arrow.FixedSizeBinaryType) *FixedSizeBinaryBuilder { - b := &FixedSizeBinaryBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - values: newByteBufferBuilder(mem), - } - return b -} - -func (b *FixedSizeBinaryBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (b *FixedSizeBinaryBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.values != nil { - b.values.Release() - b.values = nil - } - } -} - -func (b *FixedSizeBinaryBuilder) Append(v []byte) { - if len(v) != b.dtype.ByteWidth { - // TODO(alexandre): should we return an error instead? 
- panic("len(v) != b.dtype.ByteWidth") - } - - b.Reserve(1) - b.values.Append(v) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *FixedSizeBinaryBuilder) AppendNull() { - b.Reserve(1) - b.values.Advance(b.dtype.ByteWidth) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *FixedSizeBinaryBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *FixedSizeBinaryBuilder) AppendEmptyValue() { - b.Reserve(1) - b.values.Advance(b.dtype.ByteWidth) - b.UnsafeAppendBoolToBitmap(true) -} - -func (b *FixedSizeBinaryBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *FixedSizeBinaryBuilder) UnsafeAppend(v []byte) { - b.values.unsafeAppend(v) - b.UnsafeAppendBoolToBitmap(true) -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *FixedSizeBinaryBuilder) AppendValues(v [][]byte, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - for _, vv := range v { - switch len(vv) { - case 0: - b.values.Advance(b.dtype.ByteWidth) - case b.dtype.ByteWidth: - b.values.Append(vv) - default: - panic(fmt.Errorf("array: invalid binary length (got=%d, want=%d)", len(vv), b.dtype.ByteWidth)) - } - } - - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *FixedSizeBinaryBuilder) init(capacity int) { - b.builder.init(capacity) - b.values.resize(capacity * b.dtype.ByteWidth) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *FixedSizeBinaryBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. 
If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *FixedSizeBinaryBuilder) Resize(n int) { - b.builder.resize(n, b.init) -} - -// NewArray creates a FixedSizeBinary array from the memory buffers used by the -// builder and resets the FixedSizeBinaryBuilder so it can be used to build a new array. -func (b *FixedSizeBinaryBuilder) NewArray() arrow.Array { - return b.NewFixedSizeBinaryArray() -} - -// NewFixedSizeBinaryArray creates a FixedSizeBinary array from the memory buffers used by the builder and resets the FixedSizeBinaryBuilder -// so it can be used to build a new array. -func (b *FixedSizeBinaryBuilder) NewFixedSizeBinaryArray() (a *FixedSizeBinary) { - data := b.newData() - a = NewFixedSizeBinaryData(data) - data.Release() - return -} - -func (b *FixedSizeBinaryBuilder) newData() (data *Data) { - values := b.values.Finish() - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, values}, nil, b.nulls, 0) - - if values != nil { - values.Release() - } - - b.builder.reset() - - return -} - -func (b *FixedSizeBinaryBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - data, err := base64.StdEncoding.DecodeString(s) - if err != nil { - b.AppendNull() - return err - } - b.Append(data) - return nil -} - -func (b *FixedSizeBinaryBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - var val []byte - switch v := t.(type) { - case string: - data, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - val = data - case []byte: - val = v - case nil: - b.AppendNull() - return nil - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), - } - } - - if len(val) != b.dtype.ByteWidth { - return 
&json.UnmarshalTypeError{ - Value: fmt.Sprint(val), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - Struct: fmt.Sprintf("FixedSizeBinary[%d]", b.dtype.ByteWidth), - } - } - b.Append(val) - return nil -} - -func (b *FixedSizeBinaryBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *FixedSizeBinaryBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("fixed size binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ Builder = (*FixedSizeBinaryBuilder)(nil) -) diff --git a/go/arrow/array/fixedsize_binarybuilder_test.go b/go/arrow/array/fixedsize_binarybuilder_test.go deleted file mode 100644 index 0c58c65ecb02e..0000000000000 --- a/go/arrow/array/fixedsize_binarybuilder_test.go +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestFixedSizeBinaryBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} - b := NewFixedSizeBinaryBuilder(mem, &dtype) - - b.Append([]byte("1234567")) - b.AppendNull() - b.Append([]byte("ABCDEFG")) - b.AppendNull() - - assert.Equal(t, 4, b.Len(), "unexpected Len()") - assert.Equal(t, 2, b.NullN(), "unexpected NullN()") - - values := [][]byte{ - []byte("7654321"), - nil, - []byte("AZERTYU"), - } - b.AppendValues(values, []bool{true, false, true}) - - assert.Equal(t, 7, b.Len(), "unexpected Len()") - assert.Equal(t, 3, b.NullN(), "unexpected NullN()") - - a := b.NewFixedSizeBinaryArray() - - // check state of builder after NewFixedSizeBinaryArray - assert.Zero(t, b.Len(), "unexpected ArrayBuilder.Len(), NewFixedSizeBinaryArray did not reset state") - assert.Zero(t, b.Cap(), "unexpected ArrayBuilder.Cap(), NewFixedSizeBinaryArray did not reset state") - assert.Zero(t, b.NullN(), "unexpected ArrayBuilder.NullN(), NewFixedSizeBinaryArray did not reset state") - assert.Equal(t, a.String(), `["1234567" (null) "ABCDEFG" (null) "7654321" (null) "AZERTYU"]`) - - b.Release() - a.Release() -} - -func TestFixedSizeBinaryBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := arrow.FixedSizeBinaryType{ByteWidth: 7} - ab := NewFixedSizeBinaryBuilder(mem, &dtype) - defer ab.Release() - - want := [][]byte{ - []byte("1234567"), - []byte("AZERTYU"), - []byte("7654321"), - } - - fixedSizeValues := func(a *FixedSizeBinary) [][]byte { - vs := make([][]byte, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([][]byte{}, nil) - a := 
ab.NewFixedSizeBinaryArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFixedSizeBinaryArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([][]byte{}, nil) - ab.AppendValues(want, nil) - a = ab.NewFixedSizeBinaryArray() - assert.Equal(t, want, fixedSizeValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([][]byte{}, nil) - a = ab.NewFixedSizeBinaryArray() - assert.Equal(t, want, fixedSizeValues(a)) - a.Release() -} diff --git a/go/arrow/array/float16.go b/go/arrow/array/float16.go deleted file mode 100644 index 757b658a9150d..0000000000000 --- a/go/arrow/array/float16.go +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of Float16 values. 
-type Float16 struct { - array - values []float16.Num -} - -func NewFloat16Data(data arrow.ArrayData) *Float16 { - a := &Float16{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Float16) Value(i int) float16.Num { return a.values[i] } -func (a *Float16) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i).String() -} - -func (a *Float16) Values() []float16.Num { return a.values } - -func (a *Float16) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", a.values[i].Float32()) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Float16) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Float16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Float16) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i].Float32() - } - return nil -} - -func (a *Float16) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i, v := range a.values { - if !a.IsValid(i) { - vals[i] = nil - continue - } - - switch { - case v.IsNaN(): - vals[i] = "NaN" - case v.IsInf() && !v.Signbit(): - vals[i] = "+Inf" - case v.IsInf() && v.Signbit(): - vals[i] = "-Inf" - default: - vals[i] = v.Float32() - } - } - return json.Marshal(vals) -} - -func arrayEqualFloat16(left, right *Float16) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -var ( - _ arrow.Array = (*Float16)(nil) -) diff --git a/go/arrow/array/float16_builder.go b/go/arrow/array/float16_builder.go deleted file mode 100644 index 7543f2b6f96dd..0000000000000 --- 
a/go/arrow/array/float16_builder.go +++ /dev/null @@ -1,263 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type Float16Builder struct { - builder - - data *memory.Buffer - rawData []float16.Num -} - -func NewFloat16Builder(mem memory.Allocator) *Float16Builder { - return &Float16Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Float16Builder) Type() arrow.DataType { return arrow.FixedWidthTypes.Float16 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *Float16Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Float16Builder) Append(v float16.Num) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Float16Builder) UnsafeAppend(v float16.Num) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Float16Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Float16Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Float16Builder) AppendEmptyValue() { - b.Reserve(1) - b.UnsafeAppend(float16.Num{}) -} - -func (b *Float16Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Float16Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Float16Builder) AppendValues(v []float16.Num, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - if len(v) > 0 { - arrow.Float16Traits.Copy(b.rawData[b.length:], v) - } - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Float16Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint16Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Float16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Float16Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Float16Traits.BytesRequired(n)) - b.rawData = arrow.Float16Traits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder -// so it can be used to build a new array. -func (b *Float16Builder) NewArray() arrow.Array { - return b.NewFloat16Array() -} - -// NewFloat16Array creates a Float16 array from the memory buffers used by the builder and resets the Float16Builder -// so it can be used to build a new array. 
-func (b *Float16Builder) NewFloat16Array() (a *Float16) { - data := b.newData() - a = NewFloat16Data(data) - data.Release() - return -} - -func (b *Float16Builder) newData() (data *Data) { - bytesRequired := arrow.Float16Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.Float16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Float16Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseFloat(s, 32) - if err != nil { - b.AppendNull() - return err - } - b.Append(float16.New(float32(v))) - return nil -} - -func (b *Float16Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case float64: - b.Append(float16.New(float32(v))) - case string: - f, err := strconv.ParseFloat(v, 32) - if err != nil { - return err - } - // this will currently silently truncate if it is too large - b.Append(float16.New(float32(f))) - case json.Number: - f, err := v.Float64() - if err != nil { - return err - } - b.Append(float16.New(float32(f))) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(float16.Num{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *Float16Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add values to this builder from unmarshalling the -// array of values. Currently values that are larger than a float16 will -// be silently truncated. 
-func (b *Float16Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("float16 builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} diff --git a/go/arrow/array/float16_builder_test.go b/go/arrow/array/float16_builder_test.go deleted file mode 100644 index ab25e544ed833..0000000000000 --- a/go/arrow/array/float16_builder_test.go +++ /dev/null @@ -1,156 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func float32Values(a *array.Float16) []float32 { - values := make([]float32, a.Len()) - for i, v := range a.Values() { - values[i] = v.Float32() - } - return values -} - -func TestNewFloat16Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat16Builder(mem) - - ab.Append(float16.New(1)) - ab.Append(float16.New(2)) - ab.Append(float16.New(3)) - ab.AppendNull() - ab.Append(float16.New(5)) - ab.Append(float16.New(6)) - ab.AppendNull() - ab.Append(float16.New(8)) - ab.Append(float16.New(9)) - ab.Append(float16.New(10)) - assert.NoError(t, ab.AppendValueFromString("11.0")) - - // check state of builder before NewFloat16Array - assert.Equal(t, 11, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewFloat16Array() - assert.Equal(t, "1", a.ValueStr(0)) - // check state of builder after NewFloat16Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat16Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat16Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewFloat16Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - - assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, 11}, float32Values(a), "unexpected Float16Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Values(), 11, "unexpected length of Float16Values") - - a.Release() - ab.Append(float16.New(7)) - ab.Append(float16.New(8)) - - a = ab.NewFloat16Array() - - assert.Equal(t, 0, a.NullN()) - 
assert.Equal(t, []float32{7, 8}, float32Values(a)) - assert.Len(t, a.Values(), 2) - - a.Release() -} - -func TestFloat16Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat16Builder(mem) - defer ab.Release() - - want := []float16.Num{float16.New(3), float16.New(4)} - - ab.AppendValues([]float16.Num{}, nil) - a := ab.NewFloat16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFloat16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(want, nil) - a = ab.NewFloat16Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues([]float16.Num{}, nil) - ab.AppendValues(want, nil) - a = ab.NewFloat16Array() - assert.Equal(t, want, a.Values()) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]float16.Num{}, nil) - a = ab.NewFloat16Array() - assert.Equal(t, want, a.Values()) - a.Release() -} - -func TestFloat16StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewFloat16Builder(mem) - defer b.Release() - - b.Append(float16.New(1)) - b.Append(float16.New(2)) - b.Append(float16.New(3)) - b.AppendNull() - b.Append(float16.New(5)) - b.Append(float16.New(6)) - b.AppendNull() - b.Append(float16.New(8)) - b.Append(float16.New(9)) - b.Append(float16.New(10)) - - arr := b.NewArray().(*array.Float16) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewFloat16Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Float16) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/interval.go b/go/arrow/array/interval.go deleted file mode 100644 index 66c6eca21bca5..0000000000000 --- a/go/arrow/array/interval.go +++ /dev/null @@ -1,953 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "strconv" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -func NewIntervalData(data arrow.ArrayData) arrow.Array { - switch data.DataType().(type) { - case *arrow.MonthIntervalType: - return NewMonthIntervalData(data.(*Data)) - case *arrow.DayTimeIntervalType: - return NewDayTimeIntervalData(data.(*Data)) - case *arrow.MonthDayNanoIntervalType: - return NewMonthDayNanoIntervalData(data.(*Data)) - default: - panic(fmt.Errorf("arrow/array: unknown interval data type %T", data.DataType())) - } -} - -// A type which represents an immutable sequence of arrow.MonthInterval values. -type MonthInterval struct { - array - values []arrow.MonthInterval -} - -func NewMonthIntervalData(data arrow.ArrayData) *MonthInterval { - a := &MonthInterval{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *MonthInterval) Value(i int) arrow.MonthInterval { return a.values[i] } -func (a *MonthInterval) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return fmt.Sprintf("%v", a.Value(i)) -} -func (a *MonthInterval) MonthIntervalValues() []arrow.MonthInterval { return a.values } - -func (a *MonthInterval) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *MonthInterval) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.MonthIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - 
a.values = a.values[beg:end] - } -} - -func (a *MonthInterval) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i] - } - return nil -} - -// MarshalJSON will create a json array out of a MonthInterval array, -// each value will be an object of the form {"months": #} where -// # is the numeric value of that index -func (a *MonthInterval) MarshalJSON() ([]byte, error) { - if a.NullN() == 0 { - return json.Marshal(a.values) - } - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualMonthInterval(left, right *MonthInterval) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type MonthIntervalBuilder struct { - builder - - data *memory.Buffer - rawData []arrow.MonthInterval -} - -func NewMonthIntervalBuilder(mem memory.Allocator) *MonthIntervalBuilder { - return &MonthIntervalBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *MonthIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.MonthInterval } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *MonthIntervalBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *MonthIntervalBuilder) Append(v arrow.MonthInterval) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *MonthIntervalBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *MonthIntervalBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *MonthIntervalBuilder) AppendEmptyValue() { - b.Append(arrow.MonthInterval(0)) -} - -func (b *MonthIntervalBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *MonthIntervalBuilder) UnsafeAppend(v arrow.MonthInterval) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *MonthIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *MonthIntervalBuilder) AppendValues(v []arrow.MonthInterval, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.MonthIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *MonthIntervalBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.MonthIntervalTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *MonthIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *MonthIntervalBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.MonthIntervalTraits.BytesRequired(n)) - b.rawData = arrow.MonthIntervalTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a MonthInterval array from the memory buffers used by the builder and resets the MonthIntervalBuilder -// so it can be used to build a new array. -func (b *MonthIntervalBuilder) NewArray() arrow.Array { - return b.NewMonthIntervalArray() -} - -// NewMonthIntervalArray creates a MonthInterval array from the memory buffers used by the builder and resets the MonthIntervalBuilder -// so it can be used to build a new array. 
-func (b *MonthIntervalBuilder) NewMonthIntervalArray() (a *MonthInterval) { - data := b.newData() - a = NewMonthIntervalData(data) - data.Release() - return -} - -func (b *MonthIntervalBuilder) newData() (data *Data) { - bytesRequired := arrow.MonthIntervalTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.MonthInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *MonthIntervalBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 32) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.MonthInterval(v)) - return nil -} - -func (b *MonthIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { - var v *arrow.MonthInterval - if err := dec.Decode(&v); err != nil { - return err - } - - if v == nil { - b.AppendNull() - } else { - b.Append(*v) - } - return nil -} - -func (b *MonthIntervalBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the unmarshalled values of an array to the builder, -// values are expected to be strings of the form "#months" where # is the int32 -// value that will be added to the builder. -func (b *MonthIntervalBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("month interval builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// A type which represents an immutable sequence of arrow.DayTimeInterval values. 
-type DayTimeInterval struct { - array - values []arrow.DayTimeInterval -} - -func NewDayTimeIntervalData(data arrow.ArrayData) *DayTimeInterval { - a := &DayTimeInterval{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *DayTimeInterval) Value(i int) arrow.DayTimeInterval { return a.values[i] } -func (a *DayTimeInterval) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - data, err := json.Marshal(a.GetOneForMarshal(i)) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *DayTimeInterval) DayTimeIntervalValues() []arrow.DayTimeInterval { return a.values } - -func (a *DayTimeInterval) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *DayTimeInterval) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.DayTimeIntervalTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *DayTimeInterval) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i] - } - return nil -} - -// MarshalJSON will marshal this array to JSON as an array of objects, -// consisting of the form {"days": #, "milliseconds": #} for each element. 
-func (a *DayTimeInterval) MarshalJSON() ([]byte, error) { - if a.NullN() == 0 { - return json.Marshal(a.values) - } - vals := make([]interface{}, a.Len()) - for i, v := range a.values { - if a.IsValid(i) { - vals[i] = v - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualDayTimeInterval(left, right *DayTimeInterval) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type DayTimeIntervalBuilder struct { - builder - - data *memory.Buffer - rawData []arrow.DayTimeInterval -} - -func NewDayTimeIntervalBuilder(mem memory.Allocator) *DayTimeIntervalBuilder { - return &DayTimeIntervalBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *DayTimeIntervalBuilder) Type() arrow.DataType { return arrow.FixedWidthTypes.DayTimeInterval } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *DayTimeIntervalBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *DayTimeIntervalBuilder) Append(v arrow.DayTimeInterval) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *DayTimeIntervalBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *DayTimeIntervalBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *DayTimeIntervalBuilder) AppendEmptyValue() { - b.Append(arrow.DayTimeInterval{}) -} - -func (b *DayTimeIntervalBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *DayTimeIntervalBuilder) UnsafeAppend(v arrow.DayTimeInterval) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - 
b.rawData[b.length] = v - b.length++ -} - -func (b *DayTimeIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *DayTimeIntervalBuilder) AppendValues(v []arrow.DayTimeInterval, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.DayTimeIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *DayTimeIntervalBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.DayTimeIntervalTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *DayTimeIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. 
-func (b *DayTimeIntervalBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.DayTimeIntervalTraits.BytesRequired(n)) - b.rawData = arrow.DayTimeIntervalTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a DayTimeInterval array from the memory buffers used by the builder and resets the DayTimeIntervalBuilder -// so it can be used to build a new array. -func (b *DayTimeIntervalBuilder) NewArray() arrow.Array { - return b.NewDayTimeIntervalArray() -} - -// NewDayTimeIntervalArray creates a DayTimeInterval array from the memory buffers used by the builder and resets the DayTimeIntervalBuilder -// so it can be used to build a new array. -func (b *DayTimeIntervalBuilder) NewDayTimeIntervalArray() (a *DayTimeInterval) { - data := b.newData() - a = NewDayTimeIntervalData(data) - data.Release() - return -} - -func (b *DayTimeIntervalBuilder) newData() (data *Data) { - bytesRequired := arrow.DayTimeIntervalTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.DayTimeInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *DayTimeIntervalBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - var v arrow.DayTimeInterval - if err := json.Unmarshal([]byte(s), &v); err != nil { - b.AppendNull() - return err - } - b.Append(v) - return nil -} - -func (b *DayTimeIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { - var v *arrow.DayTimeInterval - if err := dec.Decode(&v); err != nil { - return err - } - - if v == nil { - b.AppendNull() - } else { - b.Append(*v) - } - return nil -} - -func (b 
*DayTimeIntervalBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON will add the values unmarshalled from an array to the builder, -// with the values expected to be objects of the form {"days": #, "milliseconds": #} -func (b *DayTimeIntervalBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("day_time interval builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// A type which represents an immutable sequence of arrow.DayTimeInterval values. -type MonthDayNanoInterval struct { - array - values []arrow.MonthDayNanoInterval -} - -func NewMonthDayNanoIntervalData(data arrow.ArrayData) *MonthDayNanoInterval { - a := &MonthDayNanoInterval{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *MonthDayNanoInterval) Value(i int) arrow.MonthDayNanoInterval { return a.values[i] } -func (a *MonthDayNanoInterval) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - data, err := json.Marshal(a.GetOneForMarshal(i)) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *MonthDayNanoInterval) MonthDayNanoIntervalValues() []arrow.MonthDayNanoInterval { - return a.values -} - -func (a *MonthDayNanoInterval) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *MonthDayNanoInterval) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.MonthDayNanoIntervalTraits.CastFromBytes(vals.Bytes()) - beg := 
a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *MonthDayNanoInterval) GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.values[i] - } - return nil -} - -// MarshalJSON will marshal this array to a JSON array with elements -// marshalled to the form {"months": #, "days": #, "nanoseconds": #} -func (a *MonthDayNanoInterval) MarshalJSON() ([]byte, error) { - if a.NullN() == 0 { - return json.Marshal(a.values) - } - vals := make([]interface{}, a.Len()) - for i, v := range a.values { - if a.IsValid(i) { - vals[i] = v - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualMonthDayNanoInterval(left, right *MonthDayNanoInterval) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type MonthDayNanoIntervalBuilder struct { - builder - - data *memory.Buffer - rawData []arrow.MonthDayNanoInterval -} - -func NewMonthDayNanoIntervalBuilder(mem memory.Allocator) *MonthDayNanoIntervalBuilder { - return &MonthDayNanoIntervalBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *MonthDayNanoIntervalBuilder) Type() arrow.DataType { - return arrow.FixedWidthTypes.MonthDayNanoInterval -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *MonthDayNanoIntervalBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *MonthDayNanoIntervalBuilder) Append(v arrow.MonthDayNanoInterval) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *MonthDayNanoIntervalBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *MonthDayNanoIntervalBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *MonthDayNanoIntervalBuilder) AppendEmptyValue() { - b.Append(arrow.MonthDayNanoInterval{}) -} - -func (b *MonthDayNanoIntervalBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *MonthDayNanoIntervalBuilder) UnsafeAppend(v arrow.MonthDayNanoInterval) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *MonthDayNanoIntervalBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *MonthDayNanoIntervalBuilder) AppendValues(v []arrow.MonthDayNanoInterval, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.MonthDayNanoIntervalTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *MonthDayNanoIntervalBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.MonthDayNanoIntervalTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *MonthDayNanoIntervalBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *MonthDayNanoIntervalBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.MonthDayNanoIntervalTraits.BytesRequired(n)) - b.rawData = arrow.MonthDayNanoIntervalTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a MonthDayNanoInterval array from the memory buffers used by the builder and resets the MonthDayNanoIntervalBuilder -// so it can be used to build a new array. -func (b *MonthDayNanoIntervalBuilder) NewArray() arrow.Array { - return b.NewMonthDayNanoIntervalArray() -} - -// NewMonthDayNanoIntervalArray creates a MonthDayNanoInterval array from the memory buffers used by the builder and resets the MonthDayNanoIntervalBuilder -// so it can be used to build a new array. 
-func (b *MonthDayNanoIntervalBuilder) NewMonthDayNanoIntervalArray() (a *MonthDayNanoInterval) { - data := b.newData() - a = NewMonthDayNanoIntervalData(data) - data.Release() - return -} - -func (b *MonthDayNanoIntervalBuilder) newData() (data *Data) { - bytesRequired := arrow.MonthDayNanoIntervalTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.FixedWidthTypes.MonthDayNanoInterval, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *MonthDayNanoIntervalBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - var v arrow.MonthDayNanoInterval - if err := json.Unmarshal([]byte(s), &v); err != nil { - return err - } - b.Append(v) - return nil -} - -func (b *MonthDayNanoIntervalBuilder) UnmarshalOne(dec *json.Decoder) error { - var v *arrow.MonthDayNanoInterval - if err := dec.Decode(&v); err != nil { - return err - } - - if v == nil { - b.AppendNull() - } else { - b.Append(*v) - } - return nil -} - -func (b *MonthDayNanoIntervalBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -// UnmarshalJSON unmarshals a JSON array of objects and adds them to this builder, -// each element of the array is expected to be an object of the form -// {"months": #, "days": #, "nanoseconds": #} -func (b *MonthDayNanoIntervalBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("month_day_nano interval builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = 
(*MonthInterval)(nil) - _ arrow.Array = (*DayTimeInterval)(nil) - _ arrow.Array = (*MonthDayNanoInterval)(nil) - - _ Builder = (*MonthIntervalBuilder)(nil) - _ Builder = (*DayTimeIntervalBuilder)(nil) - _ Builder = (*MonthDayNanoIntervalBuilder)(nil) -) diff --git a/go/arrow/array/interval_test.go b/go/arrow/array/interval_test.go deleted file mode 100644 index 6d36885a627d9..0000000000000 --- a/go/arrow/array/interval_test.go +++ /dev/null @@ -1,524 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "math" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestMonthIntervalArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []arrow.MonthInterval{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - b := array.NewMonthIntervalBuilder(mem) - defer b.Release() - - b.Retain() - b.Release() - - b.AppendValues(want[:2], nil) - b.AppendNull() - b.Append(want[3]) - - if got, want := b.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := b.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := b.NewMonthIntervalArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.INTERVAL_MONTHS { - t.Fatalf("invalid type: got=%q, want=interval_months", sub.DataType().Name()) - } - - if _, ok := sub.(*array.MonthInterval); !ok { - t.Fatalf("could not type-assert to array.MonthInterval") - } - - if got, want := arr.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() 
- - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.MonthInterval) - if !ok { - t.Fatalf("could not type-assert to array.MonthInterval") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestMonthIntervalBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []arrow.MonthInterval{1, 2, 3, 4} - - b := array.NewMonthIntervalBuilder(mem) - defer b.Release() - - miValues := func(a *array.MonthInterval) []arrow.MonthInterval { - vs := make([]arrow.MonthInterval, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - b.AppendValues([]arrow.MonthInterval{}, nil) - arr := b.NewMonthIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues(nil, nil) - arr = b.NewMonthIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues([]arrow.MonthInterval{}, nil) - b.AppendValues(want, nil) - arr = b.NewMonthIntervalArray() - assert.Equal(t, want, miValues(arr)) - arr.Release() - - b.AppendValues(want, nil) - b.AppendValues([]arrow.MonthInterval{}, nil) - arr = b.NewMonthIntervalArray() - assert.Equal(t, want, miValues(arr)) - arr.Release() -} - -func TestMonthIntervalStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []arrow.MonthInterval{1, 2, 3, 4} - valid = []bool{true, true, false, true} - ) - - b := array.NewMonthIntervalBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.MonthInterval) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewMonthIntervalBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.MonthInterval) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestDayTimeArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []arrow.DayTimeInterval{ - {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, - {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}} - valids = []bool{true, true, false, true} - ) - - b := array.NewDayTimeIntervalBuilder(mem) - defer b.Release() - - b.Retain() - b.Release() - - b.AppendValues(want[:2], nil) - b.AppendNull() - b.Append(want[3]) - - if got, want := b.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := b.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := b.NewDayTimeIntervalArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.INTERVAL_DAY_TIME { - t.Fatalf("invalid type: got=%q, want=interval_day_time", sub.DataType().Name()) - } - - if _, ok := sub.(*array.DayTimeInterval); !ok { - t.Fatalf("could not type-assert to array.DayTimeInterval") - } - - if got, 
want := arr.String(), `[{1 1} {2 2} (null) {4 4}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.DayTimeInterval) - if !ok { - t.Fatalf("could not type-assert to array.DayInterval") - } - - if got, want := v.String(), `[(null) {4 4}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestDayTimeIntervalBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []arrow.DayTimeInterval{ - {Days: 1, Milliseconds: 1}, {Days: 2, Milliseconds: 2}, - {Days: 3, Milliseconds: 3}, {Days: 4, Milliseconds: 4}} - - b := array.NewDayTimeIntervalBuilder(mem) - defer b.Release() - - dtValues := func(a *array.DayTimeInterval) []arrow.DayTimeInterval { - vs := make([]arrow.DayTimeInterval, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - b.AppendValues([]arrow.DayTimeInterval{}, nil) - arr := b.NewDayTimeIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues(nil, nil) - arr = b.NewDayTimeIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues([]arrow.DayTimeInterval{}, nil) - b.AppendValues(want, nil) - arr = b.NewDayTimeIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() - - b.AppendValues(want, nil) - b.AppendValues([]arrow.DayTimeInterval{}, nil) - arr = b.NewDayTimeIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() -} - -func TestDayTimeIntervalStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []arrow.DayTimeInterval{ - {Days: 1, Milliseconds: 1}, - {Days: 2, Milliseconds: 2}, - {Days: 3, Milliseconds: 3}, - {Days: 4, Milliseconds: 4}, - } - valid = []bool{true, true, false, true} - ) - - b := array.NewDayTimeIntervalBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.DayTimeInterval) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDayTimeIntervalBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.DayTimeInterval) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestMonthDayNanoArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []arrow.MonthDayNanoInterval{ - {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, - {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}, - {Months: 0, Days: 0, Nanoseconds: 0}, {Months: -1, Days: -2, Nanoseconds: -300}, - {Months: math.MaxInt32, Days: math.MinInt32, Nanoseconds: math.MaxInt64}, - {Months: math.MinInt32, Days: math.MaxInt32, Nanoseconds: math.MinInt64}, - } - valids = []bool{true, true, false, true, true, true, false, true} - ) - - b := array.NewMonthDayNanoIntervalBuilder(mem) - defer b.Release() - - b.Retain() - b.Release() - - b.AppendValues(want[:2], nil) - b.AppendNull() - b.Append(want[3]) - b.AppendValues(want[4:], valids[4:]) - - if got, want := b.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := b.NullN(), 2; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := b.NewMonthDayNanoIntervalArray() - defer arr.Release() - - 
arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 2; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.INTERVAL_MONTH_DAY_NANO { - t.Fatalf("invalid type: got=%q, want=interval", sub.DataType().Name()) - } - - if _, ok := sub.(*array.MonthDayNanoInterval); !ok { - t.Fatalf("could not type-assert to array.MonthDayNanoInterval") - } - - if got, want := arr.String(), `[{1 1 1000} {2 2 2000} (null) {4 4 4000} {0 0 0} {-1 -2 -300} (null) {-2147483648 2147483647 -9223372036854775808}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.MonthDayNanoInterval) - if !ok { - t.Fatalf("could not type-assert to array.MonthDayNanoInterval") - } - - if got, want := v.String(), `[(null) {4 4 4000}]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } -} - -func TestMonthDayNanoIntervalBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []arrow.MonthDayNanoInterval{ - {Months: 1, Days: 1, Nanoseconds: 1000}, - {Months: 2, Days: 2, Nanoseconds: 2000}, - {Months: 3, Days: 3, Nanoseconds: 3000}, - {Months: 4, Days: 4, Nanoseconds: 4000}} - - b := array.NewMonthDayNanoIntervalBuilder(mem) - defer b.Release() - - dtValues := func(a *array.MonthDayNanoInterval) []arrow.MonthDayNanoInterval { - vs := 
make([]arrow.MonthDayNanoInterval, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) - arr := b.NewMonthDayNanoIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues(nil, nil) - arr = b.NewMonthDayNanoIntervalArray() - assert.Zero(t, arr.Len()) - arr.Release() - - b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) - b.AppendValues(want, nil) - arr = b.NewMonthDayNanoIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() - - b.AppendValues(want, nil) - b.AppendValues([]arrow.MonthDayNanoInterval{}, nil) - arr = b.NewMonthDayNanoIntervalArray() - assert.Equal(t, want, dtValues(arr)) - arr.Release() -} - -func TestMonthDayNanoIntervalStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []arrow.MonthDayNanoInterval{ - {Months: 1, Days: 1, Nanoseconds: 1000}, {Months: 2, Days: 2, Nanoseconds: 2000}, - {Months: 3, Days: 3, Nanoseconds: 3000}, {Months: 4, Days: 4, Nanoseconds: 4000}, - {Months: 0, Days: 0, Nanoseconds: 0}, {Months: -1, Days: -2, Nanoseconds: -300}, - {Months: math.MaxInt32, Days: math.MinInt32, Nanoseconds: math.MaxInt64}, - {Months: math.MinInt32, Days: math.MaxInt32, Nanoseconds: math.MinInt64}, - } - valid = []bool{true, true, false, true, true, true, false, true} - ) - - b := array.NewMonthDayNanoIntervalBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.MonthDayNanoInterval) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewMonthDayNanoIntervalBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.MonthDayNanoInterval) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/json_reader.go b/go/arrow/array/json_reader.go deleted file mode 100644 index 2944151a5f63c..0000000000000 --- a/go/arrow/array/json_reader.go +++ /dev/null @@ -1,205 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "io" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type Option func(config) -type config interface{} - -// WithChunk sets the chunk size for reading in json records. The default is to -// read in one row per record batch as a single object. If chunk size is set to -// a negative value, then the entire file is read as a single record batch. 
-// Otherwise a record batch is read in with chunk size rows per record batch until -// it reaches EOF. -func WithChunk(n int) Option { - return func(cfg config) { - switch cfg := cfg.(type) { - case *JSONReader: - cfg.chunk = n - default: - panic(fmt.Errorf("arrow/json): unknown config type %T", cfg)) - } - } -} - -// WithAllocator specifies the allocator to use for creating the record batches, -// if it is not called, then memory.DefaultAllocator will be used. -func WithAllocator(mem memory.Allocator) Option { - return func(cfg config) { - switch cfg := cfg.(type) { - case *JSONReader: - cfg.mem = mem - default: - panic(fmt.Errorf("arrow/json): unknown config type %T", cfg)) - } - } -} - -// JSONReader is a json reader that meets the RecordReader interface definition. -// -// To read in an array of objects as a record, you can use RecordFromJSON -// which is equivalent to reading the json as a struct array whose fields are -// the columns of the record. This primarily exists to fit the RecordReader -// interface as a matching reader for the csv reader. -type JSONReader struct { - r *json.Decoder - schema *arrow.Schema - - bldr *RecordBuilder - - refs int64 - cur arrow.Record - err error - - chunk int - done bool - - mem memory.Allocator - next func() bool -} - -// NewJSONReader returns a json RecordReader which expects to find one json object -// per row of dataset. Using WithChunk can control how many rows are processed -// per record, which is how many objects become a single record from the file. -// -// If it is desired to write out an array of rows, then simply use RecordToStructArray -// and json.Marshal the struct array for the same effect. 
-func NewJSONReader(r io.Reader, schema *arrow.Schema, opts ...Option) *JSONReader { - rr := &JSONReader{ - r: json.NewDecoder(r), - schema: schema, - refs: 1, - chunk: 1, - } - for _, o := range opts { - o(rr) - } - - if rr.mem == nil { - rr.mem = memory.DefaultAllocator - } - - rr.bldr = NewRecordBuilder(rr.mem, schema) - switch { - case rr.chunk < 0: - rr.next = rr.nextall - case rr.chunk > 1: - rr.next = rr.nextn - default: - rr.next = rr.next1 - } - return rr -} - -// Err returns the last encountered error -func (r *JSONReader) Err() error { return r.err } - -func (r *JSONReader) Schema() *arrow.Schema { return r.schema } - -// Record returns the last read in record. The returned record is only valid -// until the next call to Next unless Retain is called on the record itself. -func (r *JSONReader) Record() arrow.Record { return r.cur } - -func (r *JSONReader) Retain() { - atomic.AddInt64(&r.refs, 1) -} - -func (r *JSONReader) Release() { - debug.Assert(atomic.LoadInt64(&r.refs) > 0, "too many releases") - - if atomic.AddInt64(&r.refs, -1) == 0 { - if r.cur != nil { - r.cur.Release() - r.bldr.Release() - r.r = nil - } - } -} - -// Next returns true if it read in a record, which will be available via Record -// and false if there is either an error or the end of the reader. 
-func (r *JSONReader) Next() bool { - if r.cur != nil { - r.cur.Release() - r.cur = nil - } - - if r.err != nil || r.done { - return false - } - - return r.next() -} - -func (r *JSONReader) readNext() bool { - r.err = r.r.Decode(r.bldr) - if r.err != nil { - r.done = true - if errors.Is(r.err, io.EOF) { - r.err = nil - } - return false - } - return true -} - -func (r *JSONReader) nextall() bool { - for r.readNext() { - } - - r.cur = r.bldr.NewRecord() - return r.cur.NumRows() > 0 -} - -func (r *JSONReader) next1() bool { - if !r.readNext() { - return false - } - - r.cur = r.bldr.NewRecord() - return true -} - -func (r *JSONReader) nextn() bool { - var n = 0 - - for i := 0; i < r.chunk && !r.done; i, n = i+1, n+1 { - if !r.readNext() { - break - } - } - - if n > 0 { - r.cur = r.bldr.NewRecord() - } - return n > 0 -} - -var ( - _ RecordReader = (*JSONReader)(nil) -) diff --git a/go/arrow/array/json_reader_test.go b/go/arrow/array/json_reader_test.go deleted file mode 100644 index 5e258dfdc07b1..0000000000000 --- a/go/arrow/array/json_reader_test.go +++ /dev/null @@ -1,141 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -const jsondata = ` - {"region": "NY", "model": "3", "sales": 742.0} - {"region": "NY", "model": "S", "sales": 304.125} - {"region": "NY", "model": "X", "sales": 136.25} - {"region": "NY", "model": "Y", "sales": 27.5} - {"region": "CA", "model": "3", "sales": 512} - {"region": "CA", "model": "S", "sales": 978} - {"region": "CA", "model": "X", "sales": 1.0} - {"region": "CA", "model": "Y", "sales": 69} - {"region": "QC", "model": "3", "sales": 273.5} - {"region": "QC", "model": "S", "sales": 13} - {"region": "QC", "model": "X", "sales": 54} - {"region": "QC", "model": "Y", "sales": 21} - {"region": "QC", "model": "3", "sales": 152.25} - {"region": "QC", "model": "S", "sales": 10} - {"region": "QC", "model": "X", "sales": 42} - {"region": "QC", "model": "Y", "sales": 37}` - -func TestJSONReader(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - rdr := array.NewJSONReader(strings.NewReader(jsondata), schema) - defer rdr.Release() - - n := 0 - for rdr.Next() { - n++ - rec := rdr.Record() - assert.NotNil(t, rec) - assert.EqualValues(t, 1, rec.NumRows()) - assert.EqualValues(t, 3, rec.NumCols()) - } - - assert.NoError(t, rdr.Err()) - assert.Equal(t, 16, n) -} - -func TestJSONReaderAll(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - rdr := array.NewJSONReader(strings.NewReader(jsondata), schema, array.WithAllocator(mem), array.WithChunk(-1)) - defer rdr.Release() - - assert.True(t, rdr.Next()) - rec := rdr.Record() - assert.NotNil(t, rec) - assert.NoError(t, rdr.Err()) - - assert.EqualValues(t, 16, rec.NumRows()) - assert.EqualValues(t, 3, rec.NumCols()) - assert.False(t, rdr.Next()) -} - -func TestJSONReaderChunked(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - rdr := array.NewJSONReader(strings.NewReader(jsondata), schema, array.WithAllocator(mem), array.WithChunk(4)) - defer rdr.Release() - - n := 0 - for rdr.Next() { - n++ - rec := rdr.Record() - assert.NotNil(t, rec) - assert.NoError(t, rdr.Err()) - assert.EqualValues(t, 4, rec.NumRows()) - } - - assert.Equal(t, 4, n) - assert.NoError(t, rdr.Err()) -} - -func TestUnmarshalJSON(t *testing.T) { - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "model", Type: arrow.BinaryTypes.String}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - recordBuilder := array.NewRecordBuilder(mem, schema) - defer recordBuilder.Release() - - jsondata := `{"region": "NY", "model": "3", "sales": 742.0, "extra": 1234}` - - err := recordBuilder.UnmarshalJSON([]byte(jsondata)) - assert.NoError(t, err) - - record := recordBuilder.NewRecord() - defer record.Release() - - assert.NotNil(t, record) -} diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go deleted file mode 100644 index 
1e2191f2cfc3a..0000000000000 --- a/go/arrow/array/list.go +++ /dev/null @@ -1,1574 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type ListLike interface { - arrow.Array - ListValues() arrow.Array - ValueOffsets(i int) (start, end int64) -} - -type VarLenListLike interface { - ListLike -} - -// List represents an immutable sequence of array values. -type List struct { - array - values arrow.Array - offsets []int32 -} - -var _ ListLike = (*List)(nil) - -// NewListData returns a new List array value, from data. 
-func NewListData(data arrow.ArrayData) *List { - a := &List{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *List) ListValues() arrow.Array { return a.values } - -func (a *List) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *List) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *List) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *List) setData(data *Data) { - debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers") - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.offsets = arrow.Int32Traits.CastFromBytes(vals.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *List) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *List) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualList(left, right *List) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len 
returns the number of elements in the array. -func (a *List) Len() int { return a.array.Len() } - -func (a *List) Offsets() []int32 { return a.offsets } - -func (a *List) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *List) Release() { - a.array.Release() - a.values.Release() -} - -func (a *List) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - start, end = int64(a.offsets[j]), int64(a.offsets[j+1]) - return -} - -// LargeList represents an immutable sequence of array values. -type LargeList struct { - array - values arrow.Array - offsets []int64 -} - -var _ ListLike = (*LargeList)(nil) - -// NewLargeListData returns a new LargeList array value, from data. -func NewLargeListData(data arrow.ArrayData) *LargeList { - a := new(LargeList) - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *LargeList) ListValues() arrow.Array { return a.values } - -func (a *LargeList) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *LargeList) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *LargeList) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *LargeList) setData(data *Data) { - debug.Assert(len(data.buffers) >= 2, "list data should have 2 buffers") - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.offsets = arrow.Int64Traits.CastFromBytes(vals.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *LargeList) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - 
return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *LargeList) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualLargeList(left, right *LargeList) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *LargeList) Len() int { return a.array.Len() } - -func (a *LargeList) Offsets() []int64 { return a.offsets } - -func (a *LargeList) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - start, end = a.offsets[j], a.offsets[j+1] - return -} - -func (a *LargeList) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *LargeList) Release() { - a.array.Release() - a.values.Release() -} - -type baseListBuilder struct { - builder - - values Builder // value builder for the list's elements. - offsets Builder - - // actual list type - dt arrow.DataType - appendOffsetVal func(int) -} - -type ListLikeBuilder interface { - Builder - ValueBuilder() Builder - Append(bool) -} - -type VarLenListLikeBuilder interface { - ListLikeBuilder - AppendWithSize(bool, int) -} - -type ListBuilder struct { - baseListBuilder -} - -type LargeListBuilder struct { - baseListBuilder -} - -// NewListBuilder returns a builder, using the provided memory allocator. 
-// The created list builder will create a list whose elements will be of type etype. -func NewListBuilder(mem memory.Allocator, etype arrow.DataType) *ListBuilder { - offsetBldr := NewInt32Builder(mem) - return &ListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - dt: arrow.ListOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - }, - } -} - -// NewListBuilderWithField takes a field to use for the child rather than just -// a datatype to allow for more customization. -func NewListBuilderWithField(mem memory.Allocator, field arrow.Field) *ListBuilder { - offsetBldr := NewInt32Builder(mem) - return &ListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - dt: arrow.ListOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - }, - } -} - -func (b *baseListBuilder) Type() arrow.DataType { - switch dt := b.dt.(type) { - case *arrow.ListType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.ListOfField(f) - case *arrow.LargeListType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.LargeListOfField(f) - } - return nil -} - -// NewLargeListBuilder returns a builder, using the provided memory allocator. -// The created list builder will create a list whose elements will be of type etype. 
-func NewLargeListBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListBuilder { - offsetBldr := NewInt64Builder(mem) - return &LargeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - dt: arrow.LargeListOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - }, - } -} - -// NewLargeListBuilderWithField takes a field rather than just an element type -// to allow for more customization of the final type of the LargeList Array -func NewLargeListBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListBuilder { - offsetBldr := NewInt64Builder(mem) - return &LargeListBuilder{ - baseListBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - dt: arrow.LargeListOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - }, - } -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *baseListBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - b.values.Release() - b.offsets.Release() - } - -} - -func (b *baseListBuilder) appendNextOffset() { - b.appendOffsetVal(b.values.Len()) -} - -func (b *baseListBuilder) Append(v bool) { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(v) - b.appendNextOffset() -} - -func (b *baseListBuilder) AppendWithSize(v bool, _ int) { - b.Append(v) -} - -func (b *baseListBuilder) AppendNull() { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(false) - b.appendNextOffset() -} - -func (b *baseListBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *baseListBuilder) AppendEmptyValue() { - b.Append(true) -} - -func (b *baseListBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *ListBuilder) AppendValues(offsets []int32, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int32Builder).AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *LargeListBuilder) AppendValues(offsets []int64, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int64Builder).AppendValues(offsets, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *baseListBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *baseListBuilder) init(capacity int) { - b.builder.init(capacity) - b.offsets.init(capacity + 1) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *baseListBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) - b.offsets.Reserve(n) -} - -// Resize adjusts the space allocated by b to n elements. 
If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *baseListBuilder) Resize(n int) { - b.resizeHelper(n) - b.offsets.Resize(n) -} - -func (b *baseListBuilder) resizeHelper(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *baseListBuilder) ValueBuilder() Builder { - return b.values -} - -// NewArray creates a List array from the memory buffers used by the builder and resets the ListBuilder -// so it can be used to build a new array. -func (b *ListBuilder) NewArray() arrow.Array { - return b.NewListArray() -} - -// NewArray creates a LargeList array from the memory buffers used by the builder and resets the LargeListBuilder -// so it can be used to build a new array. -func (b *LargeListBuilder) NewArray() arrow.Array { - return b.NewLargeListArray() -} - -// NewListArray creates a List array from the memory buffers used by the builder and resets the ListBuilder -// so it can be used to build a new array. -func (b *ListBuilder) NewListArray() (a *List) { - data := b.newData() - a = NewListData(data) - data.Release() - return -} - -// NewLargeListArray creates a List array from the memory buffers used by the builder and resets the LargeListBuilder -// so it can be used to build a new array. 
-func (b *LargeListBuilder) NewLargeListArray() (a *LargeList) { - data := b.newData() - a = NewLargeListData(data) - data.Release() - return -} - -func (b *baseListBuilder) newData() (data *Data) { - if b.offsets.Len() != b.length+1 { - b.appendNextOffset() - } - values := b.values.NewArray() - defer values.Release() - - var offsets *memory.Buffer - if b.offsets != nil { - arr := b.offsets.NewArray() - defer arr.Release() - offsets = arr.Data().Buffers()[1] - } - - data = NewData( - b.Type(), b.length, - []*memory.Buffer{ - b.nullBitmap, - offsets, - }, - []arrow.ArrayData{values.Data()}, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *baseListBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s))) -} - -func (b *baseListBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - b.Append(true) - if err := b.values.Unmarshal(dec); err != nil { - return err - } - // consume ']' - _, err := dec.Token() - return err - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Struct: b.dt.String(), - } - } - - return nil -} - -func (b *baseListBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *baseListBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("list builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// ListView represents an immutable sequence of array values defined by an -// offset into a child array and a length. 
-type ListView struct { - array - values arrow.Array - offsets []int32 - sizes []int32 -} - -var _ VarLenListLike = (*ListView)(nil) - -func NewListViewData(data arrow.ArrayData) *ListView { - a := &ListView{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *ListView) ListValues() arrow.Array { return a.values } - -func (a *ListView) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *ListView) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *ListView) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *ListView) setData(data *Data) { - debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers") - a.array.setData(data) - offsets := data.buffers[1] - if offsets != nil { - a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes()) - } - sizes := data.buffers[2] - if sizes != nil { - a.sizes = arrow.Int32Traits.CastFromBytes(sizes.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *ListView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *ListView) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func 
arrayEqualListView(left, right *ListView) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *ListView) Len() int { return a.array.Len() } - -func (a *ListView) Offsets() []int32 { return a.offsets } - -func (a *ListView) Sizes() []int32 { return a.sizes } - -func (a *ListView) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *ListView) Release() { - a.array.Release() - a.values.Release() -} - -func (a *ListView) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - size := int64(a.sizes[j]) - // If size is 0, skip accessing offsets. - if size == 0 { - start, end = 0, 0 - return - } - start = int64(a.offsets[j]) - end = start + size - return -} - -// LargeListView represents an immutable sequence of array values defined by an -// offset into a child array and a length. -type LargeListView struct { - array - values arrow.Array - offsets []int64 - sizes []int64 -} - -var _ VarLenListLike = (*LargeListView)(nil) - -// NewLargeListViewData returns a new LargeListView array value, from data. 
-func NewLargeListViewData(data arrow.ArrayData) *LargeListView { - a := new(LargeListView) - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *LargeListView) ListValues() arrow.Array { return a.values } - -func (a *LargeListView) ValueStr(i int) string { - if !a.IsValid(i) { - return NullValueStr - } - return string(a.GetOneForMarshal(i).(json.RawMessage)) -} - -func (a *LargeListView) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - if a.IsNull(i) { - o.WriteString(NullValueStr) - continue - } - sub := a.newListValue(i) - fmt.Fprintf(o, "%v", sub) - sub.Release() - } - o.WriteString("]") - return o.String() -} - -func (a *LargeListView) newListValue(i int) arrow.Array { - beg, end := a.ValueOffsets(i) - return NewSlice(a.values, beg, end) -} - -func (a *LargeListView) setData(data *Data) { - debug.Assert(len(data.buffers) >= 3, "list-view data should have 3 buffers") - a.array.setData(data) - offsets := data.buffers[1] - if offsets != nil { - a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) - } - sizes := data.buffers[2] - if sizes != nil { - a.sizes = arrow.Int64Traits.CastFromBytes(sizes.Bytes()) - } - a.values = MakeFromData(data.childData[0]) -} - -func (a *LargeListView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - slice := a.newListValue(i) - defer slice.Release() - v, err := json.Marshal(slice) - if err != nil { - panic(err) - } - return json.RawMessage(v) -} - -func (a *LargeListView) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualLargeListView(left, right *LargeListView) bool { - for i := 0; i < left.Len(); i++ { - if 
left.IsNull(i) { - continue - } - o := func() bool { - l := left.newListValue(i) - defer l.Release() - r := right.newListValue(i) - defer r.Release() - return Equal(l, r) - }() - if !o { - return false - } - } - return true -} - -// Len returns the number of elements in the array. -func (a *LargeListView) Len() int { return a.array.Len() } - -func (a *LargeListView) Offsets() []int64 { return a.offsets } - -func (a *LargeListView) Sizes() []int64 { return a.sizes } - -func (a *LargeListView) ValueOffsets(i int) (start, end int64) { - debug.Assert(i >= 0 && i < a.array.data.length, "index out of range") - j := i + a.array.data.offset - size := a.sizes[j] - // If size is 0, skip accessing offsets. - if size == 0 { - return 0, 0 - } - start = a.offsets[j] - end = start + size - return -} - -func (a *LargeListView) Retain() { - a.array.Retain() - a.values.Retain() -} - -func (a *LargeListView) Release() { - a.array.Release() - a.values.Release() -} - -// Accessors for offsets and sizes to make ListView and LargeListView validation generic. -type offsetsAndSizes interface { - offsetAt(slot int64) int64 - sizeAt(slot int64) int64 -} - -var _ offsetsAndSizes = (*ListView)(nil) -var _ offsetsAndSizes = (*LargeListView)(nil) - -func (a *ListView) offsetAt(slot int64) int64 { return int64(a.offsets[int64(a.data.offset)+slot]) } - -func (a *ListView) sizeAt(slot int64) int64 { return int64(a.sizes[int64(a.data.offset)+slot]) } - -func (a *LargeListView) offsetAt(slot int64) int64 { return a.offsets[int64(a.data.offset)+slot] } - -func (a *LargeListView) sizeAt(slot int64) int64 { return a.sizes[int64(a.data.offset)+slot] } - -func outOfBoundsListViewOffset(l offsetsAndSizes, slot int64, offsetLimit int64) error { - offset := l.offsetAt(slot) - return fmt.Errorf("%w: Offset invariant failure: offset for slot %d out of bounds. 
Expected %d to be at least 0 and less than %d", arrow.ErrInvalid, slot, offset, offsetLimit) -} - -func outOfBoundsListViewSize(l offsetsAndSizes, slot int64, offsetLimit int64) error { - size := l.sizeAt(slot) - if size < 0 { - return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d < 0", arrow.ErrInvalid, slot, size) - } - offset := l.offsetAt(slot) - return fmt.Errorf("%w: Offset invariant failure: size for slot %d out of bounds: %d + %d > %d", arrow.ErrInvalid, slot, offset, size, offsetLimit) -} - -// Pre-condition: Basic validation has already been performed -func (a *array) fullyValidateOffsetsAndSizes(l offsetsAndSizes, offsetLimit int64) error { - for slot := int64(0); slot < int64(a.Len()); slot += 1 { - size := l.sizeAt(slot) - if size > 0 { - offset := l.offsetAt(slot) - if offset < 0 || offset > offsetLimit { - return outOfBoundsListViewOffset(l, slot, offsetLimit) - } - if size > offsetLimit-int64(offset) { - return outOfBoundsListViewSize(l, slot, offsetLimit) - } - } else if size < 0 { - return outOfBoundsListViewSize(l, slot, offsetLimit) - } - } - - return nil -} - -func (a *array) validateOffsetsAndMaybeSizes(l offsetsAndSizes, offsetByteWidth int, isListView bool, offsetLimit int64, fullValidation bool) error { - nonEmpty := a.Len() > 0 - if a.data.buffers[1] == nil { - // For length 0, an empty offsets buffer is accepted (ARROW-544). 
- if nonEmpty { - return fmt.Errorf("non-empty array but offsets are null") - } - return nil - } - if isListView && a.data.buffers[2] == nil { - if nonEmpty { - return fmt.Errorf("non-empty array but sizes are null") - } - return nil - } - - var requiredOffsets int - if nonEmpty { - requiredOffsets = a.Len() + a.Offset() - if !isListView { - requiredOffsets += 1 - } - } else { - requiredOffsets = 0 - } - offsetsByteSize := a.data.buffers[1].Len() - if offsetsByteSize/offsetByteWidth < requiredOffsets { - return fmt.Errorf("offsets buffer size (bytes): %d isn't large enough for length: %d and offset: %d", - offsetsByteSize, a.Len(), a.Offset()) - } - if isListView { - requiredSizes := a.Len() + a.Offset() - sizesBytesSize := a.data.buffers[2].Len() - if sizesBytesSize/offsetByteWidth < requiredSizes { - return fmt.Errorf("sizes buffer size (bytes): %d isn't large enough for length: %d and offset: %d", - sizesBytesSize, a.Len(), a.Offset()) - } - } - - if fullValidation && requiredOffsets > 0 { - if isListView { - return a.fullyValidateOffsetsAndSizes(l, offsetLimit) - } - // TODO: implement validation of List and LargeList - // return fullyValidateOffsets(offset_limit) - return nil - } - return nil -} - -func (a *ListView) validate(fullValidation bool) error { - values := a.array.data.childData[0] - offsetLimit := values.Len() - return a.array.validateOffsetsAndMaybeSizes(a, 4, true, int64(offsetLimit), fullValidation) -} - -func (a *ListView) Validate() error { - return a.validate(false) -} - -func (a *ListView) ValidateFull() error { - return a.validate(true) -} - -func (a *LargeListView) validate(fullValidation bool) error { - values := a.array.data.childData[0] - offsetLimit := values.Len() - return a.array.validateOffsetsAndMaybeSizes(a, 8, true, int64(offsetLimit), fullValidation) -} - -func (a *LargeListView) Validate() error { - return a.validate(false) -} - -func (a *LargeListView) ValidateFull() error { - return a.validate(true) -} - -type 
baseListViewBuilder struct { - builder - - values Builder // value builder for the list-view's elements. - offsets Builder - sizes Builder - - // actual list-view type - dt arrow.DataType - appendOffsetVal func(int) - appendSizeVal func(int) -} - -type ListViewBuilder struct { - baseListViewBuilder -} - -type LargeListViewBuilder struct { - baseListViewBuilder -} - -// NewListViewBuilder returns a builder, using the provided memory allocator. -// The created list-view builder will create a list whose elements will be -// of type etype. -func NewListViewBuilder(mem memory.Allocator, etype arrow.DataType) *ListViewBuilder { - offsetBldr := NewInt32Builder(mem) - sizeBldr := NewInt32Builder(mem) - return &ListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.ListViewOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - appendSizeVal: func(s int) { sizeBldr.Append(int32(s)) }, - }, - } -} - -// NewListViewBuilderWithField takes a field to use for the child rather than just -// a datatype to allow for more customization. 
-func NewListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *ListViewBuilder { - offsetBldr := NewInt32Builder(mem) - sizeBldr := NewInt32Builder(mem) - return &ListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.ListViewOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int32(o)) }, - appendSizeVal: func(s int) { sizeBldr.Append(int32(s)) }, - }, - } -} - -func (b *baseListViewBuilder) Type() arrow.DataType { - switch dt := b.dt.(type) { - case *arrow.ListViewType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.ListViewOfField(f) - case *arrow.LargeListViewType: - f := dt.ElemField() - f.Type = b.values.Type() - return arrow.LargeListViewOfField(f) - } - return nil -} - -// NewLargeListViewBuilder returns a builder, using the provided memory allocator. -// The created list-view builder will create a list whose elements will be of type etype. 
-func NewLargeListViewBuilder(mem memory.Allocator, etype arrow.DataType) *LargeListViewBuilder { - offsetBldr := NewInt64Builder(mem) - sizeBldr := NewInt64Builder(mem) - return &LargeListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, etype), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.LargeListViewOf(etype), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - appendSizeVal: func(s int) { sizeBldr.Append(int64(s)) }, - }, - } -} - -// NewLargeListViewBuilderWithField takes a field rather than just an element type -// to allow for more customization of the final type of the LargeListView Array -func NewLargeListViewBuilderWithField(mem memory.Allocator, field arrow.Field) *LargeListViewBuilder { - offsetBldr := NewInt64Builder(mem) - sizeBldr := NewInt64Builder(mem) - return &LargeListViewBuilder{ - baseListViewBuilder{ - builder: builder{refCount: 1, mem: mem}, - values: NewBuilder(mem, field.Type), - offsets: offsetBldr, - sizes: sizeBldr, - dt: arrow.LargeListViewOfField(field), - appendOffsetVal: func(o int) { offsetBldr.Append(int64(o)) }, - appendSizeVal: func(o int) { sizeBldr.Append(int64(o)) }, - }, - } -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *baseListViewBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - b.values.Release() - b.offsets.Release() - b.sizes.Release() - } -} - -func (b *baseListViewBuilder) AppendDimensions(offset int, listSize int) { - b.Reserve(1) - b.unsafeAppendBoolToBitmap(true) - b.appendOffsetVal(offset) - b.appendSizeVal(listSize) -} - -func (b *baseListViewBuilder) Append(v bool) { - debug.Assert(false, "baseListViewBuilder.Append should never be called -- use AppendWithSize instead") -} - -func (b *baseListViewBuilder) AppendWithSize(v bool, listSize int) { - debug.Assert(v || listSize == 0, "invalid list-view should have size 0") - b.Reserve(1) - b.unsafeAppendBoolToBitmap(v) - b.appendOffsetVal(b.values.Len()) - b.appendSizeVal(listSize) -} - -func (b *baseListViewBuilder) AppendNull() { - b.AppendWithSize(false, 0) -} - -func (b *baseListViewBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *baseListViewBuilder) AppendEmptyValue() { - b.AppendWithSize(true, 0) -} - -func (b *baseListViewBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *ListViewBuilder) AppendValuesWithSizes(offsets []int32, sizes []int32, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int32Builder).AppendValues(offsets, nil) - b.sizes.(*Int32Builder).AppendValues(sizes, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *LargeListViewBuilder) AppendValuesWithSizes(offsets []int64, sizes []int64, valid []bool) { - b.Reserve(len(valid)) - b.offsets.(*Int64Builder).AppendValues(offsets, nil) - b.sizes.(*Int64Builder).AppendValues(sizes, nil) - b.builder.unsafeAppendBoolsToBitmap(valid, len(valid)) -} - -func (b *baseListViewBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - 
bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *baseListViewBuilder) init(capacity int) { - b.builder.init(capacity) - b.offsets.init(capacity) - b.sizes.init(capacity) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *baseListViewBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) - b.offsets.Reserve(n) - b.sizes.Reserve(n) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *baseListViewBuilder) Resize(n int) { - b.resizeHelper(n) - b.offsets.Resize(n) - b.sizes.Resize(n) -} - -func (b *baseListViewBuilder) resizeHelper(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *baseListViewBuilder) ValueBuilder() Builder { - return b.values -} - -// NewArray creates a ListView array from the memory buffers used by the builder and -// resets the ListViewBuilder so it can be used to build a new array. -func (b *ListViewBuilder) NewArray() arrow.Array { - return b.NewListViewArray() -} - -// NewArray creates a LargeListView array from the memory buffers used by the builder -// and resets the LargeListViewBuilder so it can be used to build a new array. -func (b *LargeListViewBuilder) NewArray() arrow.Array { - return b.NewLargeListViewArray() -} - -// NewListViewArray creates a ListView array from the memory buffers used by the builder -// and resets the ListViewBuilder so it can be used to build a new array. 
-func (b *ListViewBuilder) NewListViewArray() (a *ListView) { - data := b.newData() - a = NewListViewData(data) - data.Release() - return -} - -// NewLargeListViewArray creates a ListView array from the memory buffers used by the -// builder and resets the LargeListViewBuilder so it can be used to build a new array. -func (b *LargeListViewBuilder) NewLargeListViewArray() (a *LargeListView) { - data := b.newData() - a = NewLargeListViewData(data) - data.Release() - return -} - -func (b *baseListViewBuilder) newData() (data *Data) { - values := b.values.NewArray() - defer values.Release() - - var offsets *memory.Buffer - if b.offsets != nil { - arr := b.offsets.NewArray() - defer arr.Release() - offsets = arr.Data().Buffers()[1] - } - - var sizes *memory.Buffer - if b.sizes != nil { - arr := b.sizes.NewArray() - defer arr.Release() - sizes = arr.Data().Buffers()[1] - } - - data = NewData( - b.Type(), b.length, - []*memory.Buffer{ - b.nullBitmap, - offsets, - sizes, - }, - []arrow.ArrayData{values.Data()}, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *baseListViewBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - return b.UnmarshalOne(json.NewDecoder(strings.NewReader(s))) -} - -func (b *baseListViewBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - offset := b.values.Len() - // 0 is a placeholder size as we don't know the actual size yet - b.AppendWithSize(true, 0) - if err := b.values.Unmarshal(dec); err != nil { - return err - } - // consume ']' - _, err := dec.Token() - // replace the last size with the actual size - switch b.sizes.(type) { - case *Int32Builder: - b.sizes.(*Int32Builder).rawData[b.sizes.Len()-1] = int32(b.values.Len() - offset) - case *Int64Builder: - b.sizes.(*Int64Builder).rawData[b.sizes.Len()-1] = int64(b.values.Len() - offset) - } - return err - case nil: - b.AppendNull() - 
default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Struct: b.dt.String(), - } - } - - return nil -} - -func (b *baseListViewBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("list-view builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// Find the minimum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. -// -// Pre-conditions: -// -// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 -// input.Len() > 0 && input.NullN() != input.Len() -func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[input.Offset():] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) - } - - // It's very likely that the first non-null non-empty list-view starts at - // offset 0 of the child array. - i := 0 - for i < input.Len() && (isNull(i) || sizes[i] == 0) { - i += 1 - } - if i >= input.Len() { - return 0 - } - minOffset := offsets[i] - if minOffset == 0 { - // early exit: offset 0 found already - return 0 - } - - // Slow path: scan the buffers entirely. - i += 1 - for ; i < input.Len(); i += 1 { - if isNull(i) { - continue - } - offset := offsets[i] - if offset < minOffset && sizes[i] > 0 { - minOffset = offset - } - } - return minOffset -} - -// Find the maximum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. 
-// -// Pre-conditions: -// -// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 -// input.Len() > 0 && input.NullN() != input.Len() -func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset { - inputOffset := input.Offset() - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) - } - - i := input.Len() - 1 // safe because input.Len() > 0 - for i != 0 && (isNull(i) || sizes[i] == 0) { - i -= 1 - } - offset := offsets[i] - size := sizes[i] - if i == 0 { - if isNull(i) || sizes[i] == 0 { - return 0 - } else { - return offset + size - } - } - - values := input.Children()[0] - maxEnd := offsets[i] + sizes[i] - if maxEnd == Offset(values.Len()) { - // Early-exit: maximum possible view-end found already. - return maxEnd - } - - // Slow path: scan the buffers entirely. 
- for ; i >= 0; i -= 1 { - offset := offsets[i] - size := sizes[i] - if size > 0 && !isNull(i) { - if offset+size > maxEnd { - maxEnd = offset + size - if maxEnd == Offset(values.Len()) { - return maxEnd - } - } - } - } - return maxEnd -} - -func rangeOfValuesUsed(input arrow.ArrayData) (int, int) { - if input.Len() == 0 || input.NullN() == input.Len() { - return 0, 0 - } - var minOffset, maxEnd int - switch input.DataType().(type) { - case *arrow.ListViewType: - minOffset = int(minListViewOffset[int32](input)) - maxEnd = int(maxListViewEnd[int32](input)) - case *arrow.LargeListViewType: - minOffset = int(minListViewOffset[int64](input)) - maxEnd = int(maxListViewEnd[int64](input)) - case *arrow.ListType: - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - minOffset = int(offsets[0]) - maxEnd = int(offsets[len(offsets)-1]) - case *arrow.LargeListType: - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - minOffset = int(offsets[0]) - maxEnd = int(offsets[len(offsets)-1]) - case *arrow.MapType: - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - minOffset = int(offsets[0]) - maxEnd = int(offsets[len(offsets)-1]) - } - return minOffset, maxEnd - minOffset -} - -// Returns the smallest contiguous range of values of the child array that are -// referenced by all the list values in the input array. 
-func RangeOfValuesUsed(input VarLenListLike) (int, int) { - return rangeOfValuesUsed(input.Data()) -} - -var ( - _ arrow.Array = (*List)(nil) - _ arrow.Array = (*LargeList)(nil) - _ arrow.Array = (*ListView)(nil) - _ arrow.Array = (*LargeListView)(nil) - - _ Builder = (*ListBuilder)(nil) - _ Builder = (*LargeListBuilder)(nil) - _ Builder = (*ListViewBuilder)(nil) - _ Builder = (*LargeListViewBuilder)(nil) - - _ VarLenListLike = (*List)(nil) - _ VarLenListLike = (*LargeList)(nil) - _ VarLenListLike = (*Map)(nil) - _ VarLenListLike = (*ListView)(nil) - _ VarLenListLike = (*LargeListView)(nil) - _ ListLike = (*FixedSizeList)(nil) - - _ VarLenListLikeBuilder = (*ListBuilder)(nil) - _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) - _ VarLenListLikeBuilder = (*ListBuilder)(nil) - _ VarLenListLikeBuilder = (*LargeListBuilder)(nil) - _ VarLenListLikeBuilder = (*MapBuilder)(nil) - _ ListLikeBuilder = (*FixedSizeListBuilder)(nil) -) diff --git a/go/arrow/array/list_test.go b/go/arrow/array/list_test.go deleted file mode 100644 index f6f42a31299e4..0000000000000 --- a/go/arrow/array/list_test.go +++ /dev/null @@ -1,864 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestListArray(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOfField(arrow.Field{Name: "item", Type: arrow.PrimitiveTypes.Int32, Nullable: true})}, - {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - - for i := 0; i < 10; i++ { - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - pos := 0 - for i, length := range lengths { - lb.AppendWithSize(isValid[i], length) - for j := 0; j < length; j++ { - vb.Append(vs[pos]) - pos++ - } - } - - arr := lb.NewArray().(array.ListLike) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := 
arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST: - arr := arr.(*array.List) - gotOffsets = arr.Offsets() - case arrow.LARGE_LIST: - arr := arr.(*array.LargeList) - gotOffsets = arr.Offsets() - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - }) - } -} - -// Like the list-view tests in TestListArray, but with out-of-order offsets. 
-func TestListViewArray(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - - for i := 0; i < 10; i++ { - switch lvb := lb.(type) { - case *array.ListViewBuilder: - lvb.AppendDimensions(5, 3) - lb.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - case *array.LargeListViewBuilder: - lvb.AppendDimensions(5, 3) - lb.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - } - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - vb.AppendValues(vs, []bool{false, true, true, true, true, true, true, true}) - - arr := lb.NewArray().(array.ListLike) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - 
gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - }) - } -} - -func TestListArrayEmpty(t *testing.T) { - typ := []arrow.DataType{ - arrow.ListOf(arrow.PrimitiveTypes.Int32), - arrow.LargeListOf(arrow.PrimitiveTypes.Int32), - arrow.ListViewOf(arrow.PrimitiveTypes.Int32), - arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32), - } - - for _, dt := range typ { - t.Run(dt.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - lb := array.NewBuilder(pool, dt) - defer lb.Release() - arr := lb.NewArray() - defer arr.Release() - if got, want := arr.Len(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - }) - } -} - -func TestListArrayBulkAppend(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST_VIEW, []int32{0, 3, 3, 3}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 0, 4} - 
isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST: - lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) - case arrow.LARGE_LIST: - lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST: - arr := arr.(*array.List) - gotOffsets = arr.Offsets() - case arrow.LARGE_LIST: - arr := arr.(*array.LargeList) - gotOffsets = arr.Offsets() - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, 
want=%v", gotSizes, tt.sizes) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestListViewArrayBulkAppend(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes 
interface{} - switch tt.typeID { - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - }) - } -} - -func TestListArraySlice(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST, []int32{0, 3, 3, 3, 7}, nil, arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST, []int64{0, 3, 3, 3, 7}, nil, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LIST_VIEW, []int32{0, 3, 3, 3, 7}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{0, 3, 3, 3, 7}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{0, 1, 2, 3, 4, 5, 6} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST: - lb.(*array.ListBuilder).AppendValues(tt.offsets.([]int32), isValid) - case arrow.LARGE_LIST: - lb.(*array.LargeListBuilder).AppendValues(tt.offsets.([]int64), isValid) - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), 
tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST: - arr := arr.(*array.List) - gotOffsets = arr.Offsets() - case arrow.LARGE_LIST: - arr := arr.(*array.LargeList) - gotOffsets = arr.Offsets() - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW { - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) - - sub := array.NewSlice(arr, 1, 4).(array.ListLike) - defer sub.Release() - - if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want 
{ - t.Fatalf("got=%q, want=%q", got, want) - } - }) - } -} - -func TestListViewArraySlice(t *testing.T) { - tests := []struct { - typeID arrow.Type - offsets interface{} - sizes interface{} - dt arrow.DataType - }{ - {arrow.LIST_VIEW, []int32{5, 0, 0, 1}, []int32{3, 0, 0, 4}, arrow.ListViewOf(arrow.PrimitiveTypes.Int32)}, - {arrow.LARGE_LIST_VIEW, []int64{5, 0, 0, 1}, []int64{3, 0, 0, 4}, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int32)}, - } - - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int32{-1, 3, 4, 5, 6, 0, 1, 2} - lengths = []int{3, 0, 0, 4} - isValid = []bool{true, false, true, true} - ) - - lb := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer lb.Release() - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Reserve(len(vs)) - - switch tt.typeID { - case arrow.LIST_VIEW: - lb.(*array.ListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int32), tt.sizes.([]int32), isValid) - case arrow.LARGE_LIST_VIEW: - lb.(*array.LargeListViewBuilder).AppendValuesWithSizes(tt.offsets.([]int64), tt.sizes.([]int64), isValid) - } - for _, v := range vs { - vb.Append(v) - } - - arr := lb.NewArray().(array.VarLenListLike) - defer arr.Release() - - if got, want := arr.DataType().ID(), tt.typeID; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range lengths { - if got, want := arr.IsValid(i), isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := arr.IsNull(i), !isValid[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - var gotOffsets, gotSizes interface{} - switch tt.typeID { - case arrow.LIST_VIEW: - arr := arr.(*array.ListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - case 
arrow.LARGE_LIST_VIEW: - arr := arr.(*array.LargeListView) - gotOffsets = arr.Offsets() - gotSizes = arr.Sizes() - } - - if !reflect.DeepEqual(gotOffsets, tt.offsets) { - t.Fatalf("got=%v, want=%v", gotOffsets, tt.offsets) - } - - if !reflect.DeepEqual(gotSizes, tt.sizes) { - t.Fatalf("got=%v, want=%v", gotSizes, tt.sizes) - } - - varr := arr.ListValues().(*array.Int32) - if got, want := varr.Int32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - if got, want := arr.String(), `[[0 1 2] (null) [] [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - assert.Equal(t, "[0,1,2]", arr.ValueStr(0)) - - sub := array.NewSlice(arr, 1, 4).(array.ListLike) - defer sub.Release() - - if got, want := sub.String(), `[(null) [] [3 4 5 6]]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - }) - } -} - -func TestVarLenListLikeStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - builders := []array.VarLenListLikeBuilder{ - array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - builders1 := []array.VarLenListLikeBuilder{ - array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - for i, b := range builders { - defer b.Release() - - vb := b.ValueBuilder().(*array.Int32Builder) - - var values = [][]int32{ - {0, 1, 2, 3, 4, 5, 6}, - {1, 2, 3, 4, 5, 6, 7}, - {2, 3, 4, 5, 6, 7, 8}, - {3, 4, 5, 6, 7, 8, 9}, - } - for _, value := range values { - b.AppendNull() - b.AppendWithSize(true, 2*len(value)) - for _, el := range value { - 
vb.Append(el) - vb.AppendNull() - } - b.AppendWithSize(false, 0) - } - - arr := b.NewArray() - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := builders1[i] - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray() - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) - } -} - -// Test the string roun-trip for a list-view containing out-of-order offsets. -func TestListViewStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - builders := []array.VarLenListLikeBuilder{ - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - builders1 := []array.VarLenListLikeBuilder{ - array.NewListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - array.NewLargeListViewBuilder(mem, arrow.PrimitiveTypes.Int32), - } - - for i, b := range builders { - defer b.Release() - - switch lvb := b.(type) { - case *array.ListViewBuilder: - lvb.AppendDimensions(5, 3) - b.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - case *array.LargeListViewBuilder: - lvb.AppendDimensions(5, 3) - b.AppendNull() - lvb.AppendDimensions(0, 0) - lvb.AppendDimensions(1, 4) - } - - vb := b.ValueBuilder().(*array.Int32Builder) - - vs := []int32{-1, 3, 4, 5, 6, 0, 1, 2} - isValid := []bool{false, true, true, true, true, true, true, true} - vb.Reserve(len(vs)) - vb.AppendValues(vs, isValid) - - arr := b.NewArray() - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := builders1[i] - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray() - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) - } -} - -func TestRangeOfValuesUsed(t *testing.T) { - tests := []struct { - typeID arrow.Type - dt arrow.DataType - }{ - {arrow.LIST, arrow.ListOf(arrow.PrimitiveTypes.Int16)}, - {arrow.LARGE_LIST, arrow.LargeListOf(arrow.PrimitiveTypes.Int16)}, - {arrow.LIST_VIEW, arrow.ListViewOf(arrow.PrimitiveTypes.Int16)}, - {arrow.LARGE_LIST_VIEW, arrow.LargeListViewOf(arrow.PrimitiveTypes.Int16)}, - } - for _, tt := range tests { - t.Run(tt.typeID.String(), func(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - isListView := tt.typeID == arrow.LIST_VIEW || tt.typeID == arrow.LARGE_LIST_VIEW - - bldr := array.NewBuilder(pool, tt.dt).(array.VarLenListLikeBuilder) - defer bldr.Release() - - var arr array.VarLenListLike - - // Empty array - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - offset, len := array.RangeOfValuesUsed(arr) - assert.Equal(t, 0, offset) - assert.Equal(t, 0, len) - - // List-like array with only nulls - bldr.AppendNulls(3) - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - offset, len = array.RangeOfValuesUsed(arr) - assert.Equal(t, 0, offset) - assert.Equal(t, 0, len) - - // Array with nulls and non-nulls (starting at a non-zero offset) - vb := bldr.ValueBuilder().(*array.Int16Builder) - vb.Append(-2) - vb.Append(-1) - bldr.AppendWithSize(false, 0) - bldr.AppendWithSize(true, 2) - vb.Append(0) - vb.Append(1) - bldr.AppendWithSize(true, 3) - vb.Append(2) - vb.Append(3) - vb.Append(4) - if isListView { - vb.Append(10) - vb.Append(11) - } - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - offset, len = array.RangeOfValuesUsed(arr) - assert.Equal(t, 2, offset) - 
assert.Equal(t, 5, len) - - // Overlapping list-views - // [null, [0, 1, 2, 3, 4, 5], [1, 2], null, [4], null, null] - vb = bldr.ValueBuilder().(*array.Int16Builder) - vb.Append(-2) - vb.Append(-1) - bldr.AppendWithSize(false, 0) - if isListView { - bldr.AppendWithSize(true, 6) - vb.Append(0) - bldr.AppendWithSize(true, 2) - vb.Append(1) - vb.Append(2) - vb.Append(3) - bldr.AppendWithSize(false, 0) - bldr.AppendWithSize(true, 1) - vb.Append(4) - vb.Append(5) - // -- used range ends here -- - vb.Append(10) - vb.Append(11) - } else { - bldr.AppendWithSize(true, 6) - vb.Append(0) - vb.Append(1) - vb.Append(2) - vb.Append(3) - vb.Append(4) - vb.Append(5) - bldr.AppendWithSize(true, 2) - vb.Append(1) - vb.Append(2) - bldr.AppendWithSize(false, 0) - bldr.AppendWithSize(true, 1) - vb.Append(4) - } - bldr.AppendNulls(2) - arr = bldr.NewArray().(array.VarLenListLike) - defer arr.Release() - - // Check the range - offset, len = array.RangeOfValuesUsed(arr) - assert.Equal(t, 2, offset) - if isListView { - assert.Equal(t, 6, len) - } else { - assert.Equal(t, 9, len) - } - }) - } -} diff --git a/go/arrow/array/map.go b/go/arrow/array/map.go deleted file mode 100644 index a692c2cd6d71a..0000000000000 --- a/go/arrow/array/map.go +++ /dev/null @@ -1,361 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Map represents an immutable sequence of Key/Value structs. It is a -// logical type that is implemented as a List. -type Map struct { - *List - keys, items arrow.Array -} - -var _ ListLike = (*Map)(nil) - -// NewMapData returns a new Map array value, from data -func NewMapData(data arrow.ArrayData) *Map { - a := &Map{List: &List{}} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// KeysSorted checks the datatype that was used to construct this array and -// returns the KeysSorted boolean value used to denote if the key array is -// sorted for each list element. -// -// Important note: Nothing is enforced regarding the KeysSorted value, it is -// solely a metadata field that should be set if keys within each value are sorted. -// This value is not used at all in regards to comparisons / equality. 
-func (a *Map) KeysSorted() bool { return a.DataType().(*arrow.MapType).KeysSorted } - -func (a *Map) validateData(data *Data) { - if len(data.childData) != 1 || data.childData[0] == nil { - panic("arrow/array: expected one child array for map array") - } - - if data.childData[0].DataType().ID() != arrow.STRUCT { - panic("arrow/array: map array child should be struct type") - } - - if data.childData[0].NullN() != 0 { - panic("arrow/array: map array child array should have no nulls") - } - - if len(data.childData[0].Children()) != 2 { - panic("arrow/array: map array child array should have two fields") - } - - if data.childData[0].Children()[0].NullN() != 0 { - panic("arrow/array: map array keys array should have no nulls") - } -} - -func (a *Map) setData(data *Data) { - a.validateData(data) - - a.List.setData(data) - a.keys = MakeFromData(data.childData[0].Children()[0]) - a.items = MakeFromData(data.childData[0].Children()[1]) -} - -// Keys returns the full Array of Key values, equivalent to grabbing -// the key field of the child struct. -func (a *Map) Keys() arrow.Array { return a.keys } - -// Items returns the full Array of Item values, equivalent to grabbing -// the Value field (the second field) of the child struct. -func (a *Map) Items() arrow.Array { return a.items } - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (a *Map) Retain() { - a.List.Retain() - a.keys.Retain() - a.items.Retain() -} - -// Release decreases the reference count by 1. -// Release may be called simultaneously from multiple goroutines. -// When the reference count goes to zero, the memory is freed. 
-func (a *Map) Release() { - a.List.Release() - a.keys.Release() - a.items.Release() -} - -func arrayEqualMap(left, right *Map) bool { - // since Map is implemented using a list, we can just use arrayEqualList - return arrayEqualList(left.List, right.List) -} - -type MapBuilder struct { - listBuilder *ListBuilder - - etype *arrow.MapType - keytype, itemtype arrow.DataType - keyBuilder, itemBuilder Builder - keysSorted bool -} - -// NewMapBuilder returns a builder, using the provided memory allocator. -// The created Map builder will create a map array whose keys will be a non-nullable -// array of type `keytype` and whose mapped items will be a nullable array of itemtype. -// -// KeysSorted is not enforced at all by the builder, it should only be set to true -// building using keys in sorted order for each value. The KeysSorted value will just be -// used when creating the DataType for the map. -// -// # Example -// -// Simple example provided of converting a []map[string]int32 to an array.Map -// by using a MapBuilder: -// -// /* assume maplist == []map[string]int32 */ -// bldr := array.NewMapBuilder(memory.DefaultAllocator, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) -// defer bldr.Release() -// kb := bldr.KeyBuilder().(*array.StringBuilder) -// ib := bldr.ItemBuilder().(*array.Int32Builder) -// for _, m := range maplist { -// bldr.Append(true) -// for k, v := range m { -// kb.Append(k) -// ib.Append(v) -// } -// } -// maparr := bldr.NewMapArray() -// defer maparr.Release() -func NewMapBuilder(mem memory.Allocator, keytype, itemtype arrow.DataType, keysSorted bool) *MapBuilder { - etype := arrow.MapOf(keytype, itemtype) - etype.KeysSorted = keysSorted - listBldr := NewListBuilder(mem, etype.Elem()) - keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0) - keyBldr.Retain() - itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1) - itemBldr.Retain() - return &MapBuilder{ - listBuilder: listBldr, - keyBuilder: keyBldr, - 
itemBuilder: itemBldr, - etype: etype, - keytype: keytype, - itemtype: itemtype, - keysSorted: keysSorted, - } -} - -func NewMapBuilderWithType(mem memory.Allocator, dt *arrow.MapType) *MapBuilder { - listBldr := NewListBuilder(mem, dt.Elem()) - keyBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(0) - keyBldr.Retain() - itemBldr := listBldr.ValueBuilder().(*StructBuilder).FieldBuilder(1) - itemBldr.Retain() - return &MapBuilder{ - listBuilder: listBldr, - keyBuilder: keyBldr, - itemBuilder: itemBldr, - etype: dt, - keytype: dt.KeyType(), - itemtype: dt.ItemType(), - keysSorted: dt.KeysSorted, - } -} - -func (b *MapBuilder) Type() arrow.DataType { return b.etype } - -// Retain increases the reference count by 1 for the sub-builders (list, key, item). -// Retain may be called simultaneously from multiple goroutines. -func (b *MapBuilder) Retain() { - b.listBuilder.Retain() - b.keyBuilder.Retain() - b.itemBuilder.Retain() -} - -// Release decreases the reference count by 1 for the sub builders (list, key, item). -func (b *MapBuilder) Release() { - b.listBuilder.Release() - b.keyBuilder.Release() - b.itemBuilder.Release() -} - -// Len returns the current number of Maps that are in the builder -func (b *MapBuilder) Len() int { return b.listBuilder.Len() } - -// Cap returns the total number of elements that can be stored -// without allocating additional memory. -func (b *MapBuilder) Cap() int { return b.listBuilder.Cap() } - -// NullN returns the number of null values in the array builder. -func (b *MapBuilder) NullN() int { return b.listBuilder.NullN() } - -// IsNull returns if a previously appended value at a given index is null or not. -func (b *MapBuilder) IsNull(i int) bool { - return b.listBuilder.IsNull(i) -} - -// Append adds a new Map element to the array, calling Append(false) is -// equivalent to calling AppendNull. 
-func (b *MapBuilder) Append(v bool) { - b.adjustStructBuilderLen() - b.listBuilder.Append(v) -} - -func (b *MapBuilder) AppendWithSize(v bool, _ int) { - b.Append(v) -} - -// AppendNull adds a null map entry to the array. -func (b *MapBuilder) AppendNull() { - b.Append(false) -} - -// AppendNulls adds null map entry to the array. -func (b *MapBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *MapBuilder) SetNull(i int) { - b.listBuilder.SetNull(i) -} - -func (b *MapBuilder) AppendEmptyValue() { - b.Append(true) -} - -func (b *MapBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -// Reserve enough space for n maps -func (b *MapBuilder) Reserve(n int) { b.listBuilder.Reserve(n) } - -// Resize adjust the space allocated by b to n map elements. If n is greater than -// b.Cap(), additional memory will be allocated. If n is smaller, the allocated memory may be reduced. -func (b *MapBuilder) Resize(n int) { b.listBuilder.Resize(n) } - -// AppendValues is for bulk appending a group of elements with offsets provided -// and validity booleans provided. 
-func (b *MapBuilder) AppendValues(offsets []int32, valid []bool) { - b.adjustStructBuilderLen() - b.listBuilder.AppendValues(offsets, valid) -} - -func (b *MapBuilder) UnsafeAppendBoolToBitmap(v bool) { - b.listBuilder.UnsafeAppendBoolToBitmap(v) -} - -func (b *MapBuilder) init(capacity int) { b.listBuilder.init(capacity) } -func (b *MapBuilder) resize(newBits int, init func(int)) { b.listBuilder.resize(newBits, init) } - -func (b *MapBuilder) adjustStructBuilderLen() { - sb := b.listBuilder.ValueBuilder().(*StructBuilder) - if sb.Len() < b.keyBuilder.Len() { - valids := make([]bool, b.keyBuilder.Len()-sb.Len()) - for i := range valids { - valids[i] = true - } - sb.AppendValues(valids) - } -} - -// NewArray creates a new Map array from the memory buffers used by the builder, and -// resets the builder so it can be used again to build a new Map array. -func (b *MapBuilder) NewArray() arrow.Array { - return b.NewMapArray() -} - -// NewMapArray creates a new Map array from the memory buffers used by the builder, and -// resets the builder so it can be used again to build a new Map array. -func (b *MapBuilder) NewMapArray() (a *Map) { - if !b.etype.ItemField().Nullable && b.ItemBuilder().NullN() > 0 { - panic("arrow/array: item not nullable") - } - - data := b.newData() - defer data.Release() - a = NewMapData(data) - return -} - -func (b *MapBuilder) newData() (data *Data) { - b.adjustStructBuilderLen() - values := b.listBuilder.NewListArray() - defer values.Release() - - data = NewData(b.etype, - values.Len(), values.data.buffers, - values.data.childData, values.NullN(), 0) - return -} - -// KeyBuilder returns a builder that can be used to populate the keys of the maps. -func (b *MapBuilder) KeyBuilder() Builder { return b.keyBuilder } - -// ItemBuilder returns a builder that can be used to populate the values that the -// keys point to. 
-func (b *MapBuilder) ItemBuilder() Builder { return b.itemBuilder } - -// ValueBuilder can be used instead of separately using the Key/Item builders -// to build the list as a List of Structs rather than building the keys/items -// separately. -func (b *MapBuilder) ValueBuilder() Builder { - return b.listBuilder.ValueBuilder() -} - -func (b *MapBuilder) AppendValueFromString(s string) error { - return b.listBuilder.AppendValueFromString(s) -} - -func (b *MapBuilder) UnmarshalOne(dec *json.Decoder) error { - return b.listBuilder.UnmarshalOne(dec) -} - -func (b *MapBuilder) Unmarshal(dec *json.Decoder) error { - return b.listBuilder.Unmarshal(dec) -} - -func (b *MapBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("map builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Map)(nil) - _ Builder = (*MapBuilder)(nil) - _ ListLikeBuilder = (*MapBuilder)(nil) -) diff --git a/go/arrow/array/map_test.go b/go/arrow/array/map_test.go deleted file mode 100644 index e73508e6afe11..0000000000000 --- a/go/arrow/array/map_test.go +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "strconv" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestMapArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - arr, equalArr, unequalArr *array.Map - - equalValid = []bool{true, true, true, true, true, true, true} - equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} - equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} - equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} - unequalValid = []bool{true, true, true} - unequalOffsets = []int32{0, 1, 4, 7} - unequalKeys = []string{"a", "a", "b", "c", "a", "b", "c"} - unequalValues = []int32{1, 2, 2, 2, 3, 4, 5} - ) - - bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.StringBuilder) - ib := bldr.ItemBuilder().(*array.Int32Builder) - - bldr.AppendValues(equalOffsets, equalValid) - for _, k := range equalKeys { - kb.Append(k) - } - ib.AppendValues(equalValues, nil) - - assert.Equal(t, len(equalValid), bldr.Len()) - assert.Zero(t, bldr.NullN()) - - arr = bldr.NewMapArray() - defer arr.Release() - - bldr.AppendValues(equalOffsets, equalValid) - for _, k := range equalKeys { - kb.Append(k) - } - ib.AppendValues(equalValues, nil) - - equalArr = bldr.NewMapArray() - defer equalArr.Release() - - 
bldr.AppendValues(unequalOffsets, unequalValid) - for _, k := range unequalKeys { - kb.Append(k) - } - ib.AppendValues(unequalValues, nil) - - unequalArr = bldr.NewMapArray() - defer unequalArr.Release() - - assert.True(t, array.Equal(arr, arr)) - assert.True(t, array.Equal(arr, equalArr)) - assert.True(t, array.Equal(equalArr, arr)) - assert.False(t, array.Equal(equalArr, unequalArr)) - assert.False(t, array.Equal(unequalArr, equalArr)) - - assert.True(t, array.SliceEqual(arr, 0, 1, unequalArr, 0, 1)) - assert.False(t, array.SliceEqual(arr, 0, 2, unequalArr, 0, 2)) - assert.False(t, array.SliceEqual(arr, 1, 2, unequalArr, 1, 2)) - assert.True(t, array.SliceEqual(arr, 2, 3, unequalArr, 2, 3)) - - t.Run("items non nullable", func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16) - dt.KeysSorted = true - dt.SetItemNullable(false) - - bldr := array.NewBuilder(pool, dt).(*array.MapBuilder) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.Int16Builder) - ib := bldr.ItemBuilder().(*array.Int16Builder) - - bldr.Append(true) - kb.Append(1) - ib.AppendNull() - - assert.Panics(t, func() { - _ = bldr.NewArray() - }) - }) -} - -func TestMapArrayBuildIntToInt(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - dtype = arrow.MapOf(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int16) - keys = []int16{0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5} - items = []int16{1, 1, 2, 3, 5, 8, -1, -1, 0, 1, -1, 2} - validItems = []bool{true, true, true, true, true, true, false, false, true, true, false, true} - offsets = []int32{0, 6, 6, 12, 12} - validMaps = []bool{true, false, true, true} - ) - - bldr := array.NewBuilder(pool, dtype).(*array.MapBuilder) - defer bldr.Release() - - bldr.Reserve(len(validMaps)) - - kb := bldr.KeyBuilder().(*array.Int16Builder) - ib := 
bldr.ItemBuilder().(*array.Int16Builder) - - bldr.Append(true) - kb.AppendValues(keys[:6], nil) - ib.AppendValues(items[:6], nil) - - bldr.AppendNull() - bldr.Append(true) - kb.AppendValues(keys[6:], nil) - ib.AppendValues(items[6:], []bool{false, false, true, true, false, true}) - - bldr.Append(true) - arr := bldr.NewArray().(*array.Map) - defer arr.Release() - - assert.Equal(t, arrow.MAP, arr.DataType().ID()) - assert.EqualValues(t, len(validMaps), arr.Len()) - - for i, ex := range validMaps { - assert.Equal(t, ex, arr.IsValid(i)) - assert.Equal(t, !ex, arr.IsNull(i)) - } - - assert.Equal(t, offsets, arr.Offsets()) - assert.Equal(t, keys, arr.Keys().(*array.Int16).Int16Values()) - - itemArr := arr.Items().(*array.Int16) - for i, ex := range validItems { - if ex { - assert.True(t, itemArr.IsValid(i)) - assert.False(t, itemArr.IsNull(i)) - assert.Equal(t, items[i], itemArr.Value(i)) - } else { - assert.False(t, itemArr.IsValid(i)) - assert.True(t, itemArr.IsNull(i)) - } - } - - assert.Equal(t, "[{[0 1 2 3 4 5] [1 1 2 3 5 8]} (null) {[0 1 2 3 4 5] [(null) (null) 0 1 (null) 2]} {[] []}]", arr.String()) -} - -func TestMapStringRoundTrip(t *testing.T) { - // 1. create array - dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32) - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewMapBuilderWithType(mem, dt) - defer b.Release() - - kb := b.KeyBuilder().(*array.StringBuilder) - ib := b.ItemBuilder().(*array.Int32Builder) - - for n := 0; n < 10; n++ { - b.AppendNull() - b.Append(true) - - for r := 'a'; r <= 'z'; r++ { - kb.Append(string(r) + strconv.Itoa(n)) - if (n+int(r))%2 == 0 { - ib.AppendNull() - } else { - ib.Append(int32(n + int(r))) - } - } - } - - arr := b.NewArray().(*array.Map) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewMapBuilderWithType(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Map) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestMapBuilder_SetNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - arr *array.Map - equalValid = []bool{true, true, true, true, true, true, true} - equalOffsets = []int32{0, 1, 2, 5, 6, 7, 8, 10} - equalKeys = []string{"a", "a", "a", "b", "c", "a", "a", "a", "a", "b"} - equalValues = []int32{1, 2, 3, 4, 5, 2, 2, 2, 5, 6} - ) - - bldr := array.NewMapBuilder(pool, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32, false) - defer bldr.Release() - - kb := bldr.KeyBuilder().(*array.StringBuilder) - ib := bldr.ItemBuilder().(*array.Int32Builder) - - bldr.AppendValues(equalOffsets, equalValid) - for _, k := range equalKeys { - kb.Append(k) - } - ib.AppendValues(equalValues, nil) - - bldr.SetNull(0) - bldr.SetNull(3) - - arr = bldr.NewMapArray() - defer arr.Release() - - assert.True(t, arr.IsNull(0)) - assert.True(t, arr.IsValid(1)) - assert.True(t, arr.IsNull(3)) -} diff --git a/go/arrow/array/null.go b/go/arrow/array/null.go deleted file mode 100644 index 6dccd3af59f2a..0000000000000 --- a/go/arrow/array/null.go +++ /dev/null @@ -1,218 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Null represents an immutable, degenerate array with no physical storage. -type Null struct { - array -} - -// NewNull returns a new Null array value of size n. -func NewNull(n int) *Null { - a := &Null{} - a.refCount = 1 - data := NewData( - arrow.Null, n, - []*memory.Buffer{nil}, - nil, - n, - 0, - ) - a.setData(data) - data.Release() - return a -} - -// NewNullData returns a new Null array value, from data. 
-func NewNullData(data arrow.ArrayData) *Null { - a := &Null{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Null) ValueStr(int) string { return NullValueStr } - -func (a *Null) Value(int) interface{} { return nil } - -func (a *Null) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - o.WriteString(NullValueStr) - } - o.WriteString("]") - return o.String() -} - -func (a *Null) setData(data *Data) { - a.array.setData(data) - a.array.nullBitmapBytes = nil - a.array.data.nulls = a.array.data.length -} - -func (a *Null) GetOneForMarshal(i int) interface{} { - return nil -} - -func (a *Null) MarshalJSON() ([]byte, error) { - return json.Marshal(make([]interface{}, a.Len())) -} - -type NullBuilder struct { - builder -} - -// NewNullBuilder returns a builder, using the provided memory allocator. -func NewNullBuilder(mem memory.Allocator) *NullBuilder { - return &NullBuilder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *NullBuilder) Type() arrow.DataType { return arrow.Null } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *NullBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - } -} - -func (b *NullBuilder) AppendNull() { - b.builder.length++ - b.builder.nulls++ -} - -func (b *NullBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *NullBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - return fmt.Errorf("cannot convert %q to null", s) -} - -func (b *NullBuilder) AppendEmptyValue() { b.AppendNull() } - -func (b *NullBuilder) AppendEmptyValues(n int) { b.AppendNulls(n) } - -func (*NullBuilder) Reserve(size int) {} -func (*NullBuilder) Resize(size int) {} - -func (*NullBuilder) init(cap int) {} -func (*NullBuilder) resize(newBits int, init func(int)) {} - -// NewArray creates a Null array from the memory buffers used by the builder and resets the NullBuilder -// so it can be used to build a new array. -func (b *NullBuilder) NewArray() arrow.Array { - return b.NewNullArray() -} - -// NewNullArray creates a Null array from the memory buffers used by the builder and resets the NullBuilder -// so it can be used to build a new array. 
-func (b *NullBuilder) NewNullArray() (a *Null) { - data := b.newData() - a = NewNullData(data) - data.Release() - return -} - -func (b *NullBuilder) newData() (data *Data) { - data = NewData( - arrow.Null, b.length, - []*memory.Buffer{nil}, - nil, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *NullBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t.(type) { - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(nil), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *NullBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *NullBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("null builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Null)(nil) - _ Builder = (*NullBuilder)(nil) -) diff --git a/go/arrow/array/null_test.go b/go/arrow/array/null_test.go deleted file mode 100644 index 61ccb472b1f7b..0000000000000 --- a/go/arrow/array/null_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestNullArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - b := array.NewNullBuilder(pool) - defer b.Release() - - b.AppendNull() - b.AppendNulls(2) - b.AppendEmptyValue() - b.AppendEmptyValues(2) - - arr1 := b.NewArray().(*array.Null) - defer arr1.Release() - - if got, want := arr1.Len(), 6; got != want { - t.Fatalf("invalid null array length: got=%d, want=%d", got, want) - } - - if got, want := arr1.NullN(), 6; got != want { - t.Fatalf("invalid number of nulls: got=%d, want=%d", got, want) - } - - if got, want := arr1.DataType(), arrow.Null; got != want { - t.Fatalf("invalid null data type: got=%v, want=%v", got, want) - } - - arr1.Retain() - arr1.Release() - - if arr1.Data() == nil { - t.Fatalf("invalid null data") - } - - arr2 := b.NewNullArray() - defer arr2.Release() - - if got, want := arr2.Len(), 0; got != want { - t.Fatalf("invalid null array length: got=%d, want=%d", got, want) - } - - arr3 := array.NewNull(10) - defer arr3.Release() - - if got, want := arr3.Len(), 10; got != want { - t.Fatalf("invalid null array length: got=%d, want=%d", got, want) - } - - if got, want := arr3.NullN(), 10; got != want { - t.Fatalf("invalid number of nulls: got=%d, want=%d", got, want) - } - -} - -func TestNullStringRoundTrip(t 
*testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewNullBuilder(mem) - defer b.Release() - - b.AppendNull() - b.AppendNulls(2) - b.AppendEmptyValue() - b.AppendEmptyValues(2) - - arr := b.NewArray().(*array.Null) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewNullBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Null) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go deleted file mode 100644 index 413a356c2a8ab..0000000000000 --- a/go/arrow/array/numeric.gen.go +++ /dev/null @@ -1,1452 +0,0 @@ -// Code generated by array/numeric.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "math" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/internal/json" -) - -// A type which represents an immutable sequence of int64 values. 
-type Int64 struct { - array - values []int64 -} - -// NewInt64Data creates a new Int64. -func NewInt64Data(data arrow.ArrayData) *Int64 { - a := &Int64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int64) Value(i int) int64 { return a.values[i] } - -// Values returns the values. -func (a *Int64) Int64Values() []int64 { return a.values } - -// String returns a string representation of the array. -func (a *Int64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Int64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt64(left, right *Int64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of uint64 values. 
-type Uint64 struct { - array - values []uint64 -} - -// NewUint64Data creates a new Uint64. -func NewUint64Data(data arrow.ArrayData) *Uint64 { - a := &Uint64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint64) Value(i int) uint64 { return a.values[i] } - -// Values returns the values. -func (a *Uint64) Uint64Values() []uint64 { return a.values } - -// String returns a string representation of the array. -func (a *Uint64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Uint64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint64(left, right *Uint64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of float64 values. 
-type Float64 struct { - array - values []float64 -} - -// NewFloat64Data creates a new Float64. -func NewFloat64Data(data arrow.ArrayData) *Float64 { - a := &Float64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Float64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Float64) Value(i int) float64 { return a.values[i] } - -// Values returns the values. -func (a *Float64) Float64Values() []float64 { return a.values } - -// String returns a string representation of the array. -func (a *Float64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Float64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Float64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Float64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) -} - -func (a *Float64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Float64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - switch { - case math.IsNaN(f): - vals[i] = "NaN" - case math.IsInf(f, 1): - vals[i] = "+Inf" - case math.IsInf(f, -1): - vals[i] = "-Inf" - default: - vals[i] = f - } - - } - - return json.Marshal(vals) -} - -func arrayEqualFloat64(left, right *Float64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if 
left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of int32 values. -type Int32 struct { - array - values []int32 -} - -// NewInt32Data creates a new Int32. -func NewInt32Data(data arrow.ArrayData) *Int32 { - a := &Int32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int32) Value(i int) int32 { return a.values[i] } - -// Values returns the values. -func (a *Int32) Int32Values() []int32 { return a.values } - -// String returns a string representation of the array. -func (a *Int32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Int32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt32(left, right *Int32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A 
type which represents an immutable sequence of uint32 values. -type Uint32 struct { - array - values []uint32 -} - -// NewUint32Data creates a new Uint32. -func NewUint32Data(data arrow.ArrayData) *Uint32 { - a := &Uint32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint32) Value(i int) uint32 { return a.values[i] } - -// Values returns the values. -func (a *Uint32) Uint32Values() []uint32 { return a.values } - -// String returns a string representation of the array. -func (a *Uint32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Uint32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint32(left, right *Uint32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of float32 
values. -type Float32 struct { - array - values []float32 -} - -// NewFloat32Data creates a new Float32. -func NewFloat32Data(data arrow.ArrayData) *Float32 { - a := &Float32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Float32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Float32) Value(i int) float32 { return a.values[i] } - -// Values returns the values. -func (a *Float32) Float32Values() []float32 { return a.values } - -// String returns a string representation of the array. -func (a *Float32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Float32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Float32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Float32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) -} - -func (a *Float32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Float32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - v := strconv.FormatFloat(float64(f), 'g', -1, 32) - - switch v { - case "NaN", "+Inf", "-Inf": - vals[i] = v - default: - vals[i] = f - } - } - - return json.Marshal(vals) -} - -func arrayEqualFloat32(left, right *Float32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != 
right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of int16 values. -type Int16 struct { - array - values []int16 -} - -// NewInt16Data creates a new Int16. -func NewInt16Data(data arrow.ArrayData) *Int16 { - a := &Int16{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int16) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int16) Value(i int) int16 { return a.values[i] } - -// Values returns the values. -func (a *Int16) Int16Values() []int16 { return a.values } - -// String returns a string representation of the array. -func (a *Int16) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int16) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int16) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int16) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Int16) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt16(left, right *Int16) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which 
represents an immutable sequence of uint16 values. -type Uint16 struct { - array - values []uint16 -} - -// NewUint16Data creates a new Uint16. -func NewUint16Data(data arrow.ArrayData) *Uint16 { - a := &Uint16{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint16) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint16) Value(i int) uint16 { return a.values[i] } - -// Values returns the values. -func (a *Uint16) Uint16Values() []uint16 { return a.values } - -// String returns a string representation of the array. -func (a *Uint16) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint16) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint16Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint16) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint16) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return a.values[i] -} - -func (a *Uint16) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.values[i] - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint16(left, right *Uint16) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of int8 values. 
-type Int8 struct { - array - values []int8 -} - -// NewInt8Data creates a new Int8. -func NewInt8Data(data arrow.ArrayData) *Int8 { - a := &Int8{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Int8) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Int8) Value(i int) int8 { return a.values[i] } - -// Values returns the values. -func (a *Int8) Int8Values() []int8 { return a.values } - -// String returns a string representation of the array. -func (a *Int8) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Int8) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Int8Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Int8) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatInt(int64(a.Value(i)), 10) -} - -func (a *Int8) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return float64(a.values[i]) // prevent uint8 from being seen as binary data -} - -func (a *Int8) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualInt8(left, right *Int8) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of 
uint8 values. -type Uint8 struct { - array - values []uint8 -} - -// NewUint8Data creates a new Uint8. -func NewUint8Data(data arrow.ArrayData) *Uint8 { - a := &Uint8{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Uint8) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Uint8) Value(i int) uint8 { return a.values[i] } - -// Values returns the values. -func (a *Uint8) Uint8Values() []uint8 { return a.values } - -// String returns a string representation of the array. -func (a *Uint8) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Uint8) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Uint8Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Uint8) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return strconv.FormatUint(uint64(a.Value(i)), 10) -} - -func (a *Uint8) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - return float64(a.values[i]) // prevent uint8 from being seen as binary data -} - -func (a *Uint8) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data - } else { - vals[i] = nil - } - } - - return json.Marshal(vals) -} - -func arrayEqualUint8(left, right *Uint8) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which 
represents an immutable sequence of arrow.Time32 values. -type Time32 struct { - array - values []arrow.Time32 -} - -// NewTime32Data creates a new Time32. -func NewTime32Data(data arrow.ArrayData) *Time32 { - a := &Time32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Time32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Time32) Value(i int) arrow.Time32 { return a.values[i] } - -// Values returns the values. -func (a *Time32) Time32Values() []arrow.Time32 { return a.values } - -// String returns a string representation of the array. -func (a *Time32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Time32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Time32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Time32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString(a.DataType().(*arrow.Time32Type).Unit) -} - -func (a *Time32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime(a.DataType().(*arrow.Time32Type).Unit).Format("15:04:05.999999999") -} - -func (a *Time32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTime32(left, right *Time32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - 
-// A type which represents an immutable sequence of arrow.Time64 values. -type Time64 struct { - array - values []arrow.Time64 -} - -// NewTime64Data creates a new Time64. -func NewTime64Data(data arrow.ArrayData) *Time64 { - a := &Time64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Time64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Time64) Value(i int) arrow.Time64 { return a.values[i] } - -// Values returns the values. -func (a *Time64) Time64Values() []arrow.Time64 { return a.values } - -// String returns a string representation of the array. -func (a *Time64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Time64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Time64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Time64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString(a.DataType().(*arrow.Time64Type).Unit) -} - -func (a *Time64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime(a.DataType().(*arrow.Time64Type).Unit).Format("15:04:05.999999999") -} - -func (a *Time64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTime64(left, right *Time64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - 
return true -} - -// A type which represents an immutable sequence of arrow.Date32 values. -type Date32 struct { - array - values []arrow.Date32 -} - -// NewDate32Data creates a new Date32. -func NewDate32Data(data arrow.ArrayData) *Date32 { - a := &Date32{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Date32) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Date32) Value(i int) arrow.Date32 { return a.values[i] } - -// Values returns the values. -func (a *Date32) Date32Values() []arrow.Date32 { return a.values } - -// String returns a string representation of the array. -func (a *Date32) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Date32) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Date32Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Date32) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString() -} - -func (a *Date32) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime().Format("2006-01-02") -} - -func (a *Date32) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualDate32(left, right *Date32) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable 
sequence of arrow.Date64 values. -type Date64 struct { - array - values []arrow.Date64 -} - -// NewDate64Data creates a new Date64. -func NewDate64Data(data arrow.ArrayData) *Date64 { - a := &Date64{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Date64) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Date64) Value(i int) arrow.Date64 { return a.values[i] } - -// Values returns the values. -func (a *Date64) Date64Values() []arrow.Date64 { return a.values } - -// String returns a string representation of the array. -func (a *Date64) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Date64) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.Date64Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Date64) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.values[i].FormattedString() -} - -func (a *Date64) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.values[i].ToTime().Format("2006-01-02") -} - -func (a *Date64) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualDate64(left, right *Date64) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// A type which represents an immutable sequence of arrow.Duration values. 
-type Duration struct { - array - values []arrow.Duration -} - -// NewDurationData creates a new Duration. -func NewDurationData(data arrow.ArrayData) *Duration { - a := &Duration{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Duration) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Duration) Value(i int) arrow.Duration { return a.values[i] } - -// Values returns the values. -func (a *Duration) DurationValues() []arrow.Duration { return a.values } - -// String returns a string representation of the array. -func (a *Duration) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Duration) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.DurationTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Duration) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit) -} - -func (a *Duration) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*arrow.DurationType).Unit.String()) -} - -func (a *Duration) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualDuration(left, right *Duration) bool { - for i := 0; i < left.Len(); i++ { - if 
left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} diff --git a/go/arrow/array/numeric.gen.go.tmpl b/go/arrow/array/numeric.gen.go.tmpl deleted file mode 100644 index 1f4b56609f464..0000000000000 --- a/go/arrow/array/numeric.gen.go.tmpl +++ /dev/null @@ -1,192 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "fmt" - "strings" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/internal/json" -) - -{{range .In}} - -// A type which represents an immutable sequence of {{or .QualifiedType .Type}} values. -type {{.Name}} struct { - array - values []{{or .QualifiedType .Type}} -} - -// New{{.Name}}Data creates a new {{.Name}}. -func New{{.Name}}Data(data arrow.ArrayData) *{{.Name}} { - a := &{{.Name}}{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *{{.Name}}) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *{{.Name}}) Value(i int) {{or .QualifiedType .Type}} { return a.values[i] } - -// Values returns the values. 
-func (a *{{.Name}}) {{.Name}}Values() []{{or .QualifiedType .Type}} { return a.values } - -// String returns a string representation of the array. -func (a *{{.Name}}) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *{{.Name}}) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.{{.Name}}Traits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *{{.Name}}) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } -{{if or (eq .Name "Date32") (eq .Name "Date64") -}} - return a.values[i].FormattedString() -{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} - return a.values[i].FormattedString(a.DataType().(*{{.QualifiedType}}Type).Unit) -{{else if (eq .Name "Duration") -}} - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit) -{{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} - return strconv.FormatInt(int64(a.Value(i)), 10) -{{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name "Uint64") -}} - return strconv.FormatUint(uint64(a.Value(i)), 10) -{{else if or (eq .Name "Float32") -}} - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 32) -{{else if or (eq .Name "Float64") -}} - return strconv.FormatFloat(float64(a.Value(i)), 'g', -1, 64) -{{else}} - return fmt.Sprintf("%v", a.values[i]) -{{end -}} -} - -func (a *{{.Name}}) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } -{{if or (eq .Name "Date32") (eq .Name "Date64") -}} - return 
a.values[i].ToTime().Format("2006-01-02") -{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} - return a.values[i].ToTime(a.DataType().(*{{.QualifiedType}}Type).Unit).Format("15:04:05.999999999") -{{else if (eq .Name "Duration") -}} - // return value and suffix as a string such as "12345ms" - return fmt.Sprintf("%d%s", a.values[i], a.DataType().(*{{.QualifiedType}}Type).Unit.String()) -{{else if (eq .Size "1")}} - return float64(a.values[i]) // prevent uint8 from being seen as binary data -{{else}} - return a.values[i] -{{end -}} -} - -func (a *{{.Name}}) MarshalJSON() ([]byte, error) { -{{if .QualifiedType -}} - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } -{{else -}} - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - {{if (eq .Name "Float32") -}} - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - v := strconv.FormatFloat(float64(f), 'g', -1, 32) - - switch v { - case "NaN", "+Inf", "-Inf": - vals[i] = v - default: - vals[i] = f - } - {{else if (eq .Name "Float64") -}} - if !a.IsValid(i) { - vals[i] = nil - continue - } - - f := a.Value(i) - switch { - case math.IsNaN(f): - vals[i] = "NaN" - case math.IsInf(f, 1): - vals[i] = "+Inf" - case math.IsInf(f, -1): - vals[i] = "-Inf" - default: - vals[i] = f - } - {{else}} - if a.IsValid(i) { - {{ if (eq .Size "1") }}vals[i] = float64(a.values[i]) // prevent uint8 from being seen as binary data{{ else }}vals[i] = a.values[i]{{ end }} - } else { - vals[i] = nil - } - {{end}} - } -{{end}} - return json.Marshal(vals) -} - -func arrayEqual{{.Name}}(left, right *{{.Name}}) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -{{end}} diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go deleted file mode 100644 index bb8acc3f41519..0000000000000 --- a/go/arrow/array/numeric_test.go +++ 
/dev/null @@ -1,779 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "math" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/float16" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/stretchr/testify/assert" -) - -func TestNewFloat64Data(t *testing.T) { - exp := []float64{1.0, 2.0, 4.0, 8.0, 16.0} - - ad := array.NewData( - arrow.PrimitiveTypes.Float64, len(exp), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Float64Traits.CastToBytes(exp))}, - nil, 0, 0, - ) - fa := array.NewFloat64Data(ad) - - assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") - assert.Equal(t, exp, fa.Float64Values(), "unexpected Float64Values()") -} - -func TestFloat64SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []float64{1, 2, 3, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewFloat64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - 
- arr := b.NewArray().(*array.Float64) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Float64) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestFloat64SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []float64{1, 2, 3, 0, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewFloat64Builder(pool) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Float64) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Float64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Float64) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Float64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestFloat16MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat16Builder(pool) - defer bldr.Release() - - 
jsonstr := `[0, 1, 2, 3, "NaN", "NaN", 4, 5, "+Inf", "-Inf"]` - - bldr.Append(float16.New(0)) - bldr.Append(float16.New(1)) - bldr.Append(float16.New(2)) - bldr.Append(float16.New(3)) - bldr.Append(float16.NaN()) - bldr.Append(float16.NaN()) - bldr.Append(float16.New(4)) - bldr.Append(float16.New(5)) - bldr.Append(float16.Inf()) - bldr.Append(float16.Inf().Negate()) - - expected := bldr.NewFloat16Array() - defer expected.Release() - expected_json, err := expected.MarshalJSON() - assert.NoError(t, err) - assert.JSONEq(t, jsonstr, string(expected_json)) -} - -func TestFloat32MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat32Builder(pool) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - bldr.Append(0) - bldr.Append(1) - bldr.Append(float32(math.Inf(1))) - bldr.Append(2) - bldr.Append(3) - bldr.Append(float32(math.NaN())) - bldr.Append(float32(math.NaN())) - bldr.Append(4) - bldr.Append(5) - bldr.Append(float32(math.Inf(-1))) - - expected := bldr.NewFloat32Array() - defer expected.Release() - - expected_json, err := expected.MarshalJSON() - assert.NoError(t, err) - - assert.JSONEq(t, jsonstr, string(expected_json)) -} - -func TestFloat64MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat64Builder(pool) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - bldr.Append(0) - bldr.Append(1) - bldr.Append(math.Inf(1)) - bldr.Append(2) - bldr.Append(3) - bldr.Append(math.NaN()) - bldr.Append(math.NaN()) - bldr.Append(4) - bldr.Append(5) - bldr.Append(math.Inf(-1)) - - expected := bldr.NewFloat64Array() - defer expected.Release() - - expected_json, err := expected.MarshalJSON() - assert.NoError(t, err) - - assert.JSONEq(t, jsonstr, string(expected_json)) - -} - -func TestUnmarshalSpecialFloat(t 
*testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - bldr := array.NewFloat32Builder(pool) - defer bldr.Release() - - assert.NoError(t, json.Unmarshal([]byte(`[3.4, "Inf", "-Inf"]`), bldr)) - arr := bldr.NewFloat32Array() - defer arr.Release() - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(1)), 1), arr.Value(1)) - assert.True(t, math.IsInf(float64(arr.Value(2)), -1), arr.Value(2)) -} - -func TestNewTime32Data(t *testing.T) { - data := []arrow.Time32{ - arrow.Time32(1), - arrow.Time32(2), - arrow.Time32(4), - arrow.Time32(8), - arrow.Time32(16), - } - - dtype := arrow.FixedWidthTypes.Time32s - ad := array.NewData(dtype, len(data), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Time32Traits.CastToBytes(data))}, - nil, 0, 0, - ) - t32a := array.NewTime32Data(ad) - - assert.Equal(t, len(data), t32a.Len(), "unexpected Len()") - assert.Equal(t, data, t32a.Time32Values(), "unexpected Float64Values()") -} - -func TestTime32SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Time32{ - arrow.Time32(1), - arrow.Time32(2), - arrow.Time32(4), - arrow.Time32(8), - arrow.Time32(16), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time32s - b := array.NewTime32Builder(pool, dtype.(*arrow.Time32Type)) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Time32) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time32) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", 
got, want) - } - - if got, want := slice.Time32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestTime32SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Time32{ - arrow.Time32(1), - arrow.Time32(2), - arrow.Time32(3), - arrow.Time32(0), - arrow.Time32(4), - arrow.Time32(5), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time32s - b := array.NewTime32Builder(pool, dtype.(*arrow.Time32Type)) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Time32) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time32) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Time32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestNewTime64Data(t *testing.T) { - data := []arrow.Time64{ - arrow.Time64(1), - arrow.Time64(2), - arrow.Time64(4), - arrow.Time64(8), - arrow.Time64(16), - } - - dtype := arrow.FixedWidthTypes.Time64us - ad := array.NewData(dtype, len(data), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Time64Traits.CastToBytes(data))}, - nil, 0, 0, - ) - t64a := array.NewTime64Data(ad) - - assert.Equal(t, len(data), t64a.Len(), "unexpected Len()") - assert.Equal(t, data, t64a.Time64Values(), 
"unexpected Float64Values()") -} - -func TestTime64SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Time64{ - arrow.Time64(1), - arrow.Time64(2), - arrow.Time64(4), - arrow.Time64(8), - arrow.Time64(16), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time64us - b := array.NewTime64Builder(pool, dtype.(*arrow.Time64Type)) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Time64) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time64) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Time64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestTime64SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Time64{ - arrow.Time64(1), - arrow.Time64(2), - arrow.Time64(3), - arrow.Time64(0), - arrow.Time64(4), - arrow.Time64(5), - } - sub = vs[beg:end] - ) - - dtype := arrow.FixedWidthTypes.Time64us - b := array.NewTime64Builder(pool, dtype.(*arrow.Time64Type)) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Time64) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Time64Values(), vs; 
!reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Time64) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Time64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestNewDate32Data(t *testing.T) { - exp := []arrow.Date32{1, 2, 4, 8, 16} - - dtype := &arrow.Date32Type{} - ad := array.NewData( - dtype, len(exp), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date32Traits.CastToBytes(exp))}, - nil, 0, 0, - ) - fa := array.NewDate32Data(ad) - - assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") - assert.Equal(t, exp, fa.Date32Values(), "unexpected Date32Values()") -} - -func TestDate32SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Date32{1, 2, 3, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate32Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Date32) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date32) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestDate32SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - 
const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Date32{1, 2, 3, 0, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate32Builder(pool) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Date32) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date32Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date32) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date32Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestNewDate64Data(t *testing.T) { - exp := []arrow.Date64{1, 2, 4, 8, 16} - - dtype := &arrow.Date64Type{} - ad := array.NewData( - dtype, len(exp), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Date64Traits.CastToBytes(exp))}, - nil, 0, 0, - ) - fa := array.NewDate64Data(ad) - - assert.Equal(t, len(exp), fa.Len(), "unexpected Len()") - assert.Equal(t, exp, fa.Date64Values(), "unexpected Date64Values()") -} - -func TestDate64SliceData(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 4 - ) - - var ( - vs = []arrow.Date64{1, 2, 3, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Date64) - defer arr.Release() - - if got, want := arr.Len(), len(vs); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := 
arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date64) - defer slice.Release() - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestDate64SliceDataWithNull(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - const ( - beg = 2 - end = 5 - ) - - var ( - valids = []bool{true, true, true, false, true, true} - vs = []arrow.Date64{1, 2, 3, 0, 4, 5} - sub = vs[beg:end] - ) - - b := array.NewDate64Builder(pool) - defer b.Release() - - b.AppendValues(vs, valids) - - arr := b.NewArray().(*array.Date64) - defer arr.Release() - - if got, want := arr.Len(), len(valids); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.Date64Values(), vs; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - slice := array.NewSlice(arr, beg, end).(*array.Date64) - defer slice.Release() - - if got, want := slice.NullN(), 1; got != want { - t.Errorf("got=%d, want=%d", got, want) - } - - if got, want := slice.Len(), len(sub); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := slice.Date64Values(), sub; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } -} - -func TestInt64MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []int64{-5474557666971701248} - ) - - b := array.NewInt64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Int64) - defer arr.Release() - - jsonBytes, err := json.Marshal(arr) - if 
err != nil { - t.Fatal(err) - } - got := string(jsonBytes) - want := `[-5474557666971701248]` - if got != want { - t.Fatalf("got=%s, want=%s", got, want) - } -} - -func TestUInt64MarshalJSON(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - vs = []uint64{14697929703826477056} - ) - - b := array.NewUint64Builder(pool) - defer b.Release() - - for _, v := range vs { - b.Append(v) - } - - arr := b.NewArray().(*array.Uint64) - defer arr.Release() - - jsonBytes, err := json.Marshal(arr) - if err != nil { - t.Fatal(err) - } - got := string(jsonBytes) - want := `[14697929703826477056]` - if got != want { - t.Fatalf("got=%s, want=%s", got, want) - } -} diff --git a/go/arrow/array/numericbuilder.gen.go b/go/arrow/array/numericbuilder.gen.go deleted file mode 100644 index c80f0c7c9578e..0000000000000 --- a/go/arrow/array/numericbuilder.gen.go +++ /dev/null @@ -1,3664 +0,0 @@ -// Code generated by array/numericbuilder.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strconv" - "strings" - "sync/atomic" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type Int64Builder struct { - builder - - data *memory.Buffer - rawData []int64 -} - -func NewInt64Builder(mem memory.Allocator) *Int64Builder { - return &Int64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int64Builder) Append(v int64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int64Builder) UnsafeAppend(v int64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. 
The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *Int64Builder) AppendValues(v []int64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int64Traits.BytesRequired(n)) - b.rawData = arrow.Int64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int64Builder) Value(i int) int64 { - return b.rawData[i] -} - -// NewArray creates a Int64 array from the memory buffers used by the builder and resets the Int64Builder -// so it can be used to build a new array. -func (b *Int64Builder) NewArray() arrow.Array { - return b.NewInt64Array() -} - -// NewInt64Array creates a Int64 array from the memory buffers used by the builder and resets the Int64Builder -// so it can be used to build a new array. 
-func (b *Int64Builder) NewInt64Array() (a *Int64) { - data := b.newData() - a = NewInt64Data(data) - data.Release() - return -} - -func (b *Int64Builder) newData() (data *Data) { - bytesRequired := arrow.Int64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 8*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int64(v)) - return nil -} - -func (b *Int64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int64(f)) - case float64: - b.Append(int64(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int64(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, 
ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint64Builder struct { - builder - - data *memory.Buffer - rawData []uint64 -} - -func NewUint64Builder(mem memory.Allocator) *Uint64Builder { - return &Uint64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint64Builder) Append(v uint64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint64Builder) UnsafeAppend(v uint64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint64Builder) AppendValues(v []uint64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint64Traits.BytesRequired(n)) - b.rawData = arrow.Uint64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint64Builder) Value(i int) uint64 { - return b.rawData[i] -} - -// NewArray creates a Uint64 array from the memory buffers used by the builder and resets the Uint64Builder -// so it can be used to build a new array. -func (b *Uint64Builder) NewArray() arrow.Array { - return b.NewUint64Array() -} - -// NewUint64Array creates a Uint64 array from the memory buffers used by the builder and resets the Uint64Builder -// so it can be used to build a new array. 
-func (b *Uint64Builder) NewUint64Array() (a *Uint64) { - data := b.newData() - a = NewUint64Data(data) - data.Release() - return -} - -func (b *Uint64Builder) newData() (data *Data) { - bytesRequired := arrow.Uint64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 8*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint64(v)) - return nil -} - -func (b *Uint64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint64(f)) - case float64: - b.Append(uint64(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint64(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return 
err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Float64Builder struct { - builder - - data *memory.Buffer - rawData []float64 -} - -func NewFloat64Builder(mem memory.Allocator) *Float64Builder { - return &Float64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Float64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Float64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Float64Builder) Append(v float64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Float64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Float64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Float64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Float64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Float64Builder) UnsafeAppend(v float64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Float64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Float64Builder) AppendValues(v []float64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Float64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Float64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Float64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Float64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Float64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Float64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Float64Traits.BytesRequired(n)) - b.rawData = arrow.Float64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Float64Builder) Value(i int) float64 { - return b.rawData[i] -} - -// NewArray creates a Float64 array from the memory buffers used by the builder and resets the Float64Builder -// so it can be used to build a new array. -func (b *Float64Builder) NewArray() arrow.Array { - return b.NewFloat64Array() -} - -// NewFloat64Array creates a Float64 array from the memory buffers used by the builder and resets the Float64Builder -// so it can be used to build a new array. 
-func (b *Float64Builder) NewFloat64Array() (a *Float64) { - data := b.newData() - a = NewFloat64Data(data) - data.Release() - return -} - -func (b *Float64Builder) newData() (data *Data) { - bytesRequired := arrow.Float64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Float64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Float64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseFloat(s, 8*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(float64(v)) - return nil -} - -func (b *Float64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseFloat(v, 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(float64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float64(f)) - case float64: - b.Append(float64(v)) - case json.Number: - f, err := strconv.ParseFloat(v.String(), 8*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(float64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float64(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(float64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Float64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Float64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { 
- return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Int32Builder struct { - builder - - data *memory.Buffer - rawData []int32 -} - -func NewInt32Builder(mem memory.Allocator) *Int32Builder { - return &Int32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int32Builder) Append(v int32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int32Builder) UnsafeAppend(v int32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Int32Builder) AppendValues(v []int32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int32Traits.BytesRequired(n)) - b.rawData = arrow.Int32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int32Builder) Value(i int) int32 { - return b.rawData[i] -} - -// NewArray creates a Int32 array from the memory buffers used by the builder and resets the Int32Builder -// so it can be used to build a new array. -func (b *Int32Builder) NewArray() arrow.Array { - return b.NewInt32Array() -} - -// NewInt32Array creates a Int32 array from the memory buffers used by the builder and resets the Int32Builder -// so it can be used to build a new array. 
-func (b *Int32Builder) NewInt32Array() (a *Int32) { - data := b.newData() - a = NewInt32Data(data) - data.Release() - return -} - -func (b *Int32Builder) newData() (data *Data) { - bytesRequired := arrow.Int32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 4*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int32(v)) - return nil -} - -func (b *Int32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int32(f)) - case float64: - b.Append(int32(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int32(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, 
ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint32Builder struct { - builder - - data *memory.Buffer - rawData []uint32 -} - -func NewUint32Builder(mem memory.Allocator) *Uint32Builder { - return &Uint32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint32Builder) Append(v uint32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint32Builder) UnsafeAppend(v uint32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint32Builder) AppendValues(v []uint32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint32Traits.BytesRequired(n)) - b.rawData = arrow.Uint32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint32Builder) Value(i int) uint32 { - return b.rawData[i] -} - -// NewArray creates a Uint32 array from the memory buffers used by the builder and resets the Uint32Builder -// so it can be used to build a new array. -func (b *Uint32Builder) NewArray() arrow.Array { - return b.NewUint32Array() -} - -// NewUint32Array creates a Uint32 array from the memory buffers used by the builder and resets the Uint32Builder -// so it can be used to build a new array. 
-func (b *Uint32Builder) NewUint32Array() (a *Uint32) { - data := b.newData() - a = NewUint32Data(data) - data.Release() - return -} - -func (b *Uint32Builder) newData() (data *Data) { - bytesRequired := arrow.Uint32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 4*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint32(v)) - return nil -} - -func (b *Uint32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint32(f)) - case float64: - b.Append(uint32(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint32(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return 
err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Float32Builder struct { - builder - - data *memory.Buffer - rawData []float32 -} - -func NewFloat32Builder(mem memory.Allocator) *Float32Builder { - return &Float32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Float32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Float32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Float32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Float32Builder) Append(v float32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Float32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Float32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Float32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Float32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Float32Builder) UnsafeAppend(v float32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Float32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Float32Builder) AppendValues(v []float32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Float32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Float32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Float32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Float32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Float32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Float32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Float32Traits.BytesRequired(n)) - b.rawData = arrow.Float32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Float32Builder) Value(i int) float32 { - return b.rawData[i] -} - -// NewArray creates a Float32 array from the memory buffers used by the builder and resets the Float32Builder -// so it can be used to build a new array. -func (b *Float32Builder) NewArray() arrow.Array { - return b.NewFloat32Array() -} - -// NewFloat32Array creates a Float32 array from the memory buffers used by the builder and resets the Float32Builder -// so it can be used to build a new array. 
-func (b *Float32Builder) NewFloat32Array() (a *Float32) { - data := b.newData() - a = NewFloat32Data(data) - data.Release() - return -} - -func (b *Float32Builder) newData() (data *Data) { - bytesRequired := arrow.Float32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Float32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Float32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseFloat(s, 4*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(float32(v)) - return nil -} - -func (b *Float32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseFloat(v, 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(float32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float32(f)) - case float64: - b.Append(float32(v)) - case json.Number: - f, err := strconv.ParseFloat(v.String(), 4*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(float32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(float32(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(float32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Float32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Float32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { 
- return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Int16Builder struct { - builder - - data *memory.Buffer - rawData []int16 -} - -func NewInt16Builder(mem memory.Allocator) *Int16Builder { - return &Int16Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int16 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int16Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int16Builder) Append(v int16) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int16Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int16Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int16Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int16Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int16Builder) UnsafeAppend(v int16) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int16Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Int16Builder) AppendValues(v []int16, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int16Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int16Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int16Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int16Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int16Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int16Traits.BytesRequired(n)) - b.rawData = arrow.Int16Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int16Builder) Value(i int) int16 { - return b.rawData[i] -} - -// NewArray creates a Int16 array from the memory buffers used by the builder and resets the Int16Builder -// so it can be used to build a new array. -func (b *Int16Builder) NewArray() arrow.Array { - return b.NewInt16Array() -} - -// NewInt16Array creates a Int16 array from the memory buffers used by the builder and resets the Int16Builder -// so it can be used to build a new array. 
-func (b *Int16Builder) NewInt16Array() (a *Int16) { - data := b.newData() - a = NewInt16Data(data) - data.Release() - return -} - -func (b *Int16Builder) newData() (data *Data) { - bytesRequired := arrow.Int16Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int16Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 2*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int16(v)) - return nil -} - -func (b *Int16Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int16(f)) - case float64: - b.Append(int16(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int16(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int16(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int16Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int16Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, 
ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint16Builder struct { - builder - - data *memory.Buffer - rawData []uint16 -} - -func NewUint16Builder(mem memory.Allocator) *Uint16Builder { - return &Uint16Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint16Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint16 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint16Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint16Builder) Append(v uint16) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint16Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint16Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint16Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint16Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint16Builder) UnsafeAppend(v uint16) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint16Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint16Builder) AppendValues(v []uint16, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint16Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint16Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint16Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint16Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint16Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint16Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint16Traits.BytesRequired(n)) - b.rawData = arrow.Uint16Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint16Builder) Value(i int) uint16 { - return b.rawData[i] -} - -// NewArray creates a Uint16 array from the memory buffers used by the builder and resets the Uint16Builder -// so it can be used to build a new array. -func (b *Uint16Builder) NewArray() arrow.Array { - return b.NewUint16Array() -} - -// NewUint16Array creates a Uint16 array from the memory buffers used by the builder and resets the Uint16Builder -// so it can be used to build a new array. 
-func (b *Uint16Builder) NewUint16Array() (a *Uint16) { - data := b.newData() - a = NewUint16Data(data) - data.Release() - return -} - -func (b *Uint16Builder) newData() (data *Data) { - bytesRequired := arrow.Uint16Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint16, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint16Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 2*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint16(v)) - return nil -} - -func (b *Uint16Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint16(f)) - case float64: - b.Append(uint16(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 2*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint16(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint16(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint16(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint16Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint16Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return 
err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Int8Builder struct { - builder - - data *memory.Buffer - rawData []int8 -} - -func NewInt8Builder(mem memory.Allocator) *Int8Builder { - return &Int8Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Int8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Int8 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Int8Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Int8Builder) Append(v int8) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Int8Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Int8Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Int8Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Int8Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Int8Builder) UnsafeAppend(v int8) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Int8Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Int8Builder) AppendValues(v []int8, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Int8Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Int8Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Int8Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Int8Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Int8Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Int8Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Int8Traits.BytesRequired(n)) - b.rawData = arrow.Int8Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Int8Builder) Value(i int) int8 { - return b.rawData[i] -} - -// NewArray creates a Int8 array from the memory buffers used by the builder and resets the Int8Builder -// so it can be used to build a new array. -func (b *Int8Builder) NewArray() arrow.Array { - return b.NewInt8Array() -} - -// NewInt8Array creates a Int8 array from the memory buffers used by the builder and resets the Int8Builder -// so it can be used to build a new array. 
-func (b *Int8Builder) NewInt8Array() (a *Int8) { - data := b.newData() - a = NewInt8Data(data) - data.Release() - return -} - -func (b *Int8Builder) newData() (data *Data) { - bytesRequired := arrow.Int8Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Int8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Int8Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseInt(s, 10, 1*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(int8(v)) - return nil -} - -func (b *Int8Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseInt(v, 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(int8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int8(f)) - case float64: - b.Append(int8(v)) - case json.Number: - f, err := strconv.ParseInt(v.String(), 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(int8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(int8(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(int8(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Int8Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Int8Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := 
t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Uint8Builder struct { - builder - - data *memory.Buffer - rawData []uint8 -} - -func NewUint8Builder(mem memory.Allocator) *Uint8Builder { - return &Uint8Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Uint8Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Uint8 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Uint8Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Uint8Builder) Append(v uint8) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Uint8Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Uint8Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Uint8Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Uint8Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Uint8Builder) UnsafeAppend(v uint8) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Uint8Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Uint8Builder) AppendValues(v []uint8, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Uint8Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Uint8Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Uint8Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Uint8Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Uint8Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Uint8Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Uint8Traits.BytesRequired(n)) - b.rawData = arrow.Uint8Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Uint8Builder) Value(i int) uint8 { - return b.rawData[i] -} - -// NewArray creates a Uint8 array from the memory buffers used by the builder and resets the Uint8Builder -// so it can be used to build a new array. -func (b *Uint8Builder) NewArray() arrow.Array { - return b.NewUint8Array() -} - -// NewUint8Array creates a Uint8 array from the memory buffers used by the builder and resets the Uint8Builder -// so it can be used to build a new array. 
-func (b *Uint8Builder) NewUint8Array() (a *Uint8) { - data := b.newData() - a = NewUint8Data(data) - data.Release() - return -} - -func (b *Uint8Builder) newData() (data *Data) { - bytesRequired := arrow.Uint8Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Uint8, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Uint8Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - v, err := strconv.ParseUint(s, 10, 1*8) - if err != nil { - b.AppendNull() - return err - } - b.Append(uint8(v)) - return nil -} - -func (b *Uint8Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - - case string: - f, err := strconv.ParseUint(v, 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(uint8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint8(f)) - case float64: - b.Append(uint8(v)) - case json.Number: - f, err := strconv.ParseUint(v.String(), 10, 1*8) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(uint8(0)), - Offset: dec.InputOffset(), - } - } - b.Append(uint8(f)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(uint8(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Uint8Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Uint8Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if 
delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Time32Builder struct { - builder - - dtype *arrow.Time32Type - data *memory.Buffer - rawData []arrow.Time32 -} - -func NewTime32Builder(mem memory.Allocator, dtype *arrow.Time32Type) *Time32Builder { - return &Time32Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *Time32Builder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Time32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Time32Builder) Append(v arrow.Time32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Time32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Time32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Time32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Time32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Time32Builder) UnsafeAppend(v arrow.Time32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Time32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. 
If empty, -// all values in v are appended and considered valid. -func (b *Time32Builder) AppendValues(v []arrow.Time32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Time32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Time32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Time32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Time32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Time32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Time32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Time32Traits.BytesRequired(n)) - b.rawData = arrow.Time32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Time32Builder) Value(i int) arrow.Time32 { - return b.rawData[i] -} - -// NewArray creates a Time32 array from the memory buffers used by the builder and resets the Time32Builder -// so it can be used to build a new array. -func (b *Time32Builder) NewArray() arrow.Array { - return b.NewTime32Array() -} - -// NewTime32Array creates a Time32 array from the memory buffers used by the builder and resets the Time32Builder -// so it can be used to build a new array. 
-func (b *Time32Builder) NewTime32Array() (a *Time32) { - data := b.newData() - a = NewTime32Data(data) - data.Release() - return -} - -func (b *Time32Builder) newData() (data *Data) { - bytesRequired := arrow.Time32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Time32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := arrow.Time32FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Time32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := arrow.Time32FromString(v, b.dtype.Unit) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Time32(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Time32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Time32(n)) - case float64: - b.Append(arrow.Time32(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Time32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Time32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Time32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - 
- if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Time64Builder struct { - builder - - dtype *arrow.Time64Type - data *memory.Buffer - rawData []arrow.Time64 -} - -func NewTime64Builder(mem memory.Allocator, dtype *arrow.Time64Type) *Time64Builder { - return &Time64Builder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *Time64Builder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Time64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Time64Builder) Append(v arrow.Time64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Time64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Time64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Time64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Time64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Time64Builder) UnsafeAppend(v arrow.Time64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Time64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. 
If empty, -// all values in v are appended and considered valid. -func (b *Time64Builder) AppendValues(v []arrow.Time64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Time64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Time64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Time64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Time64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Time64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Time64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Time64Traits.BytesRequired(n)) - b.rawData = arrow.Time64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Time64Builder) Value(i int) arrow.Time64 { - return b.rawData[i] -} - -// NewArray creates a Time64 array from the memory buffers used by the builder and resets the Time64Builder -// so it can be used to build a new array. -func (b *Time64Builder) NewArray() arrow.Array { - return b.NewTime64Array() -} - -// NewTime64Array creates a Time64 array from the memory buffers used by the builder and resets the Time64Builder -// so it can be used to build a new array. 
-func (b *Time64Builder) NewTime64Array() (a *Time64) { - data := b.newData() - a = NewTime64Data(data) - data.Release() - return -} - -func (b *Time64Builder) newData() (data *Data) { - bytesRequired := arrow.Time64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Time64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - val, err := arrow.Time64FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - return nil -} - -func (b *Time64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := arrow.Time64FromString(v, b.dtype.Unit) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Time64(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Time64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Time64(n)) - case float64: - b.Append(arrow.Time64(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Time64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Time64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Time64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - 
- if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Date32Builder struct { - builder - - data *memory.Buffer - rawData []arrow.Date32 -} - -func NewDate32Builder(mem memory.Allocator) *Date32Builder { - return &Date32Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Date32Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date32 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Date32Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Date32Builder) Append(v arrow.Date32) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Date32Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Date32Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Date32Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Date32Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Date32Builder) UnsafeAppend(v arrow.Date32) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Date32Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *Date32Builder) AppendValues(v []arrow.Date32, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Date32Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Date32Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Date32Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Date32Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Date32Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Date32Traits.BytesRequired(n)) - b.rawData = arrow.Date32Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Date32Builder) Value(i int) arrow.Date32 { - return b.rawData[i] -} - -// NewArray creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder -// so it can be used to build a new array. -func (b *Date32Builder) NewArray() arrow.Array { - return b.NewDate32Array() -} - -// NewDate32Array creates a Date32 array from the memory buffers used by the builder and resets the Date32Builder -// so it can be used to build a new array. 
-func (b *Date32Builder) NewDate32Array() (a *Date32) { - data := b.newData() - a = NewDate32Data(data) - data.Release() - return -} - -func (b *Date32Builder) newData() (data *Data) { - bytesRequired := arrow.Date32Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Date32, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Date32Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date32FromTime(tm)) - return nil -} - -func (b *Date32Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := time.Parse("2006-01-02", v) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Date32(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(arrow.Date32FromTime(tm)) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Date32(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Date32(n)) - case float64: - b.Append(arrow.Date32(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Date32(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Date32Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Date32Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := 
dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type Date64Builder struct { - builder - - data *memory.Buffer - rawData []arrow.Date64 -} - -func NewDate64Builder(mem memory.Allocator) *Date64Builder { - return &Date64Builder{builder: builder{refCount: 1, mem: mem}} -} - -func (b *Date64Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.Date64 } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *Date64Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *Date64Builder) Append(v arrow.Date64) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *Date64Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *Date64Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *Date64Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *Date64Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *Date64Builder) UnsafeAppend(v arrow.Date64) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *Date64Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. 
If empty, -// all values in v are appended and considered valid. -func (b *Date64Builder) AppendValues(v []arrow.Date64, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.Date64Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *Date64Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.Date64Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *Date64Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *Date64Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.Date64Traits.BytesRequired(n)) - b.rawData = arrow.Date64Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *Date64Builder) Value(i int) arrow.Date64 { - return b.rawData[i] -} - -// NewArray creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder -// so it can be used to build a new array. -func (b *Date64Builder) NewArray() arrow.Array { - return b.NewDate64Array() -} - -// NewDate64Array creates a Date64 array from the memory buffers used by the builder and resets the Date64Builder -// so it can be used to build a new array. 
-func (b *Date64Builder) NewDate64Array() (a *Date64) { - data := b.newData() - a = NewDate64Data(data) - data.Release() - return -} - -func (b *Date64Builder) newData() (data *Data) { - bytesRequired := arrow.Date64Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(arrow.PrimitiveTypes.Date64, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *Date64Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date64FromTime(tm)) - return nil -} - -func (b *Date64Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - tm, err := time.Parse("2006-01-02", v) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Date64(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(arrow.Date64FromTime(tm)) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Date64(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Date64(n)) - case float64: - b.Append(arrow.Date64(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Date64(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *Date64Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *Date64Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := 
dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type DurationBuilder struct { - builder - - dtype *arrow.DurationType - data *memory.Buffer - rawData []arrow.Duration -} - -func NewDurationBuilder(mem memory.Allocator, dtype *arrow.DurationType) *DurationBuilder { - return &DurationBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *DurationBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -func (b *DurationBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *DurationBuilder) Append(v arrow.Duration) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *DurationBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *DurationBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *DurationBuilder) AppendEmptyValue() { - b.Append(0) -} - -func (b *DurationBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *DurationBuilder) UnsafeAppend(v arrow.Duration) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *DurationBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). 
The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *DurationBuilder) AppendValues(v []arrow.Duration, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.DurationTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *DurationBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.DurationTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.DurationTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *DurationBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *DurationBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.DurationTraits.BytesRequired(n)) - b.rawData = arrow.DurationTraits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *DurationBuilder) Value(i int) arrow.Duration { - return b.rawData[i] -} - -// NewArray creates a Duration array from the memory buffers used by the builder and resets the DurationBuilder -// so it can be used to build a new array. -func (b *DurationBuilder) NewArray() arrow.Array { - return b.NewDurationArray() -} - -// NewDurationArray creates a Duration array from the memory buffers used by the builder and resets the DurationBuilder -// so it can be used to build a new array. 
-func (b *DurationBuilder) NewDurationArray() (a *Duration) { - data := b.newData() - a = NewDurationData(data) - data.Release() - return -} - -func (b *DurationBuilder) newData() (data *Data) { - bytesRequired := arrow.DurationTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *DurationBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - dur, err := time.ParseDuration(s) - if err != nil { - return err - } - - b.Append(arrow.Duration(dur / b.dtype.Unit.Multiplier())) - return nil -} - -func (b *DurationBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Duration(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Duration(n)) - case float64: - b.Append(arrow.Duration(v)) - case string: - // be flexible for specifying durations by accepting forms like - // 3h2m0.5s regardless of the unit and converting it to the proper - // precision. - val, err := time.ParseDuration(v) - if err != nil { - // if we got an error, maybe it was because the attempt to create - // a time.Duration (int64) in nanoseconds would overflow. 
check if - // the string is just a large number followed by the unit suffix - if strings.HasSuffix(v, b.dtype.Unit.String()) { - value, err := strconv.ParseInt(v[:len(v)-len(b.dtype.Unit.String())], 10, 64) - if err == nil { - b.Append(arrow.Duration(value)) - break - } - } - - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Duration(0)), - Offset: dec.InputOffset(), - } - } - - switch b.dtype.Unit { - case arrow.Nanosecond: - b.Append(arrow.Duration(val.Nanoseconds())) - case arrow.Microsecond: - b.Append(arrow.Duration(val.Microseconds())) - case arrow.Millisecond: - b.Append(arrow.Duration(val.Milliseconds())) - case arrow.Second: - b.Append(arrow.Duration(val.Seconds())) - } - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Duration(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *DurationBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *DurationBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ Builder = (*Int64Builder)(nil) - _ Builder = (*Uint64Builder)(nil) - _ Builder = (*Float64Builder)(nil) - _ Builder = (*Int32Builder)(nil) - _ Builder = (*Uint32Builder)(nil) - _ Builder = (*Float32Builder)(nil) - _ Builder = (*Int16Builder)(nil) - _ Builder = (*Uint16Builder)(nil) - _ Builder = (*Int8Builder)(nil) - _ Builder = (*Uint8Builder)(nil) - _ Builder = (*Time32Builder)(nil) - _ Builder = (*Time64Builder)(nil) - _ Builder = (*Date32Builder)(nil) - _ Builder = (*Date64Builder)(nil) - _ Builder = (*DurationBuilder)(nil) -) diff --git 
a/go/arrow/array/numericbuilder.gen.go.tmpl b/go/arrow/array/numericbuilder.gen.go.tmpl deleted file mode 100644 index d8b92cf60cc39..0000000000000 --- a/go/arrow/array/numericbuilder.gen.go.tmpl +++ /dev/null @@ -1,447 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -{{range .In}} - -type {{.Name}}Builder struct { - builder - -{{if .Opt.Parametric -}} - dtype *arrow.{{.Name}}Type -{{end -}} - data *memory.Buffer - rawData []{{or .QualifiedType .Type}} -} - -{{if .Opt.Parametric}} -func New{{.Name}}Builder(mem memory.Allocator, dtype *arrow.{{.Name}}Type) *{{.Name}}Builder { - return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}, dtype: dtype} -} - -func (b *{{.Name}}Builder) Type() arrow.DataType { return b.dtype } - -{{else}} -func New{{.Name}}Builder(mem memory.Allocator) *{{.Name}}Builder { - return &{{.Name}}Builder{builder: builder{refCount:1, mem: mem}} -} - -func (b *{{.Name}}Builder) Type() arrow.DataType { return arrow.PrimitiveTypes.{{.Name}} } -{{end}} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *{{.Name}}Builder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *{{.Name}}Builder) Append(v {{or .QualifiedType .Type}}) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *{{.Name}}Builder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *{{.Name}}Builder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *{{.Name}}Builder) AppendEmptyValue() { - b.Append(0) -} - -func (b *{{.Name}}Builder) AppendEmptyValues(n int) { - for i := 0; i < n; i ++ { - b.AppendEmptyValue() - } -} - -func (b *{{.Name}}Builder) UnsafeAppend(v {{or .QualifiedType .Type}}) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *{{.Name}}Builder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *{{.Name}}Builder) AppendValues(v []{{or .QualifiedType .Type}}, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.{{.Name}}Traits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *{{.Name}}Builder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.{{.Name}}Traits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.{{.Name}}Traits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *{{.Name}}Builder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *{{.Name}}Builder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.{{.Name}}Traits.BytesRequired(n)) - b.rawData = arrow.{{.Name}}Traits.CastFromBytes(b.data.Bytes()) - } -} - -func (b *{{.Name}}Builder) Value(i int) {{or .QualifiedType .Type}} { - return b.rawData[i] -} - -// NewArray creates a {{.Name}} array from the memory buffers used by the builder and resets the {{.Name}}Builder -// so it can be used to build a new array. -func (b *{{.Name}}Builder) NewArray() arrow.Array { - return b.New{{.Name}}Array() -} - -// New{{.Name}}Array creates a {{.Name}} array from the memory buffers used by the builder and resets the {{.Name}}Builder -// so it can be used to build a new array. 
-func (b *{{.Name}}Builder) New{{.Name}}Array() (a *{{.Name}}) { - data := b.newData() - a = New{{.Name}}Data(data) - data.Release() - return -} - -func (b *{{.Name}}Builder) newData() (data *Data) { - bytesRequired := arrow.{{.Name}}Traits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } -{{if .Opt.Parametric -}} - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) -{{else -}} - data = NewData(arrow.PrimitiveTypes.{{.Name}}, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) -{{end -}} - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *{{.Name}}Builder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - {{if or (eq .Name "Date32") -}} - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date32FromTime(tm)) - {{else if or (eq .Name "Date64") -}} - tm, err := time.Parse("2006-01-02", s) - if err != nil { - b.AppendNull() - return err - } - b.Append(arrow.Date64FromTime(tm)) - {{else if or (eq .Name "Time32") -}} - val, err := arrow.Time32FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - {{else if or (eq .Name "Time64") -}} - val, err := arrow.Time64FromString(s, b.dtype.Unit) - if err != nil { - b.AppendNull() - return err - } - b.Append(val) - {{else if (eq .Name "Duration") -}} - dur, err := time.ParseDuration(s) - if err != nil { - return err - } - - b.Append(arrow.Duration(dur / b.dtype.Unit.Multiplier())) - {{else if or (eq .Name "Int8") (eq .Name "Int16") (eq .Name "Int32") (eq .Name "Int64") -}} - v, err := strconv.ParseInt(s, 10, {{.Size}} * 8) - if err != nil { - b.AppendNull() - return err - } - b.Append({{.name}}(v)) - {{else if or (eq .Name "Uint8") (eq .Name "Uint16") (eq .Name "Uint32") (eq .Name 
"Uint64") -}} - v, err := strconv.ParseUint(s, 10, {{.Size}} * 8) - if err != nil { - b.AppendNull() - return err - } - b.Append({{.name}}(v)) - {{else if or (eq .Name "Float32") (eq .Name "Float64") -}} - v, err := strconv.ParseFloat(s, {{.Size}} * 8) - if err != nil { - b.AppendNull() - return err - } - b.Append({{.name}}(v)) - {{end -}} - return nil -} - -func (b *{{.Name}}Builder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() -{{if or (eq .Name "Date32") (eq .Name "Date64") -}} - case string: - tm, err := time.Parse("2006-01-02", v) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - - b.Append({{.QualifiedType}}FromTime(tm)) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.QualifiedType}}(n)) - case float64: - b.Append({{.QualifiedType}}(v)) -{{else if or (eq .Name "Time32") (eq .Name "Time64") -}} - case string: - tm, err := {{.QualifiedType}}FromString(v, b.dtype.Unit) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.QualifiedType}}(n)) - case float64: - b.Append({{.QualifiedType}}(v)) -{{else if eq .Name "Duration" -}} - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.QualifiedType}}(n)) - case float64: - 
b.Append({{.QualifiedType}}(v)) - case string: - // be flexible for specifying durations by accepting forms like - // 3h2m0.5s regardless of the unit and converting it to the proper - // precision. - val, err := time.ParseDuration(v) - if err != nil { - // if we got an error, maybe it was because the attempt to create - // a time.Duration (int64) in nanoseconds would overflow. check if - // the string is just a large number followed by the unit suffix - if strings.HasSuffix(v, b.dtype.Unit.String()) { - value, err := strconv.ParseInt(v[:len(v)-len(b.dtype.Unit.String())], 10, 64) - if err == nil { - b.Append(arrow.Duration(value)) - break - } - } - - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.QualifiedType}}(0)), - Offset: dec.InputOffset(), - } - } - - switch b.dtype.Unit { - case arrow.Nanosecond: - b.Append({{.QualifiedType}}(val.Nanoseconds())) - case arrow.Microsecond: - b.Append({{.QualifiedType}}(val.Microseconds())) - case arrow.Millisecond: - b.Append({{.QualifiedType}}(val.Milliseconds())) - case arrow.Second: - b.Append({{.QualifiedType}}(val.Seconds())) - } -{{else}} - case string: -{{if or (eq .Name "Float32") (eq .Name "Float64") -}} - f, err := strconv.ParseFloat(v, {{.Size}}*8) -{{else if eq (printf "%.1s" .Name) "U" -}} - f, err := strconv.ParseUint(v, 10, {{.Size}}*8) -{{else -}} - f, err := strconv.ParseInt(v, 10, {{.Size}}*8) -{{end -}} - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf({{.name}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.name}}(f)) - case float64: - b.Append({{.name}}(v)) - case json.Number: -{{if or (eq .Name "Float32") (eq .Name "Float64") -}} - f, err := strconv.ParseFloat(v.String(), {{.Size}}*8) -{{else if eq (printf "%.1s" .Name) "U" -}} - f, err := strconv.ParseUint(v.String(), 10, {{.Size}}*8) -{{else -}} - f, err := strconv.ParseInt(v.String(), 10, {{.Size}}*8) -{{end -}} - if err != nil { - return &json.UnmarshalTypeError{ - Value: 
v.String(), - Type: reflect.TypeOf({{.name}}(0)), - Offset: dec.InputOffset(), - } - } - b.Append({{.name}}(f)) -{{end}} - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf({{or .QualifiedType .Type}}(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *{{.Name}}Builder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *{{.Name}}Builder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} -{{end}} - -var ( -{{- range .In}} - _ Builder = (*{{.Name}}Builder)(nil) -{{- end}} -) diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go deleted file mode 100644 index 8adf86853b7c7..0000000000000 --- a/go/arrow/array/numericbuilder.gen_test.go +++ /dev/null @@ -1,3125 +0,0 @@ -// Code generated by array/numericbuilder.gen_test.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "math" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestInt64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int64) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt64Array() - - // check state of builder after NewInt64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), 
NewInt64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int64Values(), "unexpected Int64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int64Values(), 10, "unexpected length of Int64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewInt64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int64{7, 8}, a.Int64Values()) - assert.Len(t, a.Int64Values(), 2) - - a.Release() - - var ( - want = []int64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int64); !ok { - t.Fatalf("could not type-assert to array.Int64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int64) - if !ok { - t.Fatalf("could not type-assert to array.Int64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer ab.Release() - - exp := []int64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt64Array() - assert.Equal(t, exp, a.Int64Values()) - - a.Release() -} - -func TestInt64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := 
array.NewInt64Builder(mem) - defer ab.Release() - - exp := []int64{0, 1, 2, 3} - - ab.AppendValues([]int64{}, nil) - a := ab.NewInt64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt64Array() - assert.Equal(t, exp, a.Int64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]int64{}, nil) - a = ab.NewInt64Array() - assert.Equal(t, exp, a.Int64Values()) - a.Release() -} - -func TestInt64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt64Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint64) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint64Array() - - // check state of builder after NewUint64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint64Values(), "unexpected Uint64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint64Values(), 10, "unexpected length of Uint64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint64{7, 8}, a.Uint64Values()) - assert.Len(t, a.Uint64Values(), 2) - - a.Release() - - var ( - want = []uint64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint64Array() - - sub := array.MakeFromData(a.Data()) - defer 
sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint64); !ok { - t.Fatalf("could not type-assert to array.Uint64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint64) - if !ok { - t.Fatalf("could not type-assert to array.Uint64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - exp := []uint64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint64Array() - assert.Equal(t, exp, a.Uint64Values()) - - a.Release() -} - -func TestUint64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - exp := []uint64{0, 1, 2, 3} - - ab.AppendValues([]uint64{}, nil) - a := ab.NewUint64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint64Array() - assert.Equal(t, exp, a.Uint64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint64{}, nil) - a = ab.NewUint64Array() - assert.Equal(t, exp, a.Uint64Values()) - a.Release() -} - -func TestUint64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint64Builder(mem) - defer ab.Release() - - 
assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Float64) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewFloat64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Float64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewFloat64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewFloat64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewFloat64Array() - - // check state of builder after NewFloat64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), 
NewFloat64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []float64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Float64Values(), "unexpected Float64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Float64Values(), 10, "unexpected length of Float64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewFloat64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []float64{7, 8}, a.Float64Values()) - assert.Len(t, a.Float64Values(), 2) - - a.Release() - - var ( - want = []float64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewFloat64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Float64); !ok { - t.Fatalf("could not type-assert to array.Float64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Float64) - if !ok { - t.Fatalf("could not type-assert to array.Float64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestFloat64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - exp := []float64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewFloat64Array() - assert.Equal(t, exp, a.Float64Values()) - - a.Release() -} - -func TestFloat64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) 
- defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - exp := []float64{0, 1, 2, 3} - - ab.AppendValues([]float64{}, nil) - a := ab.NewFloat64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFloat64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]float64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewFloat64Array() - assert.Equal(t, exp, a.Float64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]float64{}, nil) - a = ab.NewFloat64Array() - assert.Equal(t, exp, a.Float64Values()) - a.Release() -} - -func TestFloat64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat64Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat64BuilderUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bldr := array.NewFloat64Builder(mem) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - err := bldr.UnmarshalJSON([]byte(jsonstr)) - assert.NoError(t, err) - - arr := bldr.NewFloat64Array() - defer arr.Release() - - assert.NotNil(t, arr) - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) - assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) -} - -func TestInt32StringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int32) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt32Array() - - // check state of builder after NewInt32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int32Values(), "unexpected Int32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int32Values(), 10, "unexpected length of Int32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - 
a = ab.NewInt32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int32{7, 8}, a.Int32Values()) - assert.Len(t, a.Int32Values(), 2) - - a.Release() - - var ( - want = []int32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt32Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int32); !ok { - t.Fatalf("could not type-assert to array.Int32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int32) - if !ok { - t.Fatalf("could not type-assert to array.Int32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - exp := []int32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt32Array() - assert.Equal(t, exp, a.Int32Values()) - - a.Release() -} - -func TestInt32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - exp := []int32{0, 1, 2, 3} - - ab.AppendValues([]int32{}, nil) - a := ab.NewInt32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt32Array() - assert.Equal(t, exp, a.Int32Values()) - a.Release() - - ab.AppendValues(exp, 
nil) - ab.AppendValues([]int32{}, nil) - a = ab.NewInt32Array() - assert.Equal(t, exp, a.Int32Values()) - a.Release() -} - -func TestInt32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt32Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint32) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint32Array() - - // check state of builder after NewUint32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint32Values(), "unexpected Uint32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint32Values(), 10, "unexpected length of Uint32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint32{7, 8}, a.Uint32Values()) - assert.Len(t, a.Uint32Values(), 2) - - a.Release() - - var ( - want = []uint32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint32Array() - - sub := array.MakeFromData(a.Data()) - defer 
sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint32); !ok { - t.Fatalf("could not type-assert to array.Uint32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint32) - if !ok { - t.Fatalf("could not type-assert to array.Uint32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - exp := []uint32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint32Array() - assert.Equal(t, exp, a.Uint32Values()) - - a.Release() -} - -func TestUint32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - exp := []uint32{0, 1, 2, 3} - - ab.AppendValues([]uint32{}, nil) - a := ab.NewUint32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint32Array() - assert.Equal(t, exp, a.Uint32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint32{}, nil) - a = ab.NewUint32Array() - assert.Equal(t, exp, a.Uint32Values()) - a.Release() -} - -func TestUint32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint32Builder(mem) - defer ab.Release() - - 
assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewFloat32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Float32) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewFloat32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Float32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewFloat32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewFloat32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewFloat32Array() - - // check state of builder after NewFloat32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewFloat32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewFloat32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), 
NewFloat32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []float32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Float32Values(), "unexpected Float32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Float32Values(), 10, "unexpected length of Float32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewFloat32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []float32{7, 8}, a.Float32Values()) - assert.Len(t, a.Float32Values(), 2) - - a.Release() - - var ( - want = []float32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewFloat32Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Float32); !ok { - t.Fatalf("could not type-assert to array.Float32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Float32) - if !ok { - t.Fatalf("could not type-assert to array.Float32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestFloat32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - exp := []float32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewFloat32Array() - assert.Equal(t, exp, a.Float32Values()) - - a.Release() -} - -func TestFloat32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) 
- defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - exp := []float32{0, 1, 2, 3} - - ab.AppendValues([]float32{}, nil) - a := ab.NewFloat32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewFloat32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]float32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewFloat32Array() - assert.Equal(t, exp, a.Float32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]float32{}, nil) - a = ab.NewFloat32Array() - assert.Equal(t, exp, a.Float32Values()) - a.Release() -} - -func TestFloat32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewFloat32Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestFloat32BuilderUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bldr := array.NewFloat32Builder(mem) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - err := bldr.UnmarshalJSON([]byte(jsonstr)) - assert.NoError(t, err) - - arr := bldr.NewFloat32Array() - defer arr.Release() - - assert.NotNil(t, arr) - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) - assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) -} - -func TestInt16StringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt16Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int16) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt16Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int16) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt16Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt16Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt16Array() - - // check state of builder after NewInt16Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt16Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt16Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt16Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int16{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int16Values(), "unexpected Int16Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int16Values(), 10, "unexpected length of Int16Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - 
a = ab.NewInt16Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int16{7, 8}, a.Int16Values()) - assert.Len(t, a.Int16Values(), 2) - - a.Release() - - var ( - want = []int16{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt16Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int16); !ok { - t.Fatalf("could not type-assert to array.Int16") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int16) - if !ok { - t.Fatalf("could not type-assert to array.Int16") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt16Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - exp := []int16{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt16Array() - assert.Equal(t, exp, a.Int16Values()) - - a.Release() -} - -func TestInt16Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - exp := []int16{0, 1, 2, 3} - - ab.AppendValues([]int16{}, nil) - a := ab.NewInt16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int16{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt16Array() - assert.Equal(t, exp, a.Int16Values()) - a.Release() - - ab.AppendValues(exp, 
nil) - ab.AppendValues([]int16{}, nil) - a = ab.NewInt16Array() - assert.Equal(t, exp, a.Int16Values()) - a.Release() -} - -func TestInt16Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt16Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint16StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint16Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint16) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint16Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint16) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint16Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint16Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint16Array() - - // check state of builder after NewUint16Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint16Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint16Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint16Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint16{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint16Values(), "unexpected Uint16Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint16Values(), 10, "unexpected length of Uint16Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint16Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint16{7, 8}, a.Uint16Values()) - assert.Len(t, a.Uint16Values(), 2) - - a.Release() - - var ( - want = []uint16{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint16Array() - - sub := array.MakeFromData(a.Data()) - defer 
sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint16); !ok { - t.Fatalf("could not type-assert to array.Uint16") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint16) - if !ok { - t.Fatalf("could not type-assert to array.Uint16") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint16Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - exp := []uint16{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint16Array() - assert.Equal(t, exp, a.Uint16Values()) - - a.Release() -} - -func TestUint16Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - exp := []uint16{0, 1, 2, 3} - - ab.AppendValues([]uint16{}, nil) - a := ab.NewUint16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint16Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint16{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint16Array() - assert.Equal(t, exp, a.Uint16Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint16{}, nil) - a = ab.NewUint16Array() - assert.Equal(t, exp, a.Uint16Values()) - a.Release() -} - -func TestUint16Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint16Builder(mem) - defer ab.Release() - - 
assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestInt8StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewInt8Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Int8) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewInt8Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Int8) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewInt8Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewInt8Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewInt8Array() - - // check state of builder after NewInt8Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewInt8Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewInt8Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewInt8Array did not reset state") - - 
// check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []int8{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Int8Values(), "unexpected Int8Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Int8Values(), 10, "unexpected length of Int8Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewInt8Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []int8{7, 8}, a.Int8Values()) - assert.Len(t, a.Int8Values(), 2) - - a.Release() - - var ( - want = []int8{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewInt8Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Int8); !ok { - t.Fatalf("could not type-assert to array.Int8") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Int8) - if !ok { - t.Fatalf("could not type-assert to array.Int8") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestInt8Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - exp := []int8{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewInt8Array() - assert.Equal(t, exp, a.Int8Values()) - - a.Release() -} - -func TestInt8Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - exp := 
[]int8{0, 1, 2, 3} - - ab.AppendValues([]int8{}, nil) - a := ab.NewInt8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewInt8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]int8{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewInt8Array() - assert.Equal(t, exp, a.Int8Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]int8{}, nil) - a = ab.NewInt8Array() - assert.Equal(t, exp, a.Int8Values()) - a.Release() -} - -func TestInt8Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewInt8Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestUint8StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewUint8Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Uint8) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewUint8Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Uint8) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewUint8Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewUint8Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewUint8Array() - - // check state of builder after NewUint8Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewUint8Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewUint8Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewUint8Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []uint8{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Uint8Values(), "unexpected Uint8Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Uint8Values(), 10, "unexpected length of Uint8Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewUint8Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []uint8{7, 8}, a.Uint8Values()) - assert.Len(t, a.Uint8Values(), 2) - - a.Release() - - var ( - want = []uint8{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewUint8Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, 
want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Uint8); !ok { - t.Fatalf("could not type-assert to array.Uint8") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Uint8) - if !ok { - t.Fatalf("could not type-assert to array.Uint8") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestUint8Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - exp := []uint8{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewUint8Array() - assert.Equal(t, exp, a.Uint8Values()) - - a.Release() -} - -func TestUint8Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - exp := []uint8{0, 1, 2, 3} - - ab.AppendValues([]uint8{}, nil) - a := ab.NewUint8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewUint8Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]uint8{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewUint8Array() - assert.Equal(t, exp, a.Uint8Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]uint8{}, nil) - a = ab.NewUint8Array() - assert.Equal(t, exp, a.Uint8Values()) - a.Release() -} - -func TestUint8Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewUint8Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, 
ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestTime32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.Time32Type{Unit: arrow.Second} - b := array.NewTime32Builder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Time32) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewTime32Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Time32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTime32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewTime32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTime32Array() - - // check state of builder after NewTime32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTime32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTime32Array did not reset state") - assert.Zero(t, 
ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTime32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Time32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Time32Values(), "unexpected Time32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Time32Values(), 10, "unexpected length of Time32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTime32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Time32{7, 8}, a.Time32Values()) - assert.Len(t, a.Time32Values(), 2) - - a.Release() - - var ( - want = []arrow.Time32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewTime32Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Time32); !ok { - t.Fatalf("could not type-assert to array.Time32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Time32) - if !ok { - t.Fatalf("could not type-assert to array.Time32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTime32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTime32Array() - assert.Equal(t, exp, a.Time32Values()) - - a.Release() -} - 
-func TestTime32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time32{0, 1, 2, 3} - - ab.AppendValues([]arrow.Time32{}, nil) - a := ab.NewTime32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTime32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Time32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTime32Array() - assert.Equal(t, exp, a.Time32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Time32{}, nil) - a = ab.NewTime32Array() - assert.Equal(t, exp, a.Time32Values()) - a.Release() -} - -func TestTime32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time32Type{Unit: arrow.Second} - ab := array.NewTime32Builder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestTime64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.Time64Type{Unit: arrow.Microsecond} - b := array.NewTime64Builder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Time64) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewTime64Builder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Time64) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTime64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewTime64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTime64Array() - - // check state of builder after NewTime64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTime64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTime64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTime64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Time64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Time64Values(), "unexpected Time64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Time64Values(), 10, "unexpected length of Time64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTime64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Time64{7, 8}, a.Time64Values()) - assert.Len(t, a.Time64Values(), 2) - - a.Release() - - var ( - want = []arrow.Time64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - 
a = ab.NewTime64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Time64); !ok { - t.Fatalf("could not type-assert to array.Time64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Time64) - if !ok { - t.Fatalf("could not type-assert to array.Time64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTime64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTime64Array() - assert.Equal(t, exp, a.Time64Values()) - - a.Release() -} - -func TestTime64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - exp := []arrow.Time64{0, 1, 2, 3} - - ab.AppendValues([]arrow.Time64{}, nil) - a := ab.NewTime64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTime64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Time64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTime64Array() - assert.Equal(t, exp, a.Time64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Time64{}, nil) - a = ab.NewTime64Array() - assert.Equal(t, exp, a.Time64Values()) - a.Release() 
-} - -func TestTime64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.Time64Type{Unit: arrow.Second} - ab := array.NewTime64Builder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestDate32StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDate32Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Date32) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewDate32Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Date32) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewDate32Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate32Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewDate32Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewDate32Array() - - // check state of builder after NewDate32Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate32Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate32Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate32Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Date32{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date32Values(), "unexpected Date32Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Date32Values(), 10, "unexpected length of Date32Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewDate32Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Date32{7, 8}, a.Date32Values()) - assert.Len(t, a.Date32Values(), 2) - - a.Release() - - var ( - want = []arrow.Date32{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewDate32Array() - - sub := 
array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Date32); !ok { - t.Fatalf("could not type-assert to array.Date32") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Date32) - if !ok { - t.Fatalf("could not type-assert to array.Date32") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestDate32Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate32Builder(mem) - defer ab.Release() - - exp := []arrow.Date32{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewDate32Array() - assert.Equal(t, exp, a.Date32Values()) - - a.Release() -} - -func TestDate32Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate32Builder(mem) - defer ab.Release() - - exp := []arrow.Date32{0, 1, 2, 3} - - ab.AppendValues([]arrow.Date32{}, nil) - a := ab.NewDate32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDate32Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Date32{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewDate32Array() - assert.Equal(t, exp, a.Date32Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Date32{}, nil) - a = ab.NewDate32Array() - assert.Equal(t, exp, a.Date32Values()) - a.Release() -} - -func TestDate32Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - 
ab := array.NewDate32Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestDate64StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := array.NewDate64Builder(mem) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Date64) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDate64Builder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Date64) - defer arr1.Release() - - assert.Exactly(t, arr.Len(), arr1.Len()) - for i := 0; i < arr.Len(); i++ { - assert.Exactly(t, arr.IsValid(i), arr1.IsValid(i)) - assert.Exactly(t, arr.ValueStr(i), arr1.ValueStr(i)) - if arr.IsValid(i) { - assert.Exactly(t, arr.Value(i).ToTime(), arr1.Value(i).ToTime()) - } - } -} - -func TestNewDate64Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate64Builder(mem) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewDate64Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewDate64Array() - - // check state of 
builder after NewDate64Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDate64Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDate64Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDate64Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Date64{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.Date64Values(), "unexpected Date64Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.Date64Values(), 10, "unexpected length of Date64Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewDate64Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Date64{7, 8}, a.Date64Values()) - assert.Len(t, a.Date64Values(), 2) - - a.Release() - - var ( - want = []arrow.Date64{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewDate64Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Date64); !ok { - t.Fatalf("could not type-assert to array.Date64") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Date64) - if !ok { - t.Fatalf("could not type-assert to array.Date64") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestDate64Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := 
array.NewDate64Builder(mem) - defer ab.Release() - - exp := []arrow.Date64{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewDate64Array() - assert.Equal(t, exp, a.Date64Values()) - - a.Release() -} - -func TestDate64Builder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate64Builder(mem) - defer ab.Release() - - exp := []arrow.Date64{0, 1, 2, 3} - - ab.AppendValues([]arrow.Date64{}, nil) - a := ab.NewDate64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDate64Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Date64{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewDate64Array() - assert.Equal(t, exp, a.Date64Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Date64{}, nil) - a = ab.NewDate64Array() - assert.Equal(t, exp, a.Date64Values()) - a.Release() -} - -func TestDate64Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ab := array.NewDate64Builder(mem) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestDurationStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.DurationType{Unit: arrow.Second} - b := array.NewDurationBuilder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Duration) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDurationBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Duration) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewDurationBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before NewDurationArray - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewDurationArray() - - // check state of builder after NewDurationArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewDurationArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewDurationArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewDurationArray did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Duration{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, a.DurationValues(), "unexpected DurationValues") - assert.Equal(t, []byte{0xb7}, 
a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.DurationValues(), 10, "unexpected length of DurationValues") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewDurationArray() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Duration{7, 8}, a.DurationValues()) - assert.Len(t, a.DurationValues(), 2) - - a.Release() - - var ( - want = []arrow.Duration{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewDurationArray() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Duration); !ok { - t.Fatalf("could not type-assert to array.Duration") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Duration) - if !ok { - t.Fatalf("could not type-assert to array.Duration") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestDurationBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Duration{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewDurationArray() - assert.Equal(t, exp, a.DurationValues()) - - a.Release() -} - -func TestDurationBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - exp := 
[]arrow.Duration{0, 1, 2, 3} - - ab.AppendValues([]arrow.Duration{}, nil) - a := ab.NewDurationArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewDurationArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Duration{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewDurationArray() - assert.Equal(t, exp, a.DurationValues()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Duration{}, nil) - a = ab.NewDurationArray() - assert.Equal(t, exp, a.DurationValues()) - a.Release() -} - -func TestDurationBuilder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.DurationType{Unit: arrow.Second} - ab := array.NewDurationBuilder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} diff --git a/go/arrow/array/numericbuilder.gen_test.go.tmpl b/go/arrow/array/numericbuilder.gen_test.go.tmpl deleted file mode 100644 index f3cd08a63745d..0000000000000 --- a/go/arrow/array/numericbuilder.gen_test.go.tmpl +++ /dev/null @@ -1,299 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -{{range .In}} -func Test{{.Name}}StringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} -{{ if or (eq .Name "Time64") -}} - dt := &arrow.{{.Name}}Type{Unit: arrow.Microsecond} -{{else -}} - dt := &arrow.{{.Name}}Type{Unit: arrow.Second} -{{end -}} - b := array.New{{.Name}}Builder(mem, dt) -{{else -}} - b := array.New{{.Name}}Builder(mem) -{{end -}} - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.{{.Name}}) - defer arr.Release() - - // 2. 
create array via AppendValueFromString -{{if .Opt.Parametric -}} - b1 := array.New{{.Name}}Builder(mem, dt) -{{else -}} - b1 := array.New{{.Name}}Builder(mem) -{{end -}} - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.{{.Name}}) - defer arr1.Release() - -{{ if or (eq .Name "Date64") -}} - assert.Exactly(t, arr.Len(), arr1.Len()) - for i := 0; i < arr.Len(); i++ { - assert.Exactly(t, arr.IsValid(i), arr1.IsValid(i)) - assert.Exactly(t, arr.ValueStr(i), arr1.ValueStr(i)) - if arr.IsValid(i) { - assert.Exactly(t, arr.Value(i).ToTime(), arr1.Value(i).ToTime()) - } - } -{{else -}} - assert.True(t, array.Equal(arr, arr1)) -{{end -}} -} - -func TestNew{{.Name}}Builder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - - // check state of builder before New{{.Name}}Array - assert.Equal(t, 10, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.New{{.Name}}Array() - - // check state of builder after New{{.Name}}Array - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), New{{.Name}}Array did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), New{{.Name}}Array did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), New{{.Name}}Array did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []{{or .QualifiedType .Type}}{1, 2, 3, 0, 5, 6, 0, 8, 9, 10}, 
a.{{.Name}}Values(), "unexpected {{.Name}}Values") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.{{.Name}}Values(), 10, "unexpected length of {{.Name}}Values") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.New{{.Name}}Array() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []{{or .QualifiedType .Type}}{7, 8}, a.{{.Name}}Values()) - assert.Len(t, a.{{.Name}}Values(), 2) - - a.Release() - - var ( - want = []{{or .QualifiedType .Type}}{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.New{{.Name}}Array() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.{{.Name}}); !ok { - t.Fatalf("could not type-assert to array.{{.Name}}") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.{{.Name}}) - if !ok { - t.Fatalf("could not type-assert to array.{{.Name}}") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func Test{{.Name}}Builder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - exp := []{{or .QualifiedType .Type}}{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.New{{.Name}}Array() - assert.Equal(t, exp, a.{{.Name}}Values()) - - a.Release() -} - -func Test{{.Name}}Builder_Empty(t *testing.T) { - 
mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - exp := []{{or .QualifiedType .Type}}{0, 1, 2, 3} - - ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) - a := ab.New{{.Name}}Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.New{{.Name}}Array() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) - ab.AppendValues(exp, nil) - a = ab.New{{.Name}}Array() - assert.Equal(t, exp, a.{{.Name}}Values()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]{{or .QualifiedType .Type}}{}, nil) - a = ab.New{{.Name}}Array() - assert.Equal(t, exp, a.{{.Name}}Values()) - a.Release() -} - -func Test{{.Name}}Builder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - -{{if .Opt.Parametric -}} - dtype := &arrow.{{.Name}}Type{Unit: arrow.Second} - ab := array.New{{.Name}}Builder(mem, dtype) -{{else}} - ab := array.New{{.Name}}Builder(mem) -{{end -}} - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func Test{{.Name}}BuilderUnmarshalJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - bldr := array.New{{.Name}}Builder(mem) - defer bldr.Release() - - jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` - - err := bldr.UnmarshalJSON([]byte(jsonstr)) - assert.NoError(t, err) - - arr := 
bldr.New{{.Name}}Array() - defer arr.Release() - - assert.NotNil(t, arr) - - assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) - assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) - assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) -} - -{{end}} - - diff --git a/go/arrow/array/record.go b/go/arrow/array/record.go deleted file mode 100644 index 2735f1baa9a30..0000000000000 --- a/go/arrow/array/record.go +++ /dev/null @@ -1,411 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "fmt" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// RecordReader reads a stream of records. -type RecordReader interface { - Retain() - Release() - - Schema() *arrow.Schema - - Next() bool - Record() arrow.Record - Err() error -} - -// simpleRecords is a simple iterator over a collection of records. 
-type simpleRecords struct { - refCount int64 - - schema *arrow.Schema - recs []arrow.Record - cur arrow.Record -} - -// NewRecordReader returns a simple iterator over the given slice of records. -func NewRecordReader(schema *arrow.Schema, recs []arrow.Record) (RecordReader, error) { - rs := &simpleRecords{ - refCount: 1, - schema: schema, - recs: recs, - cur: nil, - } - - for _, rec := range rs.recs { - rec.Retain() - } - - for _, rec := range recs { - if !rec.Schema().Equal(rs.schema) { - rs.Release() - return nil, fmt.Errorf("arrow/array: mismatch schema") - } - } - - return rs, nil -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (rs *simpleRecords) Retain() { - atomic.AddInt64(&rs.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (rs *simpleRecords) Release() { - debug.Assert(atomic.LoadInt64(&rs.refCount) > 0, "too many releases") - - if atomic.AddInt64(&rs.refCount, -1) == 0 { - if rs.cur != nil { - rs.cur.Release() - } - for _, rec := range rs.recs { - rec.Release() - } - rs.recs = nil - } -} - -func (rs *simpleRecords) Schema() *arrow.Schema { return rs.schema } -func (rs *simpleRecords) Record() arrow.Record { return rs.cur } -func (rs *simpleRecords) Next() bool { - if len(rs.recs) == 0 { - return false - } - if rs.cur != nil { - rs.cur.Release() - } - rs.cur = rs.recs[0] - rs.recs = rs.recs[1:] - return true -} -func (rs *simpleRecords) Err() error { return nil } - -// simpleRecord is a basic, non-lazy in-memory record batch. -type simpleRecord struct { - refCount int64 - - schema *arrow.Schema - - rows int64 - arrs []arrow.Array -} - -// NewRecord returns a basic, non-lazy in-memory record batch. -// -// NewRecord panics if the columns and schema are inconsistent. 
-// NewRecord panics if rows is larger than the height of the columns. -func NewRecord(schema *arrow.Schema, cols []arrow.Array, nrows int64) arrow.Record { - rec := &simpleRecord{ - refCount: 1, - schema: schema, - rows: nrows, - arrs: make([]arrow.Array, len(cols)), - } - copy(rec.arrs, cols) - for _, arr := range rec.arrs { - arr.Retain() - } - - if rec.rows < 0 { - switch len(rec.arrs) { - case 0: - rec.rows = 0 - default: - rec.rows = int64(rec.arrs[0].Len()) - } - } - - err := rec.validate() - if err != nil { - rec.Release() - panic(err) - } - - return rec -} - -func (rec *simpleRecord) SetColumn(i int, arr arrow.Array) (arrow.Record, error) { - if i < 0 || i >= len(rec.arrs) { - return nil, fmt.Errorf("arrow/array: column index out of range [0, %d): got=%d", len(rec.arrs), i) - } - - if arr.Len() != int(rec.rows) { - return nil, fmt.Errorf("arrow/array: mismatch number of rows in column %q: got=%d, want=%d", - rec.schema.Field(i).Name, - arr.Len(), rec.rows, - ) - } - - f := rec.schema.Field(i) - if !arrow.TypeEqual(f.Type, arr.DataType()) { - return nil, fmt.Errorf("arrow/array: column %q type mismatch: got=%v, want=%v", - f.Name, - arr.DataType(), f.Type, - ) - } - arrs := make([]arrow.Array, len(rec.arrs)) - copy(arrs, rec.arrs) - arrs[i] = arr - - return NewRecord(rec.schema, arrs, rec.rows), nil -} - -func (rec *simpleRecord) validate() error { - if rec.rows == 0 && len(rec.arrs) == 0 { - return nil - } - - if len(rec.arrs) != rec.schema.NumFields() { - return fmt.Errorf("arrow/array: number of columns/fields mismatch") - } - - for i, arr := range rec.arrs { - f := rec.schema.Field(i) - if int64(arr.Len()) < rec.rows { - return fmt.Errorf("arrow/array: mismatch number of rows in column %q: got=%d, want=%d", - f.Name, - arr.Len(), rec.rows, - ) - } - if !arrow.TypeEqual(f.Type, arr.DataType()) { - return fmt.Errorf("arrow/array: column %q type mismatch: got=%v, want=%v", - f.Name, - arr.DataType(), f.Type, - ) - } - } - return nil -} - -// Retain 
increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (rec *simpleRecord) Retain() { - atomic.AddInt64(&rec.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (rec *simpleRecord) Release() { - debug.Assert(atomic.LoadInt64(&rec.refCount) > 0, "too many releases") - - if atomic.AddInt64(&rec.refCount, -1) == 0 { - for _, arr := range rec.arrs { - arr.Release() - } - rec.arrs = nil - } -} - -func (rec *simpleRecord) Schema() *arrow.Schema { return rec.schema } -func (rec *simpleRecord) NumRows() int64 { return rec.rows } -func (rec *simpleRecord) NumCols() int64 { return int64(len(rec.arrs)) } -func (rec *simpleRecord) Columns() []arrow.Array { return rec.arrs } -func (rec *simpleRecord) Column(i int) arrow.Array { return rec.arrs[i] } -func (rec *simpleRecord) ColumnName(i int) string { return rec.schema.Field(i).Name } - -// NewSlice constructs a zero-copy slice of the record with the indicated -// indices i and j, corresponding to array[i:j]. -// The returned record must be Release()'d after use. -// -// NewSlice panics if the slice is outside the valid range of the record array. -// NewSlice panics if j < i. 
-func (rec *simpleRecord) NewSlice(i, j int64) arrow.Record { - arrs := make([]arrow.Array, len(rec.arrs)) - for ii, arr := range rec.arrs { - arrs[ii] = NewSlice(arr, i, j) - } - defer func() { - for _, arr := range arrs { - arr.Release() - } - }() - return NewRecord(rec.schema, arrs, j-i) -} - -func (rec *simpleRecord) String() string { - o := new(strings.Builder) - fmt.Fprintf(o, "record:\n %v\n", rec.schema) - fmt.Fprintf(o, " rows: %d\n", rec.rows) - for i, col := range rec.arrs { - fmt.Fprintf(o, " col[%d][%s]: %v\n", i, rec.schema.Field(i).Name, col) - } - - return o.String() -} - -func (rec *simpleRecord) MarshalJSON() ([]byte, error) { - arr := RecordToStructArray(rec) - defer arr.Release() - return arr.MarshalJSON() -} - -// RecordBuilder eases the process of building a Record, iteratively, from -// a known Schema. -type RecordBuilder struct { - refCount int64 - mem memory.Allocator - schema *arrow.Schema - fields []Builder -} - -// NewRecordBuilder returns a builder, using the provided memory allocator and a schema. -func NewRecordBuilder(mem memory.Allocator, schema *arrow.Schema) *RecordBuilder { - b := &RecordBuilder{ - refCount: 1, - mem: mem, - schema: schema, - fields: make([]Builder, schema.NumFields()), - } - - for i := 0; i < schema.NumFields(); i++ { - b.fields[i] = NewBuilder(b.mem, schema.Field(i).Type) - } - - return b -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (b *RecordBuilder) Retain() { - atomic.AddInt64(&b.refCount, 1) -} - -// Release decreases the reference count by 1. 
-func (b *RecordBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - for _, f := range b.fields { - f.Release() - } - b.fields = nil - } -} - -func (b *RecordBuilder) Schema() *arrow.Schema { return b.schema } -func (b *RecordBuilder) Fields() []Builder { return b.fields } -func (b *RecordBuilder) Field(i int) Builder { return b.fields[i] } - -func (b *RecordBuilder) Reserve(size int) { - for _, f := range b.fields { - f.Reserve(size) - } -} - -// NewRecord creates a new record from the memory buffers and resets the -// RecordBuilder so it can be used to build a new record. -// -// The returned Record must be Release()'d after use. -// -// NewRecord panics if the fields' builder do not have the same length. -func (b *RecordBuilder) NewRecord() arrow.Record { - cols := make([]arrow.Array, len(b.fields)) - rows := int64(0) - - defer func(cols []arrow.Array) { - for _, col := range cols { - if col == nil { - continue - } - col.Release() - } - }(cols) - - for i, f := range b.fields { - cols[i] = f.NewArray() - irow := int64(cols[i].Len()) - if i > 0 && irow != rows { - panic(fmt.Errorf("arrow/array: field %d has %d rows. want=%d", i, irow, rows)) - } - rows = irow - } - - return NewRecord(b.schema, cols, rows) -} - -// UnmarshalJSON for record builder will read in a single object and add the values -// to each field in the recordbuilder, missing fields will get a null and unexpected -// keys will be ignored. If reading in an array of records as a single batch, then use -// a structbuilder and use RecordFromStruct. 
-func (b *RecordBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - // should start with a '{' - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '{' { - return fmt.Errorf("record should start with '{', not %s", t) - } - - keylist := make(map[string]bool) - for dec.More() { - keyTok, err := dec.Token() - if err != nil { - return err - } - - key := keyTok.(string) - if keylist[key] { - return fmt.Errorf("key %s shows up twice in row to be decoded", key) - } - keylist[key] = true - - indices := b.schema.FieldIndices(key) - if len(indices) == 0 { - var extra interface{} - if err := dec.Decode(&extra); err != nil { - return err - } - continue - } - - if err := b.fields[indices[0]].UnmarshalOne(dec); err != nil { - return err - } - } - - for i := 0; i < b.schema.NumFields(); i++ { - if !keylist[b.schema.Field(i).Name] { - b.fields[i].AppendNull() - } - } - return nil -} - -var ( - _ arrow.Record = (*simpleRecord)(nil) - _ RecordReader = (*simpleRecords)(nil) -) diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go deleted file mode 100644 index 8e6dc3b06d25e..0000000000000 --- a/go/arrow/array/record_test.go +++ /dev/null @@ -1,787 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "fmt" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestRecord(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - col1 := func() arrow.Array { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return ib.NewInt32Array() - }() - defer col1.Release() - - col2 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return b.NewFloat64Array() - }() - defer col2.Release() - - col2_1 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return b.NewFloat64Array() - }() - defer col2_1.Release() - - cols := []arrow.Array{col1, col2} - rec := array.NewRecord(schema, cols, -1) - defer rec.Release() - - rec.Retain() - rec.Release() - - if got, want := rec.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := rec.NumRows(), int64(10); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := rec.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := rec.Columns()[0], cols[0]; got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - if got, want := rec.Column(0), 
cols[0]; got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - if got, want := rec.ColumnName(0), schema.Field(0).Name; got != want { - t.Fatalf("invalid column name: got=%q, want=%q", got, want) - } - if _, err := rec.SetColumn(0, col2_1); err == nil { - t.Fatalf("expected an error") - } - newRec, err := rec.SetColumn(1, col2_1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - defer newRec.Release() - if !reflect.DeepEqual(newRec.Column(1), col2_1) { - t.Fatalf("invalid column: got=%q, want=%q", rec.Column(1), col2_1) - } - - for _, tc := range []struct { - i, j int64 - err error - }{ - {i: 0, j: 10, err: nil}, - {i: 1, j: 10, err: nil}, - {i: 1, j: 9, err: nil}, - {i: 0, j: 0, err: nil}, - {i: 1, j: 1, err: nil}, - {i: 10, j: 10, err: nil}, - {i: 1, j: 0, err: fmt.Errorf("arrow/array: index out of range")}, - {i: 1, j: 11, err: fmt.Errorf("arrow/array: index out of range")}, - } { - t.Run(fmt.Sprintf("slice-%02d-%02d", tc.i, tc.j), func(t *testing.T) { - if tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - sub := rec.NewSlice(tc.i, tc.j) - defer sub.Release() - - if got, want := sub.NumRows(), tc.j-tc.i; got != want { - t.Fatalf("invalid rec-slice number of rows: got=%d, want=%d", got, want) - } - }) - } - - for _, tc := range []struct { - schema *arrow.Schema - cols []arrow.Array - rows int64 - err error - }{ - { - schema: schema, - cols: nil, - rows: 0, - }, - { - schema: schema, - cols: cols[:1], - rows: 0, - err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf("arrow/array: number of columns/fields mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf(`arrow/array: column "f2-f64" type mismatch: got=float64, want=int32`), - }, - { - schema: schema, - cols: cols, - rows: 11, - err: fmt.Errorf(`arrow/array: mismatch number of rows in column "f1-i32": got=10, want=11`), - }, - { - schema: schema, - cols: cols, - rows: 10, - err: nil, - }, - { - schema: schema, - cols: cols, - rows: 3, - err: nil, - }, - { - schema: schema, - cols: cols, - rows: 0, - err: nil, - }, - } { - t.Run("", func(t *testing.T) { - if tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - rec := array.NewRecord(tc.schema, tc.cols, tc.rows) - defer rec.Release() - if got, want := rec.NumRows(), tc.rows; got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - }) - } -} - -func TestRecordReader(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - rec1 := func() arrow.Record { - col1 := func() arrow.Array { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return ib.NewInt32Array() - }() - defer col1.Release() - - col2 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - return b.NewFloat64Array() - }() - defer col2.Release() - - cols := []arrow.Array{col1, col2} - return array.NewRecord(schema, cols, -1) - }() - defer rec1.Release() - - rec2 := func() arrow.Record { - col1 := func() arrow.Array { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - return ib.NewInt32Array() - }() - defer col1.Release() - - col2 := func() arrow.Array { - b := array.NewFloat64Builder(mem) - defer b.Release() - - b.AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - return b.NewFloat64Array() - }() - defer col2.Release() - - cols := []arrow.Array{col1, col2} - return array.NewRecord(schema, cols, -1) - }() - defer rec2.Release() - - recs := []arrow.Record{rec1, rec2} - itr, err := array.NewRecordReader(schema, recs) - if err != nil { - t.Fatal(err) - } - defer itr.Release() - - itr.Retain() - itr.Release() - - if got, want := 
itr.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema. got=%#v, want=%#v", got, want) - } - - n := 0 - for itr.Next() { - n++ - if got, want := itr.Record(), recs[n-1]; !reflect.DeepEqual(got, want) { - t.Fatalf("itr[%d], invalid record. got=%#v, want=%#v", n-1, got, want) - } - } - if err := itr.Err(); err != nil { - t.Fatalf("itr error: %#v", err) - } - - if n != len(recs) { - t.Fatalf("invalid number of iterations. got=%d, want=%d", n, len(recs)) - } - - for _, tc := range []struct { - name string - schema *arrow.Schema - err error - }{ - { - name: "mismatch-name", - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-XXX", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ), - err: fmt.Errorf("arrow/array: mismatch schema"), - }, - { - name: "mismatch-type", - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int64}, - }, - nil, - ), - err: fmt.Errorf("arrow/array: mismatch schema"), - }, - } { - t.Run(tc.name, func(t *testing.T) { - itr, err := array.NewRecordReader(tc.schema, recs) - if itr != nil { - itr.Release() - } - if err == nil { - t.Fatalf("expected an error: %v", tc.err) - } - if !assert.Equal(t, tc.err, err) { - t.Fatalf("invalid error: got=%v, want=%v", err, tc.err) - } - }) - } -} - -func TestRecordBuilderRespectsFixedSizeArrayNullability(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - cases := []struct { - assertion string - fields []arrow.Field - }{ - { - "nullable", - []arrow.Field{{Name: "data", Type: arrow.FixedSizeListOf(1, arrow.PrimitiveTypes.Int32)}}, - }, - { - "not nullable", - []arrow.Field{{Name: "data", Type: arrow.FixedSizeListOfNonNullable(1, arrow.PrimitiveTypes.Int32)}}, - }, - } - for _, c := range cases { - t.Run(c.assertion, func(t *testing.T) { - schema := arrow.NewSchema(c.fields, nil) - b := 
array.NewRecordBuilder(mem, schema) - defer b.Release() - - lb := b.Field(0).(*array.FixedSizeListBuilder) - lb.Append(true) - - vb := lb.ValueBuilder().(*array.Int32Builder) - vb.Append(10) - - rec := b.NewRecord() - defer rec.Release() - - if got, want := rec.Column(0).String(), "[[10]]"; got != want { - t.Fatalf("invalid record: got=%q, want=%q", got, want) - } - }) - } -} - -func TestRecordBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - mapDt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String) - mapDt.KeysSorted = true - mapDt.SetItemNullable(false) - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - {Name: "map", Type: mapDt}, - }, - nil, - ) - - b := array.NewRecordBuilder(mem, schema) - defer b.Release() - - b.Retain() - b.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3}, nil) - b.Field(0).(*array.Int32Builder).AppendValues([]int32{4, 5}, nil) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5}, nil) - mb := b.Field(2).(*array.MapBuilder) - for i := 0; i < 5; i++ { - mb.Append(true) - - if i%3 == 0 { - mb.KeyBuilder().(*array.StringBuilder).AppendValues([]string{fmt.Sprint(i), "2", "3"}, nil) - mb.ItemBuilder().(*array.StringBuilder).AppendValues([]string{"a", "b", "c"}, nil) - } - } - - rec := b.NewRecord() - defer rec.Release() - - if got, want := rec.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := rec.NumRows(), int64(5); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := rec.NumCols(), int64(3); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := rec.ColumnName(0), schema.Field(0).Name; got != want { - t.Fatalf("invalid column name: got=%q, 
want=%q", got, want) - } - if got, want := rec.Column(2).String(), `[{["0" "2" "3"] ["a" "b" "c"]} {[] []} {[] []} {["3" "2" "3"] ["a" "b" "c"]} {[] []}]`; got != want { - t.Fatalf("invalid column name: got=%q, want=%q", got, want) - } -} - -type testMessage struct { - Foo *testMessageFoo - Bars []*testMessageBar -} - -func (m *testMessage) Reset() { *m = testMessage{} } - -func (m *testMessage) GetFoo() *testMessageFoo { - if m != nil { - return m.Foo - } - return nil -} - -func (m *testMessage) GetBars() []*testMessageBar { - if m != nil { - return m.Bars - } - return nil -} - -type testMessageFoo struct { - A int32 - B []uint32 -} - -func (m *testMessageFoo) Reset() { *m = testMessageFoo{} } - -func (m *testMessageFoo) GetA() int32 { - if m != nil { - return m.A - } - return 0 -} - -func (m *testMessageFoo) GetB() []uint32 { - if m != nil { - return m.B - } - return nil -} - -type testMessageBar struct { - C int64 - D []uint64 -} - -func (m *testMessageBar) Reset() { *m = testMessageBar{} } - -func (m *testMessageBar) GetC() int64 { - if m != nil { - return m.C - } - return 0 -} - -func (m *testMessageBar) GetD() []uint64 { - if m != nil { - return m.D - } - return nil -} - -var testMessageSchema = arrow.NewSchema( - []arrow.Field{ - {Name: "foo", Type: arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int32}, - arrow.Field{Name: "b", Type: arrow.ListOf( - arrow.PrimitiveTypes.Uint32, - )}, - )}, - {Name: "bars", Type: arrow.ListOf( - arrow.StructOf( - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int64}, - arrow.Field{Name: "d", Type: arrow.ListOf( - arrow.PrimitiveTypes.Uint64, - )}, - ), - )}, - }, - nil, -) - -func (m *testMessage) Fill(rec arrow.Record, row int) error { - m.Reset() - - // foo - if 0 < rec.NumCols() { - src0 := rec.Column(0).Data() - typedSrc0 := array.NewStructData(src0) - defer typedSrc0.Release() - if typedSrc0.IsValid(row) { - m0 := &testMessageFoo{} - { - - // a - if 0 < typedSrc0.NumField() { - src0_0 := 
typedSrc0.Field(0).Data() - typedSrc0_0 := array.NewInt32Data(src0_0) - defer typedSrc0_0.Release() - m0.A = typedSrc0_0.Value(row) - } - - // b - if 1 < typedSrc0.NumField() { - src0_1 := typedSrc0.Field(1).Data() - listSrc0_1 := array.NewListData(src0_1) - defer listSrc0_1.Release() - if listSrc0_1.IsValid(row) { - typedSrc0_1 := array.NewUint32Data(listSrc0_1.ListValues().Data()) - typedSrc0_1.Release() - start0_1 := int(listSrc0_1.Offsets()[row]) - end0_1 := int(listSrc0_1.Offsets()[row+1]) - for row := start0_1; row < end0_1; row++ { - m0.B = append(m0.B, typedSrc0_1.Value(row)) - } - } - } - } - m.Foo = m0 - } - } - - // bars - if 1 < rec.NumCols() { - src1 := rec.Column(1).Data() - listSrc1 := array.NewListData(src1) - defer listSrc1.Release() - if listSrc1.IsValid(row) { - typedSrc1 := array.NewStructData(listSrc1.ListValues().Data()) - defer typedSrc1.Release() - start1 := int(listSrc1.Offsets()[row]) - end1 := int(listSrc1.Offsets()[row+1]) - for row := start1; row < end1; row++ { - if typedSrc1.IsValid(row) { - m1 := &testMessageBar{} - { - - // c - if 0 < typedSrc1.NumField() { - src1_0 := typedSrc1.Field(0).Data() - typedSrc1_0 := array.NewInt64Data(src1_0) - defer typedSrc1_0.Release() - m1.C = typedSrc1_0.Value(row) - } - - // d - if 1 < typedSrc1.NumField() { - src1_1 := typedSrc1.Field(1).Data() - listSrc1_1 := array.NewListData(src1_1) - defer listSrc1_1.Release() - if listSrc1_1.IsValid(row) { - typedSrc1_1 := array.NewUint64Data(listSrc1_1.ListValues().Data()) - defer typedSrc1_1.Release() - start1_1 := int(listSrc1_1.Offsets()[row]) - end1_1 := int(listSrc1_1.Offsets()[row+1]) - for row := start1_1; row < end1_1; row++ { - m1.D = append(m1.D, typedSrc1_1.Value(row)) - } - } - } - } - m.Bars = append(m.Bars, m1) - } else { - m.Bars = append(m.Bars, nil) - } - } - } - } - return nil -} - -func newTestMessageArrowRecordBuilder(mem memory.Allocator) *testMessageArrowRecordBuilder { - return &testMessageArrowRecordBuilder{ - rb: 
array.NewRecordBuilder(mem, testMessageSchema), - } -} - -type testMessageArrowRecordBuilder struct { - rb *array.RecordBuilder -} - -func (b *testMessageArrowRecordBuilder) Build() arrow.Record { - return b.rb.NewRecord() -} - -func (b *testMessageArrowRecordBuilder) Release() { - b.rb.Release() -} - -func (b *testMessageArrowRecordBuilder) Append(m *testMessage) { - - // foo - { - builder0 := b.rb.Field(0) - v0 := m.GetFoo() - valueBuilder0 := builder0.(*array.StructBuilder) - if v0 == nil { - valueBuilder0.AppendNull() - } else { - valueBuilder0.Append(true) - - // a - { - v0_0 := v0.GetA() - builder0_0 := valueBuilder0.FieldBuilder(0) - valueBuilder0_0 := builder0_0.(*array.Int32Builder) - valueBuilder0_0.Append(v0_0) - } - - // b - { - v0_1 := v0.GetB() - builder0_1 := valueBuilder0.FieldBuilder(1) - listBuilder0_1 := builder0_1.(*array.ListBuilder) - if len(v0_1) == 0 { - listBuilder0_1.AppendNull() - } else { - listBuilder0_1.Append(true) - valueBuilder0_1 := listBuilder0_1.ValueBuilder().(*array.Uint32Builder) - for _, item := range v0_1 { - valueBuilder0_1.Append(item) - } - } - } - } - } - - // bars - { - builder1 := b.rb.Field(1) - v1 := m.GetBars() - listBuilder1 := builder1.(*array.ListBuilder) - if len(v1) == 0 { - listBuilder1.AppendNull() - } else { - listBuilder1.Append(true) - valueBuilder1 := listBuilder1.ValueBuilder().(*array.StructBuilder) - for _, item := range v1 { - if item == nil { - valueBuilder1.AppendNull() - } else { - valueBuilder1.Append(true) - - // c - { - v1_0 := item.GetC() - builder1_0 := valueBuilder1.FieldBuilder(0) - valueBuilder1_0 := builder1_0.(*array.Int64Builder) - valueBuilder1_0.Append(v1_0) - } - - // d - { - v1_1 := item.GetD() - builder1_1 := valueBuilder1.FieldBuilder(1) - listBuilder1_1 := builder1_1.(*array.ListBuilder) - if len(v1_1) == 0 { - listBuilder1_1.AppendNull() - } else { - listBuilder1_1.Append(true) - valueBuilder1_1 := listBuilder1_1.ValueBuilder().(*array.Uint64Builder) - for _, item := range v1_1 { 
- valueBuilder1_1.Append(item) - } - } - } - } - } - } - } -} - -func TestRecordBuilderMessages(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - b := newTestMessageArrowRecordBuilder(mem) - defer b.Release() - - var msgs []*testMessage - for i := 0; i < 1000; i++ { - msg := &testMessage{ - Foo: &testMessageFoo{ - A: int32(i), - B: []uint32{2, 3, 4, 5, 6, 7, 8, 9}, - }, - Bars: []*testMessageBar{ - { - C: 11, - D: []uint64{12, 13, 14}, - }, - { - C: 15, - D: []uint64{16, 17, 18, 19}, - }, - nil, - { - C: 20, - D: []uint64{21}, - }, - }, - } - msgs = append(msgs, msg) - b.Append(msg) - } - - rec := b.Build() - defer rec.Release() - - var got testMessage - for i := 0; i < 1000; i++ { - got.Fill(rec, i) - if !reflect.DeepEqual(&got, msgs[i]) { - t.Fatalf("row[%d], invalid record. got=%#v, want=%#v", i, &got, msgs[i]) - } - } -} diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go deleted file mode 100644 index 88b4568ad5e84..0000000000000 --- a/go/arrow/array/string.go +++ /dev/null @@ -1,718 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strings" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -type StringLike interface { - arrow.Array - Value(int) string - ValueLen(int) int -} - -// String represents an immutable sequence of variable-length UTF-8 strings. -type String struct { - array - offsets []int32 - values string -} - -// NewStringData constructs a new String array from data. -func NewStringData(data arrow.ArrayData) *String { - a := &String{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the String with a different set of Data. -func (a *String) Reset(data arrow.ArrayData) { - a.setData(data.(*Data)) -} - -// Value returns the slice at index i. This value should not be mutated. -func (a *String) Value(i int) string { - i = i + a.array.data.offset - return a.values[a.offsets[i]:a.offsets[i+1]] -} - -func (a *String) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i) -} - -// ValueOffset returns the offset of the value at index i. 
-func (a *String) ValueOffset(i int) int { - if i < 0 || i > a.array.data.length { - panic("arrow/array: index out of range") - } - return int(a.offsets[i+a.array.data.offset]) -} - -func (a *String) ValueOffset64(i int) int64 { - return int64(a.ValueOffset(i)) -} - -func (a *String) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.offsets[beg+1] - a.offsets[beg]) -} - -func (a *String) ValueOffsets() []int32 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.offsets[beg:end] -} - -func (a *String) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - if a.array.data.buffers[2] != nil { - return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] - } - return nil -} - -func (a *String) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *String) setData(data *Data) { - if len(data.buffers) != 3 { - panic("arrow/array: len(data.buffers) != 3") - } - - a.array.setData(data) - - if vdata := data.buffers[2]; vdata != nil { - b := vdata.Bytes() - a.values = *(*string)(unsafe.Pointer(&b)) - } - - if offsets := data.buffers[1]; offsets != nil { - a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.offsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.offsets[expNumOffsets-1]) > len(a.values) { - panic("arrow/array: string offsets out of bounds of data buffer") - } -} - -func (a *String) GetOneForMarshal(i int) 
interface{} { - if a.IsValid(i) { - return a.Value(i) - } - return nil -} - -func (a *String) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - if a.IsValid(i) { - vals[i] = a.Value(i) - } else { - vals[i] = nil - } - } - return json.Marshal(vals) -} - -func arrayEqualString(left, right *String) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -// String represents an immutable sequence of variable-length UTF-8 strings. -type LargeString struct { - array - offsets []int64 - values string -} - -// NewStringData constructs a new String array from data. -func NewLargeStringData(data arrow.ArrayData) *LargeString { - a := &LargeString{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the String with a different set of Data. -func (a *LargeString) Reset(data arrow.ArrayData) { - a.setData(data.(*Data)) -} - -// Value returns the slice at index i. This value should not be mutated. -func (a *LargeString) Value(i int) string { - i = i + a.array.data.offset - return a.values[a.offsets[i]:a.offsets[i+1]] -} - -func (a *LargeString) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i) -} - -// ValueOffset returns the offset of the value at index i. 
-func (a *LargeString) ValueOffset(i int) int64 { - if i < 0 || i > a.array.data.length { - panic("arrow/array: index out of range") - } - return a.offsets[i+a.array.data.offset] -} - -func (a *LargeString) ValueOffset64(i int) int64 { - return a.ValueOffset(i) -} - -func (a *LargeString) ValueLen(i int) int { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - beg := a.array.data.offset + i - return int(a.offsets[beg+1] - a.offsets[beg]) -} - -func (a *LargeString) ValueOffsets() []int64 { - beg := a.array.data.offset - end := beg + a.array.data.length + 1 - return a.offsets[beg:end] -} - -func (a *LargeString) ValueBytes() []byte { - beg := a.array.data.offset - end := beg + a.array.data.length - if a.array.data.buffers[2] != nil { - return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]] - } - return nil -} - -func (a *LargeString) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *LargeString) setData(data *Data) { - if len(data.buffers) != 3 { - panic("arrow/array: len(data.buffers) != 3") - } - - a.array.setData(data) - - if vdata := data.buffers[2]; vdata != nil { - b := vdata.Bytes() - a.values = *(*string)(unsafe.Pointer(&b)) - } - - if offsets := data.buffers[1]; offsets != nil { - a.offsets = arrow.Int64Traits.CastFromBytes(offsets.Bytes()) - } - - if a.array.data.length < 1 { - return - } - - expNumOffsets := a.array.data.offset + a.array.data.length + 1 - if len(a.offsets) < expNumOffsets { - panic(fmt.Errorf("arrow/array: string offset buffer must have at least %d values", expNumOffsets)) - } - - if int(a.offsets[expNumOffsets-1]) > len(a.values) { - panic("arrow/array: string offsets out of bounds of data buffer") - } -} - -func (a *LargeString) 
GetOneForMarshal(i int) interface{} { - if a.IsValid(i) { - return a.Value(i) - } - return nil -} - -func (a *LargeString) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualLargeString(left, right *LargeString) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type StringView struct { - array - values []arrow.ViewHeader - dataBuffers []*memory.Buffer -} - -func NewStringViewData(data arrow.ArrayData) *StringView { - a := &StringView{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the String with a different set of Data. -func (a *StringView) Reset(data arrow.ArrayData) { - a.setData(data.(*Data)) -} - -func (a *StringView) setData(data *Data) { - if len(data.buffers) < 2 { - panic("len(data.buffers) < 2") - } - a.array.setData(data) - - if valueData := data.buffers[1]; valueData != nil { - a.values = arrow.ViewHeaderTraits.CastFromBytes(valueData.Bytes()) - } - - a.dataBuffers = data.buffers[2:] -} - -func (a *StringView) ValueHeader(i int) *arrow.ViewHeader { - if i < 0 || i >= a.array.data.length { - panic("arrow/array: index out of range") - } - return &a.values[a.array.data.offset+i] -} - -func (a *StringView) Value(i int) string { - s := a.ValueHeader(i) - if s.IsInline() { - return s.InlineString() - } - start := s.BufferOffset() - buf := a.dataBuffers[s.BufferIndex()] - value := buf.Bytes()[start : start+int32(s.Len())] - return *(*string)(unsafe.Pointer(&value)) -} - -func (a *StringView) ValueLen(i int) int { - s := a.ValueHeader(i) - return s.Len() -} - -func (a *StringView) String() string { - var o strings.Builder - o.WriteString("[") - for i := 0; i < a.Len(); i++ { - if i > 0 { - o.WriteString(" ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: 
- fmt.Fprintf(&o, "%q", a.Value(i)) - } - } - o.WriteString("]") - return o.String() -} - -func (a *StringView) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - return a.Value(i) -} - -func (a *StringView) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - return a.Value(i) -} - -func (a *StringView) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := 0; i < a.Len(); i++ { - vals[i] = a.GetOneForMarshal(i) - } - return json.Marshal(vals) -} - -func arrayEqualStringView(left, right *StringView) bool { - leftBufs, rightBufs := left.dataBuffers, right.dataBuffers - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if !left.ValueHeader(i).Equals(leftBufs, right.ValueHeader(i), rightBufs) { - return false - } - } - return true -} - -// A StringBuilder is used to build a String array using the Append methods. -type StringBuilder struct { - *BinaryBuilder -} - -// NewStringBuilder creates a new StringBuilder. -func NewStringBuilder(mem memory.Allocator) *StringBuilder { - b := &StringBuilder{ - BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.String), - } - return b -} - -func (b *StringBuilder) Type() arrow.DataType { - return arrow.BinaryTypes.String -} - -// Append appends a string to the builder. -func (b *StringBuilder) Append(v string) { - b.BinaryBuilder.Append([]byte(v)) -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *StringBuilder) AppendValues(v []string, valid []bool) { - b.BinaryBuilder.AppendStringValues(v, valid) -} - -// Value returns the string at index i. 
-func (b *StringBuilder) Value(i int) string { - return string(b.BinaryBuilder.Value(i)) -} - -// NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. -func (b *StringBuilder) NewArray() arrow.Array { - return b.NewStringArray() -} - -// NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. -func (b *StringBuilder) NewStringArray() (a *String) { - data := b.newData() - a = NewStringData(data) - data.Release() - return -} - -func (b *StringBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - b.Append(v) - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(v), - Type: reflect.TypeOf(string("")), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *StringBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *StringBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("string builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -// A LargeStringBuilder is used to build a LargeString array using the Append methods. -// LargeString is for when you need the offset buffer to be 64-bit integers -// instead of 32-bit integers. -type LargeStringBuilder struct { - *BinaryBuilder -} - -// NewStringBuilder creates a new StringBuilder. 
-func NewLargeStringBuilder(mem memory.Allocator) *LargeStringBuilder { - b := &LargeStringBuilder{ - BinaryBuilder: NewBinaryBuilder(mem, arrow.BinaryTypes.LargeString), - } - return b -} - -func (b *LargeStringBuilder) Type() arrow.DataType { return arrow.BinaryTypes.LargeString } - -// Append appends a string to the builder. -func (b *LargeStringBuilder) Append(v string) { - b.BinaryBuilder.Append([]byte(v)) -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. -func (b *LargeStringBuilder) AppendValues(v []string, valid []bool) { - b.BinaryBuilder.AppendStringValues(v, valid) -} - -// Value returns the string at index i. -func (b *LargeStringBuilder) Value(i int) string { - return string(b.BinaryBuilder.Value(i)) -} - -// NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. -func (b *LargeStringBuilder) NewArray() arrow.Array { - return b.NewLargeStringArray() -} - -// NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder -// so it can be used to build a new array. 
-func (b *LargeStringBuilder) NewLargeStringArray() (a *LargeString) { - data := b.newData() - a = NewLargeStringData(data) - data.Release() - return -} - -func (b *LargeStringBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - b.Append(v) - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(v), - Type: reflect.TypeOf(string("")), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *LargeStringBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *LargeStringBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("string builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -type StringViewBuilder struct { - *BinaryViewBuilder -} - -func NewStringViewBuilder(mem memory.Allocator) *StringViewBuilder { - bldr := &StringViewBuilder{ - BinaryViewBuilder: NewBinaryViewBuilder(mem), - } - bldr.dtype = arrow.BinaryTypes.StringView - return bldr -} - -func (b *StringViewBuilder) Append(v string) { - b.BinaryViewBuilder.AppendString(v) -} - -func (b *StringViewBuilder) AppendValues(v []string, valid []bool) { - b.BinaryViewBuilder.AppendStringValues(v, valid) -} - -func (b *StringViewBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case string: - b.Append(v) - case []byte: - b.BinaryViewBuilder.Append(v) - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf([]byte{}), - Offset: dec.InputOffset(), - } - } - return nil -} - -func (b *StringViewBuilder) 
Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *StringViewBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary view builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -func (b *StringViewBuilder) NewArray() arrow.Array { - return b.NewStringViewArray() -} - -func (b *StringViewBuilder) NewStringViewArray() (a *StringView) { - data := b.newData() - a = NewStringViewData(data) - data.Release() - return -} - -type StringLikeBuilder interface { - Builder - Append(string) - AppendValues([]string, []bool) - UnsafeAppend([]byte) - ReserveData(int) -} - -var ( - _ arrow.Array = (*String)(nil) - _ arrow.Array = (*LargeString)(nil) - _ arrow.Array = (*StringView)(nil) - _ Builder = (*StringBuilder)(nil) - _ Builder = (*LargeStringBuilder)(nil) - _ Builder = (*StringViewBuilder)(nil) - _ StringLikeBuilder = (*StringBuilder)(nil) - _ StringLikeBuilder = (*LargeStringBuilder)(nil) - _ StringLikeBuilder = (*StringViewBuilder)(nil) - _ StringLike = (*String)(nil) - _ StringLike = (*LargeString)(nil) - _ StringLike = (*StringView)(nil) -) diff --git a/go/arrow/array/string_test.go b/go/arrow/array/string_test.go deleted file mode 100644 index efbe51edd1a03..0000000000000 --- a/go/arrow/array/string_test.go +++ /dev/null @@ -1,794 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "bytes" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestStringArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []string{"hello", "世界", "", "bye"} - valids = []bool{true, true, false, true} - offsets = []int32{0, 5, 11, 11, 14} - ) - - sb := array.NewStringBuilder(mem) - defer sb.Release() - - sb.Retain() - sb.Release() - - assert.NoError(t, sb.AppendValueFromString(want[0])) - sb.AppendValues(want[1:2], nil) - - sb.AppendNull() - sb.Append(want[3]) - - if got, want := sb.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := sb.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := sb.NewStringArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - assert.Equal(t, "hello", arr.ValueStr(0)) - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case 
arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - - if got, want := arr.ValueOffset(i), int(offsets[i]); got != want { - t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) - } - if got, want := arr.ValueOffset(i+1), int(offsets[i+1]); got != want { - t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, got, want) - } - } - - if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.STRING { - t.Fatalf("invalid type: got=%q, want=string", sub.DataType().Name()) - } - - if _, ok := sub.(*array.String); !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { - t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.String) - if !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := v.String(), `[(null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal(v.ValueBytes(), []byte("bye")) { - t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") - } - - for i := 0; i < v.Len(); i++ { - if got, want := v.ValueOffset(0), int(offsets[i+slice.Offset()]); got != want { - t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) - } - } - - if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) - } -} - -func TestStringBuilder_Empty(t *testing.T) { - mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []string{"hello", "世界", "", "bye"} - - ab := array.NewStringBuilder(mem) - defer ab.Release() - - stringValues := func(a *array.String) []string { - vs := make([]string, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]string{}, nil) - a := ab.NewStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]string{}, nil) - ab.AppendValues(want, nil) - a = ab.NewStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]string{}, nil) - a = ab.NewStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() -} - -// TestStringReset tests the Reset() method on the String type by creating two different Strings and then -// resetting the contents of string2 with the values from string1. 
-func TestStringReset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - sb1 := array.NewStringBuilder(mem) - sb2 := array.NewStringBuilder(mem) - defer sb1.Release() - defer sb2.Release() - - sb1.Append("string1") - sb1.AppendNull() - - var ( - string1 = sb1.NewStringArray() - string2 = sb2.NewStringArray() - - string1Data = string1.Data() - ) - string2.Reset(string1Data) - - assert.Equal(t, "string1", string2.Value(0)) -} - -func TestStringInvalidOffsets(t *testing.T) { - const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" - - makeBuffers := func(valids []bool, offsets []int32, data string) []*memory.Buffer { - offsetBuf := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets)) - var nullBufBytes []byte - var nullBuf *memory.Buffer - if valids != nil { - nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) - for i, v := range valids { - bitutil.SetBitTo(nullBufBytes, i, v) - } - nullBuf = memory.NewBufferBytes(nullBufBytes) - } - return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} - } - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{}, "") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, buffers, nil, 0, 0)) - }, "empty array with no offsets") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 0, buffers, nil, 0, 0)) - }, "empty array, offsets ignored") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 4, 9}, "oooabcdef") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) - }, "data has offset and value offsets are valid") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int32{0, 3, 6, 9, 9}, "012345678") - arr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 0, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, 
arr.NullN()) { - assert.Equal(t, "012", arr.Value(0)) - assert.Equal(t, "345", arr.Value(1)) - assert.Equal(t, "678", arr.Value(2)) - assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") - } - }, "simple valid case") - - assert.NotPanics(t, func() { - buffers := makeBuffers([]bool{true, false, true, false}, []int32{0, 3, 4, 9, 9}, "oooabcdef") - arr := array.NewStringData(array.NewData(arrow.BinaryTypes.String, 4, buffers, nil, 2, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { - assert.Equal(t, "ooo", arr.Value(0)) - assert.True(t, arr.IsNull(1)) - assert.Equal(t, "bcdef", arr.Value(2)) - assert.True(t, arr.IsNull(3)) - } - }, "simple valid case with nulls") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 5}, "abc") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0)) - }, "last offset is overflowing") - - assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { - buffers := makeBuffers(nil, []int32{0}, "abc") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 0)) - }, "last offset is missing") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int32{0, 3, 10, 15}, "oooabcdef") - array.NewStringData(array.NewData(arrow.BinaryTypes.String, 1, buffers, nil, 0, 2)) - }, "data has offset and value offset is overflowing") -} - -func TestStringStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []string{"hello", "世界", "", "bye"} - valid = []bool{true, true, false, true} - ) - - b := array.NewStringBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.String) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewStringBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.String) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestLargeStringArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - want = []string{"hello", "世界", "", "bye"} - valids = []bool{true, true, false, true} - offsets = []int64{0, 5, 11, 11, 14} - ) - - sb := array.NewLargeStringBuilder(mem) - defer sb.Release() - - sb.Retain() - sb.Release() - - sb.AppendValues(want[:2], nil) - - sb.AppendNull() - sb.Append(want[3]) - - if got, want := sb.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := sb.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := sb.NewLargeStringArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - - if got, want := arr.ValueOffset(i), offsets[i]; got != want { - t.Fatalf("arr-offset-beg[%d]: got=%d, want=%d", i, got, want) - } - if got, want := arr.ValueOffset(i+1), offsets[i+1]; got != want { - t.Fatalf("arr-offset-end[%d]: got=%d, want=%d", i+1, got, want) - } - } - - if !reflect.DeepEqual(offsets, arr.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", arr.ValueOffsets(), offsets) - } - - 
sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.LARGE_STRING { - t.Fatalf("invalid type: got=%q, want=large_string", sub.DataType().Name()) - } - - if _, ok := sub.(*array.LargeString); !ok { - t.Fatalf("could not type-assert to array.LargeString") - } - - if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal([]byte(`hello世界bye`), arr.ValueBytes()) { - t.Fatalf("got=%q, want=%q", string(arr.ValueBytes()), `hello世界bye`) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.LargeString) - if !ok { - t.Fatalf("could not type-assert to array.LargeString") - } - - if got, want := v.String(), `[(null) "bye"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal(v.ValueBytes(), []byte("bye")) { - t.Fatalf("got=%q, want=%q", string(v.ValueBytes()), "bye") - } - - for i := 0; i < v.Len(); i++ { - if got, want := v.ValueOffset(0), offsets[i+slice.Offset()]; got != want { - t.Fatalf("val-offset-with-offset[%d]: got=%q, want=%q", i, got, want) - } - } - - if !reflect.DeepEqual(offsets[2:5], v.ValueOffsets()) { - t.Fatalf("ValueOffsets got=%v, want=%v", v.ValueOffsets(), offsets[2:5]) - } -} - -func TestLargeStringBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []string{"hello", "世界", "", "bye"} - - ab := array.NewLargeStringBuilder(mem) - defer ab.Release() - - stringValues := func(a *array.LargeString) []string { - vs := make([]string, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]string{}, nil) - a := ab.NewLargeStringArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewLargeStringArray() - assert.Zero(t, a.Len()) - a.Release() - - 
ab.AppendValues([]string{}, nil) - ab.AppendValues(want, nil) - a = ab.NewLargeStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]string{}, nil) - a = ab.NewLargeStringArray() - assert.Equal(t, want, stringValues(a)) - a.Release() -} - -// TestStringReset tests the Reset() method on the String type by creating two different Strings and then -// resetting the contents of string2 with the values from string1. -func TestLargeStringReset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - sb1 := array.NewLargeStringBuilder(mem) - sb2 := array.NewLargeStringBuilder(mem) - defer sb1.Release() - defer sb2.Release() - - sb1.Append("string1") - sb1.AppendNull() - - var ( - string1 = sb1.NewLargeStringArray() - string2 = sb2.NewLargeStringArray() - - string1Data = string1.Data() - ) - string2.Reset(string1Data) - - assert.Equal(t, "string1", string2.Value(0)) -} - -func TestLargeStringInvalidOffsets(t *testing.T) { - const expectedPanic = "arrow/array: string offsets out of bounds of data buffer" - - makeBuffers := func(valids []bool, offsets []int64, data string) []*memory.Buffer { - offsetBuf := memory.NewBufferBytes(arrow.Int64Traits.CastToBytes(offsets)) - var nullBufBytes []byte - var nullBuf *memory.Buffer - if valids != nil { - nullBufBytes = make([]byte, bitutil.BytesForBits(int64(len(valids)))) - for i, v := range valids { - bitutil.SetBitTo(nullBufBytes, i, v) - } - nullBuf = memory.NewBufferBytes(nullBufBytes) - } - return []*memory.Buffer{nullBuf, offsetBuf, memory.NewBufferBytes([]byte(data))} - } - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{}, "") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, nil, 0, 0)) - }, "empty array with no offsets") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{0, 5}, "") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 0, buffers, 
nil, 0, 0)) - }, "empty array, offsets ignored") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{0, 3, 4, 9}, "oooabcdef") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) - }, "data has offset and value offsets are valid") - - assert.NotPanics(t, func() { - buffers := makeBuffers(nil, []int64{0, 3, 6, 9, 9}, "012345678") - arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 0, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Zero(t, arr.NullN()) { - assert.Equal(t, "012", arr.Value(0)) - assert.Equal(t, "345", arr.Value(1)) - assert.Equal(t, "678", arr.Value(2)) - assert.Equal(t, "", arr.Value(3), "trailing empty string value will have offset past end") - } - }, "simple valid case") - - assert.NotPanics(t, func() { - buffers := makeBuffers([]bool{true, false, true, false}, []int64{0, 3, 4, 9, 9}, "oooabcdef") - arr := array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 4, buffers, nil, 2, 0)) - if assert.Equal(t, 4, arr.Len()) && assert.Equal(t, 2, arr.NullN()) { - assert.Equal(t, "ooo", arr.Value(0)) - assert.True(t, arr.IsNull(1)) - assert.Equal(t, "bcdef", arr.Value(2)) - assert.True(t, arr.IsNull(3)) - } - }, "simple valid case with nulls") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int64{0, 5}, "abc") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) - }, "last offset is overflowing") - - assert.PanicsWithError(t, "arrow/array: string offset buffer must have at least 2 values", func() { - buffers := makeBuffers(nil, []int64{0}, "abc") - array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 0)) - }, "last offset is missing") - - assert.PanicsWithValue(t, expectedPanic, func() { - buffers := makeBuffers(nil, []int64{0, 3, 10, 15}, "oooabcdef") - 
array.NewLargeStringData(array.NewData(arrow.BinaryTypes.LargeString, 1, buffers, nil, 0, 2)) - }, "data has offset and value offset is overflowing") -} - -func TestLargeStringStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - values = []string{"hello", "世界", "", "bye"} - valid = []bool{true, true, false, true} - ) - - b := array.NewLargeStringBuilder(mem) - defer b.Release() - - b.AppendValues(values, valid) - - arr := b.NewArray().(*array.LargeString) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewLargeStringBuilder(mem) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.LargeString) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestStringValueLen(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - values := []string{"a", "bc", "", "", "hijk", "lm", "", "opq", "", "tu"} - valids := []bool{true, true, false, false, true, true, true, true, false, true} - - b := array.NewStringBuilder(mem) - defer b.Release() - - b.AppendStringValues(values, valids) - - arr := b.NewArray().(*array.String) - defer arr.Release() - - slice := array.NewSlice(arr, 2, 9).(*array.String) - defer slice.Release() - - vs := values[2:9] - - for i, v := range vs { - assert.Equal(t, len(v), slice.ValueLen(i)) - } -} -func TestStringViewArray(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - var ( - // only the last string is long enough to not get inlined - want = []string{"hello", "世界", "", "say goodbye daffy"} - valids = []bool{true, true, false, true} - ) - - sb := array.NewStringViewBuilder(mem) - defer sb.Release() - - sb.Retain() - sb.Release() - - assert.NoError(t, 
sb.AppendValueFromString(want[0])) - sb.AppendValues(want[1:2], nil) - - sb.AppendNull() - sb.Append(want[3]) - - if got, want := sb.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := sb.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - arr := sb.NewStringViewArray() - defer arr.Release() - - arr.Retain() - arr.Release() - - assert.Equal(t, "hello", arr.ValueStr(0)) - - if got, want := arr.Len(), len(want); got != want { - t.Fatalf("invalid len: got=%d, want=%d", got, want) - } - - if got, want := arr.NullN(), 1; got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - - for i := range want { - if arr.IsNull(i) != !valids[i] { - t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i]) - } - switch { - case arr.IsNull(i): - default: - got := arr.Value(i) - if got != want[i] { - t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i]) - } - } - } - - sub := array.MakeFromData(arr.Data()) - defer sub.Release() - - if sub.DataType().ID() != arrow.STRING_VIEW { - t.Fatalf("invalid type: got=%q, want=string view", sub.DataType().Name()) - } - - if _, ok := sub.(*array.StringView); !ok { - t.Fatalf("could not type-assert to array.String") - } - - if got, want := arr.String(), `["hello" "世界" (null) "say goodbye daffy"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - // only the last string gets stuck into a buffer the rest are inlined - // in the headers. 
- if !bytes.Equal([]byte(`say goodbye daffy`), arr.Data().Buffers()[2].Bytes()) { - t.Fatalf("got=%q, want=%q", string(arr.Data().Buffers()[2].Bytes()), `say goodbye daffy`) - } - - // check the prefix for the non-inlined value - if [4]byte{'s', 'a', 'y', ' '} != arr.ValueHeader(3).Prefix() { - t.Fatalf("got=%q, want=%q", arr.ValueHeader(3).Prefix(), `say `) - } - - slice := array.NewSliceData(arr.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.StringView) - if !ok { - t.Fatalf("could not type-assert to array.StringView") - } - - if got, want := v.String(), `[(null) "say goodbye daffy"]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - if !bytes.Equal([]byte(`say goodbye daffy`), v.Data().Buffers()[2].Bytes()) { - t.Fatalf("got=%q, want=%q", string(v.Data().Buffers()[2].Bytes()), `say goodbye daffy`) - } - - // check the prefix for the non-inlined value - if [4]byte{'s', 'a', 'y', ' '} != v.ValueHeader(1).Prefix() { - t.Fatalf("got=%q, want=%q", v.ValueHeader(1).Prefix(), `say `) - } -} - -func TestStringViewBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - want := []string{"hello", "世界", "", "say goodbye daffy"} - - ab := array.NewStringViewBuilder(mem) - defer ab.Release() - - stringValues := func(a *array.StringView) []string { - vs := make([]string, a.Len()) - for i := range vs { - vs[i] = a.Value(i) - } - return vs - } - - ab.AppendValues([]string{}, nil) - a := ab.NewStringViewArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewStringViewArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]string{}, nil) - ab.AppendValues(want, nil) - a = ab.NewStringViewArray() - assert.Equal(t, want, stringValues(a)) - a.Release() - - ab.AppendValues(want, nil) - ab.AppendValues([]string{}, nil) - a = ab.NewStringViewArray() - assert.Equal(t, want, 
stringValues(a)) - a.Release() -} - -// TestStringReset tests the Reset() method on the String type by creating two different Strings and then -// resetting the contents of string2 with the values from string1. -func TestStringViewReset(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - sb1 := array.NewStringViewBuilder(mem) - sb2 := array.NewStringViewBuilder(mem) - defer sb1.Release() - defer sb2.Release() - - sb1.Append("string1") - sb1.AppendNull() - - var ( - string1 = sb1.NewStringViewArray() - string2 = sb2.NewStringViewArray() - - string1Data = string1.Data() - ) - string2.Reset(string1Data) - - assert.Equal(t, "string1", string2.Value(0)) -} diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go deleted file mode 100644 index 279ac1d87b25b..0000000000000 --- a/go/arrow/array/struct.go +++ /dev/null @@ -1,491 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "errors" - "fmt" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Struct represents an ordered sequence of relative types. -type Struct struct { - array - fields []arrow.Array -} - -// NewStructArray constructs a new Struct Array out of the columns passed -// in and the field names. The length of all cols must be the same and -// there should be the same number of columns as names. -func NewStructArray(cols []arrow.Array, names []string) (*Struct, error) { - return NewStructArrayWithNulls(cols, names, nil, 0, 0) -} - -// NewStructArrayWithNulls is like NewStructArray as a convenience function, -// but also takes in a null bitmap, the number of nulls, and an optional offset -// to use for creating the Struct Array. 
-func NewStructArrayWithNulls(cols []arrow.Array, names []string, nullBitmap *memory.Buffer, nullCount int, offset int) (*Struct, error) { - if len(cols) != len(names) { - return nil, fmt.Errorf("%w: mismatching number of fields and child arrays", arrow.ErrInvalid) - } - if len(cols) == 0 { - return nil, fmt.Errorf("%w: can't infer struct array length with 0 child arrays", arrow.ErrInvalid) - } - length := cols[0].Len() - children := make([]arrow.ArrayData, len(cols)) - fields := make([]arrow.Field, len(cols)) - for i, c := range cols { - if length != c.Len() { - return nil, fmt.Errorf("%w: mismatching child array lengths", arrow.ErrInvalid) - } - children[i] = c.Data() - fields[i].Name = names[i] - fields[i].Type = c.DataType() - fields[i].Nullable = true - } - data := NewData(arrow.StructOf(fields...), length, []*memory.Buffer{nullBitmap}, children, nullCount, offset) - defer data.Release() - return NewStructData(data), nil -} - -// NewStructData returns a new Struct array value from data. -func NewStructData(data arrow.ArrayData) *Struct { - a := &Struct{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -func (a *Struct) NumField() int { return len(a.fields) } -func (a *Struct) Field(i int) arrow.Array { return a.fields[i] } - -// ValueStr returns the string representation (as json) of the value at index i. 
-func (a *Struct) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - data, err := json.Marshal(a.GetOneForMarshal(i)) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *Struct) String() string { - o := new(strings.Builder) - o.WriteString("{") - - structBitmap := a.NullBitmapBytes() - for i, v := range a.fields { - if i > 0 { - o.WriteString(" ") - } - if arrow.IsUnion(v.DataType().ID()) { - fmt.Fprintf(o, "%v", v) - continue - } else if !bytes.Equal(structBitmap, v.NullBitmapBytes()) { - masked := a.newStructFieldWithParentValidityMask(i) - fmt.Fprintf(o, "%v", masked) - masked.Release() - continue - } - fmt.Fprintf(o, "%v", v) - } - o.WriteString("}") - return o.String() -} - -// newStructFieldWithParentValidityMask returns the Interface at fieldIndex -// with a nullBitmapBytes adjusted according on the parent struct nullBitmapBytes. -// From the docs: -// -// "When reading the struct array the parent validity bitmap takes priority." -func (a *Struct) newStructFieldWithParentValidityMask(fieldIndex int) arrow.Array { - field := a.Field(fieldIndex) - nullBitmapBytes := field.NullBitmapBytes() - maskedNullBitmapBytes := make([]byte, len(nullBitmapBytes)) - copy(maskedNullBitmapBytes, nullBitmapBytes) - for i := 0; i < field.Len(); i++ { - if a.IsNull(i) { - bitutil.ClearBit(maskedNullBitmapBytes, i) - } - } - data := NewSliceData(field.Data(), 0, int64(field.Len())).(*Data) - defer data.Release() - bufs := make([]*memory.Buffer, len(data.Buffers())) - copy(bufs, data.buffers) - bufs[0].Release() - bufs[0] = memory.NewBufferBytes(maskedNullBitmapBytes) - data.buffers = bufs - maskedField := MakeFromData(data) - return maskedField -} - -func (a *Struct) setData(data *Data) { - a.array.setData(data) - a.fields = make([]arrow.Array, len(data.childData)) - for i, child := range data.childData { - if data.offset != 0 || child.Len() != data.length { - sub := NewSliceData(child, int64(data.offset), int64(data.offset+data.length)) 
- a.fields[i] = MakeFromData(sub) - sub.Release() - } else { - a.fields[i] = MakeFromData(child) - } - } -} - -func (a *Struct) GetOneForMarshal(i int) interface{} { - if a.IsNull(i) { - return nil - } - - tmp := make(map[string]interface{}) - fieldList := a.data.dtype.(*arrow.StructType).Fields() - for j, d := range a.fields { - tmp[fieldList[j].Name] = d.GetOneForMarshal(i) - } - return tmp -} - -func (a *Struct) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func arrayEqualStruct(left, right *Struct) bool { - for i, lf := range left.fields { - rf := right.fields[i] - if !Equal(lf, rf) { - return false - } - } - return true -} - -func (a *Struct) Retain() { - a.array.Retain() - for _, f := range a.fields { - f.Retain() - } -} - -func (a *Struct) Release() { - a.array.Release() - for _, f := range a.fields { - f.Release() - } -} - -type StructBuilder struct { - builder - - dtype arrow.DataType - fields []Builder -} - -// NewStructBuilder returns a builder, using the provided memory allocator. -func NewStructBuilder(mem memory.Allocator, dtype *arrow.StructType) *StructBuilder { - b := &StructBuilder{ - builder: builder{refCount: 1, mem: mem}, - dtype: dtype, - fields: make([]Builder, dtype.NumFields()), - } - for i, f := range dtype.Fields() { - b.fields[i] = NewBuilder(b.mem, f.Type) - } - return b -} - -func (b *StructBuilder) Type() arrow.DataType { - fields := make([]arrow.Field, len(b.fields)) - copy(fields, b.dtype.(*arrow.StructType).Fields()) - for i, b := range b.fields { - fields[i].Type = b.Type() - } - return arrow.StructOf(fields...) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *StructBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - - for _, f := range b.fields { - f.Release() - } - } -} - -func (b *StructBuilder) Append(v bool) { - // Intentionally not calling `Reserve` as it will recursively call - // `Reserve` on the child builders, which during profiling has shown to be - // very expensive due to iterating over children, dynamic dispatch and all - // other code that gets executed even if previously `Reserve` was called to - // preallocate. Not calling `Reserve` has no downsides as when appending to - // the underlying children they already ensure they have enough space - // reserved. The only thing we must do is ensure we have enough space in - // the validity bitmap of the struct builder itself. - b.builder.reserve(1, b.resizeHelper) - b.unsafeAppendBoolToBitmap(v) - if !v { - for _, f := range b.fields { - f.AppendNull() - } - } -} - -func (b *StructBuilder) AppendValues(valids []bool) { - b.Reserve(len(valids)) - b.builder.unsafeAppendBoolsToBitmap(valids, len(valids)) -} - -func (b *StructBuilder) AppendNull() { b.Append(false) } - -func (b *StructBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *StructBuilder) AppendEmptyValue() { - b.Append(true) - for _, f := range b.fields { - f.AppendEmptyValue() - } -} - -func (b *StructBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *StructBuilder) unsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -func (b *StructBuilder) init(capacity int) { - b.builder.init(capacity) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. 
-func (b *StructBuilder) Reserve(n int) { - b.builder.reserve(n, b.resizeHelper) - for _, f := range b.fields { - f.Reserve(n) - } -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *StructBuilder) Resize(n int) { - b.resizeHelper(n) - for _, f := range b.fields { - f.Resize(n) - } -} - -func (b *StructBuilder) resizeHelper(n int) { - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(n, b.builder.init) - } -} - -func (b *StructBuilder) NumField() int { return len(b.fields) } -func (b *StructBuilder) FieldBuilder(i int) Builder { return b.fields[i] } - -// NewArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder -// so it can be used to build a new array. -func (b *StructBuilder) NewArray() arrow.Array { - return b.NewStructArray() -} - -// NewStructArray creates a Struct array from the memory buffers used by the builder and resets the StructBuilder -// so it can be used to build a new array. 
-func (b *StructBuilder) NewStructArray() (a *Struct) { - data := b.newData() - a = NewStructData(data) - data.Release() - return -} - -func (b *StructBuilder) newData() (data *Data) { - fields := make([]arrow.ArrayData, len(b.fields)) - for i, f := range b.fields { - arr := f.NewArray() - defer arr.Release() - fields[i] = arr.Data() - } - - data = NewData( - b.Type(), b.length, - []*memory.Buffer{ - b.nullBitmap, - }, - fields, - b.nulls, - 0, - ) - b.reset() - - return -} - -func (b *StructBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - if !strings.HasPrefix(s, "{") && !strings.HasSuffix(s, "}") { - return fmt.Errorf("%w: invalid string for struct should be be of form: {*}", arrow.ErrInvalid) - } - dec := json.NewDecoder(strings.NewReader(s)) - return b.UnmarshalOne(dec) -} - -func (b *StructBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('{'): - b.Append(true) - keylist := make(map[string]bool) - for dec.More() { - keyTok, err := dec.Token() - if err != nil { - return err - } - - key, ok := keyTok.(string) - if !ok { - return errors.New("missing key") - } - - if keylist[key] { - return fmt.Errorf("key %s is specified twice", key) - } - - keylist[key] = true - - idx, ok := b.dtype.(*arrow.StructType).FieldIdx(key) - if !ok { - var extra interface{} - dec.Decode(&extra) - continue - } - - if err := b.fields[idx].UnmarshalOne(dec); err != nil { - return err - } - } - - // Append null values to all optional fields that were not presented in the json input - for _, field := range b.dtype.(*arrow.StructType).Fields() { - if !field.Nullable { - continue - } - idx, _ := b.dtype.(*arrow.StructType).FieldIdx(field.Name) - if _, hasKey := keylist[field.Name]; !hasKey { - b.fields[idx].AppendNull() - } - } - - // consume '}' - _, err := dec.Token() - return err - case nil: - b.AppendNull() - default: - return 
&json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Struct: fmt.Sprint(b.dtype), - } - } - return nil -} - -func (b *StructBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (b *StructBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("struct builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Struct)(nil) - _ Builder = (*StructBuilder)(nil) -) diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go deleted file mode 100644 index 4338bbd0b136e..0000000000000 --- a/go/arrow/array/struct_test.go +++ /dev/null @@ -1,532 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestStructArray(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'} - f2s = []int32{1, 2, 3, 4} - - f1Lengths = []int{3, 0, 3, 4} - f1Offsets = []int32{0, 3, 3, 6, 10} - f1Valids = []bool{true, false, true, true} - - isValid = []bool{true, true, true, true} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) - ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - for i := 0; i < 10; i++ { - f1b := sb.FieldBuilder(0).(*array.ListBuilder) - f1vb := f1b.ValueBuilder().(*array.Uint8Builder) - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - sb.Resize(len(f1Lengths)) - f1vb.Resize(len(f1s)) - f2b.Resize(len(f2s)) - - pos := 0 - for i, length := range f1Lengths { - f1b.Append(f1Valids[i]) - for j := 0; j < length; j++ { - f1vb.Append(f1s[pos]) - pos++ - } - f2b.Append(f2s[i]) - } - - for _, valid := range isValid { - sb.Append(valid) - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - arr.Retain() - arr.Release() - - if got, want := arr.DataType().ID(), arrow.STRUCT; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - for i, valid := range isValid { - if got, want := arr.IsValid(i), valid; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - { - f1arr := 
arr.Field(0).(*array.List) - if got, want := f1arr.Len(), len(f1Lengths); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1Lengths { - if got, want := f1arr.IsValid(i), f1Valids[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - - } - - if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - varr := f1arr.ListValues().(*array.Uint8) - if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - - { - f2arr := arr.Field(1).(*array.Int32) - if got, want := f2arr.Len(), len(f2s); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%d, want=%d", got, want) - } - } - } -} - -func TestStructStringRoundTrip(t *testing.T) { - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := arrow.StructOf( - arrow.Field{Name: "nullable_bool", Type: new(arrow.BooleanType), Nullable: true}, - arrow.Field{Name: "non_nullable_bool", Type: new(arrow.BooleanType)}, - ) - - builder := array.NewStructBuilder(memory.DefaultAllocator, dt) - nullableBld := builder.FieldBuilder(0).(*array.BooleanBuilder) - nonNullableBld := builder.FieldBuilder(1).(*array.BooleanBuilder) - - builder.Append(true) - nullableBld.Append(true) - nonNullableBld.Append(true) - - builder.Append(true) - nullableBld.AppendNull() - nonNullableBld.Append(true) - - builder.AppendNull() - - arr := builder.NewArray().(*array.Struct) - - // 2. 
create array via AppendValueFromString - b1 := array.NewStructBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Struct) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestStructArrayEmpty(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - sb := array.NewStructBuilder(pool, arrow.StructOf()) - defer sb.Release() - - if got, want := sb.NumField(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - arr := sb.NewArray().(*array.Struct) - - if got, want := arr.Len(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := arr.NumField(), 0; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } -} - -func TestStructArrayBulkAppend(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []byte{'j', 'o', 'e', 'b', 'o', 'b', 'm', 'a', 'r', 'k'} - f2s = []int32{1, 2, 3, 4} - - f1Lengths = []int{3, 0, 3, 4} - f1Offsets = []int32{0, 3, 3, 6, 10} - f1Valids = []bool{true, false, true, true} - - isValid = []bool{true, true, true, true} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.ListOf(arrow.PrimitiveTypes.Uint8)}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) 
- ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - for i := 0; i < 10; i++ { - f1b := sb.FieldBuilder(0).(*array.ListBuilder) - f1vb := f1b.ValueBuilder().(*array.Uint8Builder) - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - sb.Resize(len(f1Lengths)) - f1vb.Resize(len(f1s)) - f2b.Resize(len(f2s)) - - sb.AppendValues(isValid) - f1b.AppendValues(f1Offsets, f1Valids) - f1vb.AppendValues(f1s, nil) - f2b.AppendValues(f2s, nil) - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - if got, want := arr.DataType().ID(), arrow.STRUCT; got != want { - t.Fatalf("got=%v, want=%v", got, want) - } - if got, want := arr.Len(), len(isValid); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - for i, valid := range isValid { - if got, want := arr.IsValid(i), valid; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - } - - { - f1arr := arr.Field(0).(*array.List) - if got, want := f1arr.Len(), len(f1Lengths); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1Lengths { - if got, want := f1arr.IsValid(i), f1Valids[i]; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - if got, want := f1arr.IsNull(i), f1Lengths[i] == 0; got != want { - t.Fatalf("got[%d]=%v, want[%d]=%v", i, got, i, want) - } - - } - - if got, want := f1arr.Offsets(), f1Offsets; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - - varr := f1arr.ListValues().(*array.Uint8) - if got, want := varr.Uint8Values(), f1s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%v, want=%v", got, want) - } - } - - { - f2arr := arr.Field(1).(*array.Int32) - if got, want := f2arr.Len(), len(f2s); got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - if got, want := f2arr.Int32Values(), f2s; !reflect.DeepEqual(got, want) { - t.Fatalf("got=%d, want=%d", got, want) - } - } - } -} - 
-func TestStructArrayStringer(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []float64{1.1, 1.2, 1.3, 1.4} - f2s = []int32{1, 2, 3, 4} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) - ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - f1b := sb.FieldBuilder(0).(*array.Float64Builder) - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1s { - sb.Append(true) - switch i { - case 1: - f1b.AppendNull() - f2b.Append(f2s[i]) - case 2: - f1b.Append(f1s[i]) - f2b.AppendNull() - default: - f1b.Append(f1s[i]) - f2b.Append(f2s[i]) - } - } - assert.NoError(t, sb.AppendValueFromString(`{"f1": 1.1, "f2": 1}`)) - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - assert.Equal(t, `{"f1":1.1,"f2":1}`, arr.ValueStr(4)) - want := "{[1.1 (null) 1.3 1.4 1.1] [1 2 (null) 4 1]}" - got := arr.String() - if got != want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) - } -} - -func TestStructArraySlice(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []float64{1.1, 1.2, 1.3, 1.4} - f2s = []int32{1, 2, 3, 4} - valids = []bool{true, true, true, true} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) 
- ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - f1b := sb.FieldBuilder(0).(*array.Float64Builder) - - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - for i := range f1s { - sb.Append(valids[i]) - switch i { - case 1: - f1b.AppendNull() - f2b.Append(f2s[i]) - case 2: - f1b.Append(f1s[i]) - f2b.AppendNull() - default: - f1b.Append(f1s[i]) - f2b.Append(f2s[i]) - } - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - // Slice - arrSlice := array.NewSlice(arr, 2, 4).(*array.Struct) - defer arrSlice.Release() - - want := "{[1.3 1.4] [(null) 4]}" - got := arrSlice.String() - if got != want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) - } -} - -func TestStructArrayNullBitmap(t *testing.T) { - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer pool.AssertSize(t, 0) - - var ( - f1s = []float64{1.1, 1.2, 1.3, 1.4} - f2s = []int32{1, 2, 3, 4} - valids = []bool{true, true, true, false} - - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - } - dtype = arrow.StructOf(fields...) 
- ) - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - f1b := sb.FieldBuilder(0).(*array.Float64Builder) - - f2b := sb.FieldBuilder(1).(*array.Int32Builder) - - if got, want := sb.NumField(), 2; got != want { - t.Fatalf("got=%d, want=%d", got, want) - } - - sb.AppendValues(valids) - for i := range f1s { - f1b.Append(f1s[i]) - switch i { - case 1: - f2b.AppendNull() - default: - f2b.Append(f2s[i]) - } - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - want := "{[1.1 1.2 1.3 (null)] [1 (null) 3 (null)]}" - got := arr.String() - if got != want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) - } -} - -func TestStructArrayUnmarshalJSONMissingFields(t *testing.T) { - pool := memory.NewGoAllocator() - - var ( - fields = []arrow.Field{ - {Name: "f1", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, - { - Name: "f3", Type: arrow.StructOf( - []arrow.Field{ - {Name: "f3_1", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "f3_2", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "f3_3", Type: arrow.BinaryTypes.String, Nullable: false}, - }..., - ), - }, - } - dtype = arrow.StructOf(fields...) 
- ) - - tests := []struct { - name string - jsonInput string - want string - panic bool - }{ - { - name: "missing required field", - jsonInput: `[{"f2": 3, "f3": {"f3_1": "test"}}]`, - panic: true, - want: "", - }, - { - name: "missing optional fields", - jsonInput: `[{"f2": 3, "f3": {"f3_3": "test"}}]`, - panic: false, - want: `{[(null)] [3] {[(null)] [(null)] ["test"]}}`, - }, - } - - for _, tc := range tests { - t.Run( - tc.name, func(t *testing.T) { - - var val bool - - sb := array.NewStructBuilder(pool, dtype) - defer sb.Release() - - if tc.panic { - defer func() { - e := recover() - if e == nil { - t.Fatalf("this should have panicked, but did not; slice value %v", val) - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. got=%q, want=%q", got, want) - } - }() - } else { - defer func() { - if e := recover(); e != nil { - t.Fatalf("unexpected panic: %v", e) - } - }() - } - - err := sb.UnmarshalJSON([]byte(tc.jsonInput)) - if err != nil { - t.Fatal(err) - } - - arr := sb.NewArray().(*array.Struct) - defer arr.Release() - - got := arr.String() - if got != tc.want { - t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, tc.want) - } - }, - ) - } -} diff --git a/go/arrow/array/table.go b/go/arrow/array/table.go deleted file mode 100644 index 3b742ae78803d..0000000000000 --- a/go/arrow/array/table.go +++ /dev/null @@ -1,421 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "math" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -// NewColumnSlice returns a new zero-copy slice of the column with the indicated -// indices i and j, corresponding to the column's array[i:j]. -// The returned column must be Release()'d after use. -// -// NewColSlice panics if the slice is outside the valid range of the column's array. -// NewColSlice panics if j < i. -func NewColumnSlice(col *arrow.Column, i, j int64) *arrow.Column { - slice := NewChunkedSlice(col.Data(), i, j) - defer slice.Release() - return arrow.NewColumn(col.Field(), slice) -} - -// NewChunkedSlice constructs a zero-copy slice of the chunked array with the indicated -// indices i and j, corresponding to array[i:j]. -// The returned chunked array must be Release()'d after use. -// -// NewSlice panics if the slice is outside the valid range of the input array. -// NewSlice panics if j < i. 
-func NewChunkedSlice(a *arrow.Chunked, i, j int64) *arrow.Chunked { - if j > int64(a.Len()) || i > j || i > int64(a.Len()) { - panic("arrow/array: index out of range") - } - - var ( - cur = 0 - beg = i - sz = j - i - chunks = make([]arrow.Array, 0, len(a.Chunks())) - ) - - for cur < len(a.Chunks()) && beg >= int64(a.Chunks()[cur].Len()) { - beg -= int64(a.Chunks()[cur].Len()) - cur++ - } - - for cur < len(a.Chunks()) && sz > 0 { - arr := a.Chunks()[cur] - end := beg + sz - if end > int64(arr.Len()) { - end = int64(arr.Len()) - } - chunks = append(chunks, NewSlice(arr, beg, end)) - sz -= int64(arr.Len()) - beg - beg = 0 - cur++ - } - chunks = chunks[:len(chunks):len(chunks)] - defer func() { - for _, chunk := range chunks { - chunk.Release() - } - }() - - return arrow.NewChunked(a.DataType(), chunks) -} - -// simpleTable is a basic, non-lazy in-memory table. -type simpleTable struct { - refCount int64 - - rows int64 - cols []arrow.Column - - schema *arrow.Schema -} - -// NewTable returns a new basic, non-lazy in-memory table. -// If rows is negative, the number of rows will be inferred from the height -// of the columns. -// -// NewTable panics if the columns and schema are inconsistent. -// NewTable panics if rows is larger than the height of the columns. -func NewTable(schema *arrow.Schema, cols []arrow.Column, rows int64) arrow.Table { - tbl := simpleTable{ - refCount: 1, - rows: rows, - cols: cols, - schema: schema, - } - - if tbl.rows < 0 { - switch len(tbl.cols) { - case 0: - tbl.rows = 0 - default: - tbl.rows = int64(tbl.cols[0].Len()) - } - } - - // validate the table and its constituents. - // note we retain the columns after having validated the table - // in case the validation fails and panics (and would otherwise leak - // a ref-count on the columns.) - tbl.validate() - - for i := range tbl.cols { - tbl.cols[i].Retain() - } - - return &tbl -} - -// NewTableFromSlice is a convenience function to create a table from a slice -// of slices of arrow.Array. 
-// -// Like other NewTable functions this can panic if: -// - len(schema.Fields) != len(data) -// - the total length of each column's array slice (ie: number of rows -// in the column) aren't the same for all columns. -func NewTableFromSlice(schema *arrow.Schema, data [][]arrow.Array) arrow.Table { - if len(data) != schema.NumFields() { - panic("array/table: mismatch in number of columns and data for creating a table") - } - - cols := make([]arrow.Column, schema.NumFields()) - for i, arrs := range data { - field := schema.Field(i) - chunked := arrow.NewChunked(field.Type, arrs) - cols[i] = *arrow.NewColumn(field, chunked) - chunked.Release() - } - - tbl := simpleTable{ - refCount: 1, - schema: schema, - cols: cols, - rows: int64(cols[0].Len()), - } - - defer func() { - if r := recover(); r != nil { - // if validate panics, let's release the columns - // so that we don't leak them, then propagate the panic - for _, c := range cols { - c.Release() - } - panic(r) - } - }() - // validate the table and its constituents. - tbl.validate() - - return &tbl -} - -// NewTableFromRecords returns a new basic, non-lazy in-memory table. -// -// NewTableFromRecords panics if the records and schema are inconsistent. 
-func NewTableFromRecords(schema *arrow.Schema, recs []arrow.Record) arrow.Table { - arrs := make([]arrow.Array, len(recs)) - cols := make([]arrow.Column, schema.NumFields()) - - defer func(cols []arrow.Column) { - for i := range cols { - cols[i].Release() - } - }(cols) - - for i := range cols { - field := schema.Field(i) - for j, rec := range recs { - arrs[j] = rec.Column(i) - } - chunk := arrow.NewChunked(field.Type, arrs) - cols[i] = *arrow.NewColumn(field, chunk) - chunk.Release() - } - - return NewTable(schema, cols, -1) -} - -func (tbl *simpleTable) Schema() *arrow.Schema { return tbl.schema } - -func (tbl *simpleTable) AddColumn(i int, field arrow.Field, column arrow.Column) (arrow.Table, error) { - if int64(column.Len()) != tbl.rows { - return nil, fmt.Errorf("arrow/array: column length mismatch: %d != %d", column.Len(), tbl.rows) - } - if field.Type != column.DataType() { - return nil, fmt.Errorf("arrow/array: column type mismatch: %v != %v", field.Type, column.DataType()) - } - newSchema, err := tbl.schema.AddField(i, field) - if err != nil { - return nil, err - } - cols := make([]arrow.Column, len(tbl.cols)+1) - copy(cols[:i], tbl.cols[:i]) - cols[i] = column - copy(cols[i+1:], tbl.cols[i:]) - newTable := NewTable(newSchema, cols, tbl.rows) - return newTable, nil -} - -func (tbl *simpleTable) NumRows() int64 { return tbl.rows } -func (tbl *simpleTable) NumCols() int64 { return int64(len(tbl.cols)) } -func (tbl *simpleTable) Column(i int) *arrow.Column { return &tbl.cols[i] } - -func (tbl *simpleTable) validate() { - if len(tbl.cols) != tbl.schema.NumFields() { - panic(errors.New("arrow/array: table schema mismatch")) - } - for i, col := range tbl.cols { - if !col.Field().Equal(tbl.schema.Field(i)) { - panic(fmt.Errorf("arrow/array: column field %q is inconsistent with schema", col.Name())) - } - - if int64(col.Len()) < tbl.rows { - panic(fmt.Errorf("arrow/array: column %q expected length >= %d but got length %d", col.Name(), tbl.rows, col.Len())) - } - } 
-} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (tbl *simpleTable) Retain() { - atomic.AddInt64(&tbl.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (tbl *simpleTable) Release() { - debug.Assert(atomic.LoadInt64(&tbl.refCount) > 0, "too many releases") - - if atomic.AddInt64(&tbl.refCount, -1) == 0 { - for i := range tbl.cols { - tbl.cols[i].Release() - } - tbl.cols = nil - } -} - -func (tbl *simpleTable) String() string { - o := new(strings.Builder) - o.WriteString(tbl.Schema().String()) - o.WriteString("\n") - - for i := 0; i < int(tbl.NumCols()); i++ { - col := tbl.Column(i) - o.WriteString(col.Field().Name + ": [") - for j, chunk := range col.Data().Chunks() { - if j != 0 { - o.WriteString(", ") - } - o.WriteString(chunk.String()) - } - o.WriteString("]\n") - } - return o.String() -} - -// TableReader is a Record iterator over a (possibly chunked) Table -type TableReader struct { - refCount int64 - - tbl arrow.Table - cur int64 // current row - max int64 // total number of rows - rec arrow.Record // current Record - chksz int64 // chunk size - - chunks []*arrow.Chunked - slots []int // chunk indices - offsets []int64 // chunk offsets -} - -// NewTableReader returns a new TableReader to iterate over the (possibly chunked) Table. -// if chunkSize is <= 0, the biggest possible chunk will be selected. 
-func NewTableReader(tbl arrow.Table, chunkSize int64) *TableReader { - ncols := tbl.NumCols() - tr := &TableReader{ - refCount: 1, - tbl: tbl, - cur: 0, - max: int64(tbl.NumRows()), - chksz: chunkSize, - chunks: make([]*arrow.Chunked, ncols), - slots: make([]int, ncols), - offsets: make([]int64, ncols), - } - tr.tbl.Retain() - - if tr.chksz <= 0 { - tr.chksz = math.MaxInt64 - } - - for i := range tr.chunks { - col := tr.tbl.Column(i) - tr.chunks[i] = col.Data() - tr.chunks[i].Retain() - } - return tr -} - -func (tr *TableReader) Schema() *arrow.Schema { return tr.tbl.Schema() } -func (tr *TableReader) Record() arrow.Record { return tr.rec } - -func (tr *TableReader) Next() bool { - if tr.cur >= tr.max { - return false - } - - if tr.rec != nil { - tr.rec.Release() - } - - // determine the minimum contiguous slice across all columns - chunksz := imin64(tr.max, tr.chksz) - chunks := make([]arrow.Array, len(tr.chunks)) - for i := range chunks { - j := tr.slots[i] - chunk := tr.chunks[i].Chunk(j) - remain := int64(chunk.Len()) - tr.offsets[i] - if remain < chunksz { - chunksz = remain - } - - chunks[i] = chunk - } - - // slice the chunks, advance each chunk slot as appropriate. - batch := make([]arrow.Array, len(tr.chunks)) - for i, chunk := range chunks { - var slice arrow.Array - offset := tr.offsets[i] - switch int64(chunk.Len()) - offset { - case chunksz: - tr.slots[i]++ - tr.offsets[i] = 0 - if offset > 0 { - // need to slice - slice = NewSlice(chunk, offset, offset+chunksz) - } else { - // no need to slice - slice = chunk - slice.Retain() - } - default: - tr.offsets[i] += chunksz - slice = NewSlice(chunk, offset, offset+chunksz) - } - batch[i] = slice - } - - tr.cur += chunksz - tr.rec = NewRecord(tr.tbl.Schema(), batch, chunksz) - - for _, arr := range batch { - arr.Release() - } - - return true -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. 
-func (tr *TableReader) Retain() { - atomic.AddInt64(&tr.refCount, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. -func (tr *TableReader) Release() { - debug.Assert(atomic.LoadInt64(&tr.refCount) > 0, "too many releases") - - if atomic.AddInt64(&tr.refCount, -1) == 0 { - tr.tbl.Release() - for _, chk := range tr.chunks { - chk.Release() - } - if tr.rec != nil { - tr.rec.Release() - } - tr.tbl = nil - tr.chunks = nil - tr.slots = nil - tr.offsets = nil - } -} -func (tr *TableReader) Err() error { return nil } - -func imin64(a, b int64) int64 { - if a < b { - return a - } - return b -} - -var ( - _ arrow.Table = (*simpleTable)(nil) - _ RecordReader = (*TableReader)(nil) -) diff --git a/go/arrow/array/table_test.go b/go/arrow/array/table_test.go deleted file mode 100644 index e8357ac3dfb69..0000000000000 --- a/go/arrow/array/table_test.go +++ /dev/null @@ -1,833 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "errors" - "fmt" - "reflect" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -func TestChunked(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - c1 := arrow.NewChunked(arrow.PrimitiveTypes.Int32, nil) - c1.Retain() - c1.Release() - if got, want := c1.Len(), 0; got != want { - t.Fatalf("len differ. got=%d, want=%d", got, want) - } - if got, want := c1.NullN(), 0; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := c1.DataType(), arrow.PrimitiveTypes.Int32; got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - c1.Release() - - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c2 := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - defer c2.Release() - - if got, want := c2.Len(), 10; got != want { - t.Fatalf("len: got=%d, want=%d", got, want) - } - if got, want := c2.NullN(), 0; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := c2.DataType(), arrow.PrimitiveTypes.Float64; got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - if got, want := c2.Chunk(0), c2.Chunks()[0]; !reflect.DeepEqual(got, want) { - t.Fatalf("chunk: got=%v, want=%v", got, want) - } - - for _, tc := range []struct { - i, j int64 - len int - nulls int - chunks int - }{ - {i: 0, j: 10, len: 10, nulls: 0, chunks: 3}, - {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, - {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, - {i: 0, j: 5, len: 5, nulls: 0, chunks: 1}, - {i: 5, 
j: 7, len: 2, nulls: 0, chunks: 1}, - {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, - {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, - } { - t.Run("", func(t *testing.T) { - sub := array.NewChunkedSlice(c2, tc.i, tc.j) - defer sub.Release() - - if got, want := sub.Len(), tc.len; got != want { - t.Fatalf("len: got=%d, want=%d", got, want) - } - if got, want := sub.NullN(), tc.nulls; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := sub.DataType(), arrow.PrimitiveTypes.Float64; got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - if got, want := len(sub.Chunks()), tc.chunks; got != want { - t.Fatalf("chunks: got=%d, want=%d", got, want) - } - }) - } -} - -func TestChunkedEqualDataType(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - lb1 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer lb1.Release() - - v1 := lb1.NewArray() - defer v1.Release() - - lb2 := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer lb2.Release() - - v2 := lb2.NewArray() - defer v2.Release() - - c1 := arrow.NewChunked(arrow.ListOf(arrow.PrimitiveTypes.Int32), []arrow.Array{ - v1, v2, - }) - defer c1.Release() -} - -func TestChunkedInvalid(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{6, 7}, nil) - f2 := ib.NewInt32Array() - defer f2.Release() - - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected a panic") - } - - err, ok := e.(error) - if !ok { - t.Fatalf("expected an error") - } - - if !errors.Is(err, arrow.ErrInvalid) { - t.Fatalf("should be an ErrInvalid") - } - - if got, want := err.Error(), fmt.Sprintf("%s: arrow/array: mismatch data type 
float64 vs int32", arrow.ErrInvalid); got != want { - t.Fatalf("invalid error. got=%q, want=%q", got, want) - } - }() - - c1 := arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{ - f1, f2, - }) - defer c1.Release() -} - -func TestChunkedSliceInvalid(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - defer c.Release() - - for _, tc := range []struct { - i, j int64 - }{ - {i: 2, j: 1}, - {i: 10, j: 11}, - {i: 11, j: 11}, - } { - t.Run("", func(t *testing.T) { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected a panic") - } - if got, want := e.(string), "arrow/array: index out of range"; got != want { - t.Fatalf("invalid error. 
got=%q, want=%q", got, want) - } - }() - sub := array.NewChunkedSlice(c, tc.i, tc.j) - defer sub.Release() - }) - } -} - -func TestColumn(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - type slice struct { - i, j int64 - len int - nulls int - chunks int - } - - for _, tc := range []struct { - chunk *arrow.Chunked - field arrow.Field - err error - slices []slice - }{ - { - chunk: func() *arrow.Chunked { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3}, nil) - i1 := ib.NewInt32Array() - defer i1.Release() - - ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) - i2 := ib.NewInt32Array() - defer i2.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Int32, - []arrow.Array{i1, i2}, - ) - return c - }(), - field: arrow.Field{Name: "i32", Type: arrow.PrimitiveTypes.Int32}, - slices: []slice{ - {i: 0, j: 10, len: 10, nulls: 0, chunks: 2}, - {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, - {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, - {i: 0, j: 5, len: 5, nulls: 0, chunks: 2}, - {i: 5, j: 7, len: 2, nulls: 0, chunks: 1}, - {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, - {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, - }, - }, - { - chunk: func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - return c - }(), - field: arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64}, - slices: []slice{ - {i: 0, j: 10, len: 10, nulls: 0, chunks: 3}, - {i: 2, j: 3, len: 1, nulls: 0, chunks: 1}, - {i: 9, j: 10, len: 1, nulls: 0, chunks: 1}, - {i: 0, j: 5, len: 5, nulls: 0, 
chunks: 1}, - {i: 5, j: 7, len: 2, nulls: 0, chunks: 1}, - {i: 7, j: 10, len: 3, nulls: 0, chunks: 1}, - {i: 10, j: 10, len: 0, nulls: 0, chunks: 0}, - }, - }, - { - chunk: func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1}, - ) - return c - }(), - field: arrow.Field{Name: "f32", Type: arrow.PrimitiveTypes.Float32}, - err: fmt.Errorf("%w: arrow/array: inconsistent data type float64 vs float32", arrow.ErrInvalid), - }, - } { - t.Run("", func(t *testing.T) { - defer tc.chunk.Release() - - if tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. 
got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - - col := arrow.NewColumn(tc.field, tc.chunk) - defer col.Release() - - if got, want := col.Len(), tc.chunk.Len(); got != want { - t.Fatalf("invalid length: got=%d, want=%d", got, want) - } - if got, want := col.NullN(), tc.chunk.NullN(); got != want { - t.Fatalf("invalid nulls: got=%d, want=%d", got, want) - } - if got, want := col.Data(), tc.chunk; got != want { - t.Fatalf("invalid chunked: got=%#v, want=%#v", got, want) - } - if got, want := col.Field(), tc.field; !got.Equal(want) { - t.Fatalf("invalid field: got=%#v, want=%#v", got, want) - } - if got, want := col.Name(), tc.field.Name; got != want { - t.Fatalf("invalid name: got=%q, want=%q", got, want) - } - if got, want := col.DataType(), tc.field.Type; !reflect.DeepEqual(got, want) { - t.Fatalf("invalid data type: got=%#v, want=%#v", got, want) - } - - col.Retain() - col.Release() - - for _, slice := range tc.slices { - t.Run("", func(t *testing.T) { - sub := array.NewColumnSlice(col, slice.i, slice.j) - defer sub.Release() - - if got, want := sub.Len(), slice.len; got != want { - t.Fatalf("len: got=%d, want=%d", got, want) - } - if got, want := sub.NullN(), slice.nulls; got != want { - t.Fatalf("nulls: got=%d, want=%d", got, want) - } - if got, want := sub.DataType(), col.DataType(); got != want { - t.Fatalf("dtype: got=%v, want=%v", got, want) - } - if got, want := len(sub.Data().Chunks()), slice.chunks; got != want { - t.Fatalf("chunks: got=%d, want=%d", got, want) - } - }) - } - }) - } - -} - -func TestTable(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - preSchema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ) - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: 
arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - col1 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3}, nil) - i1 := ib.NewInt32Array() - defer i1.Release() - - ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) - i2 := ib.NewInt32Array() - defer i2.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Int32, - []arrow.Array{i1, i2}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(0), chunk) - }() - defer col1.Release() - - col2 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(1), chunk) - }() - defer col2.Release() - - cols := []arrow.Column{*col1, *col2} - - slices := [][]arrow.Array{col1.Data().Chunks(), col2.Data().Chunks()} - - preTbl := array.NewTable(preSchema, []arrow.Column{*col1}, -1) - defer preTbl.Release() - tbl, err := preTbl.AddColumn( - 1, - arrow.Field{Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - *col2, - ) - defer tbl.Release() - if err != nil { - t.Fatalf("could not add column: %+v", err) - } - - tbl2 := array.NewTableFromSlice(schema, slices) - defer tbl2.Release() - - tbl.Retain() - tbl.Release() - - if got, want := tbl.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := tbl.NumRows(), int64(10); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := 
tbl.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := tbl.Column(0).Name(), col1.Name(); got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - - if got, want := tbl2.NumRows(), int64(10); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if got, want := tbl2.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := tbl2.Column(0).Name(), col1.Name(); got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } - - for _, tc := range []struct { - schema *arrow.Schema - cols []arrow.Column - rows int64 - err error - }{ - { - schema: schema, - cols: nil, - rows: -1, - err: fmt.Errorf("arrow/array: table schema mismatch"), - }, - { - schema: schema, - cols: cols[:1], - rows: 0, - err: fmt.Errorf("arrow/array: table schema mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf("arrow/array: table schema mismatch"), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Int32}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf(`arrow/array: column field "f2-f64" is inconsistent with schema`), - }, - { - schema: arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f32", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ), - cols: cols, - rows: 0, - err: fmt.Errorf(`arrow/array: column field "f2-f64" is inconsistent with schema`), - }, - { - schema: schema, - cols: cols, - rows: 11, - err: fmt.Errorf(`arrow/array: column "f1-i32" expected length >= 11 but got length 10`), - }, - { - schema: schema, - cols: cols, - rows: 3, - err: nil, - }, - } { - t.Run("", func(t *testing.T) { - if 
tc.err != nil { - defer func() { - e := recover() - if e == nil { - t.Fatalf("expected an error %q", tc.err) - } - switch err := e.(type) { - case string: - if err != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - case error: - if err.Error() != tc.err.Error() { - t.Fatalf("invalid panic message. got=%q, want=%q", err, tc.err) - } - default: - t.Fatalf("invalid type for panic message: %T (err=%v)", err, err) - } - }() - } - tbl := array.NewTable(tc.schema, tc.cols, tc.rows) - defer tbl.Release() - if got, want := tbl.NumRows(), tc.rows; got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - }) - } -} - -func TestTableFromRecords(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - - b := array.NewRecordBuilder(mem, schema) - defer b.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) - b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - - rec1 := b.NewRecord() - defer rec1.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - - rec2 := b.NewRecord() - defer rec2.Release() - - tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2}) - defer tbl.Release() - - if got, want := tbl.Schema(), schema; !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - if got, want := tbl.NumRows(), int64(20); got != want { - t.Fatalf("invalid number of rows: got=%d, want=%d", got, want) - } - if 
got, want := tbl.NumCols(), int64(2); got != want { - t.Fatalf("invalid number of columns: got=%d, want=%d", got, want) - } - if got, want := tbl.Column(0).Name(), schema.Field(0).Name; got != want { - t.Fatalf("invalid column: got=%q, want=%q", got, want) - } -} - -func TestTableReader(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - col1 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - ib := array.NewInt32Builder(mem) - defer ib.Release() - - ib.AppendValues([]int32{1, 2, 3}, nil) - i1 := ib.NewInt32Array() - defer i1.Release() - - ib.AppendValues([]int32{4, 5, 6, 7, 8, 9, 10}, nil) - i2 := ib.NewInt32Array() - defer i2.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Int32, - []arrow.Array{i1, i2}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(0), chunk) - }() - defer col1.Release() - - col2 := func() *arrow.Column { - chunk := func() *arrow.Chunked { - fb := array.NewFloat64Builder(mem) - defer fb.Release() - - fb.AppendValues([]float64{1, 2, 3, 4, 5}, nil) - f1 := fb.NewFloat64Array() - defer f1.Release() - - fb.AppendValues([]float64{6, 7}, nil) - f2 := fb.NewFloat64Array() - defer f2.Release() - - fb.AppendValues([]float64{8, 9, 10}, nil) - f3 := fb.NewFloat64Array() - defer f3.Release() - - c := arrow.NewChunked( - arrow.PrimitiveTypes.Float64, - []arrow.Array{f1, f2, f3}, - ) - return c - }() - defer chunk.Release() - - return arrow.NewColumn(schema.Field(1), chunk) - }() - defer col2.Release() - - cols := []arrow.Column{*col1, *col2} - tbl := array.NewTable(schema, cols, -1) - defer tbl.Release() - - tr := array.NewTableReader(tbl, 1) - defer tr.Release() - - tr.Retain() - tr.Release() - - for tr.Next() { - } - if err := tr.Err(); err != nil { - t.Fatalf("tr err: 
%#v", err) - } - - for _, tc := range []struct { - sz int64 - n int64 - rows []int64 - }{ - {sz: -1, n: 4, rows: []int64{3, 2, 2, 3}}, - {sz: +0, n: 4, rows: []int64{3, 2, 2, 3}}, - {sz: +1, n: 10, rows: []int64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, - {sz: +2, n: 6, rows: []int64{2, 1, 2, 2, 2, 1}}, - } { - t.Run(fmt.Sprintf("chunksz=%d", tc.sz), func(t *testing.T) { - tr := array.NewTableReader(tbl, tc.sz) - defer tr.Release() - - if got, want := tr.Schema(), tbl.Schema(); !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - - var ( - n int64 - sum int64 - ) - for tr.Next() { - rec := tr.Record() - if got, want := rec.Schema(), tbl.Schema(); !got.Equal(want) { - t.Fatalf("invalid schema: got=%#v, want=%#v", got, want) - } - if got, want := rec.NumRows(), tc.rows[n]; got != want { - t.Fatalf("invalid number of rows[%d]: got=%d, want=%d", n, got, want) - } - n++ - sum += rec.NumRows() - } - if err := tr.Err(); err != nil { - t.Fatalf("tr err: %#v", err) - } - - if got, want := n, tc.n; got != want { - t.Fatalf("invalid number of iterations: got=%d, want=%d", got, want) - } - if sum != tbl.NumRows() { - t.Fatalf("invalid number of rows iterated over: got=%d, want=%d", sum, tbl.NumRows()) - } - }) - } -} - -func TestTableToString(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema( - []arrow.Field{ - {Name: "f1-i32", Type: arrow.PrimitiveTypes.Int32}, - {Name: "f2-f64", Type: arrow.PrimitiveTypes.Float64}, - }, - nil, - ) - - b := array.NewRecordBuilder(mem, schema) - defer b.Release() - - b.Field(0).(*array.Int32Builder).AppendValues([]int32{1, 2, 3, 4, 5, 6}, nil) - b.Field(0).(*array.Int32Builder).AppendValues([]int32{7, 8, 9, 10}, []bool{true, true, false, true}) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, nil) - - rec1 := b.NewRecord() - defer rec1.Release() - - 
b.Field(0).(*array.Int32Builder).AppendValues([]int32{111, 112, 113, 114, 115, 116, 117, 118, 119, 120}, nil) - b.Field(1).(*array.Float64Builder).AppendValues([]float64{211, 212, 213, 214, 215, 216, 217, 218, 219, 220}, nil) - - rec2 := b.NewRecord() - defer rec2.Release() - - tbl := array.NewTableFromRecords(schema, []arrow.Record{rec1, rec2}) - defer tbl.Release() - - table_str := tbl.String() - expected_str := - `schema: - fields: 2 - - f1-i32: type=int32 - - f2-f64: type=float64 -f1-i32: [[1 2 3 4 5 6 7 8 (null) 10], [111 112 113 114 115 116 117 118 119 120]] -f2-f64: [[11 12 13 14 15 16 17 18 19 20], [211 212 213 214 215 216 217 218 219 220]] -` - if got, want := table_str, expected_str; table_str != expected_str { - t.Fatalf("invalid String: got=%#v, want=%#v", got, want) - } -} diff --git a/go/arrow/array/timestamp.go b/go/arrow/array/timestamp.go deleted file mode 100644 index 679d9a5a8a4cc..0000000000000 --- a/go/arrow/array/timestamp.go +++ /dev/null @@ -1,380 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array - -import ( - "bytes" - "fmt" - "reflect" - "strings" - "sync/atomic" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Timestamp represents an immutable sequence of arrow.Timestamp values. -type Timestamp struct { - array - values []arrow.Timestamp -} - -// NewTimestampData creates a new Timestamp from Data. -func NewTimestampData(data arrow.ArrayData) *Timestamp { - a := &Timestamp{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// Reset resets the array for re-use. -func (a *Timestamp) Reset(data *Data) { - a.setData(data) -} - -// Value returns the value at the specified index. -func (a *Timestamp) Value(i int) arrow.Timestamp { return a.values[i] } - -// TimestampValues returns the values. -func (a *Timestamp) TimestampValues() []arrow.Timestamp { return a.values } - -// String returns a string representation of the array. 
-func (a *Timestamp) String() string { - o := new(strings.Builder) - o.WriteString("[") - for i, v := range a.values { - if i > 0 { - fmt.Fprintf(o, " ") - } - switch { - case a.IsNull(i): - o.WriteString(NullValueStr) - default: - fmt.Fprintf(o, "%v", v) - } - } - o.WriteString("]") - return o.String() -} - -func (a *Timestamp) setData(data *Data) { - a.array.setData(data) - vals := data.buffers[1] - if vals != nil { - a.values = arrow.TimestampTraits.CastFromBytes(vals.Bytes()) - beg := a.array.data.offset - end := beg + a.array.data.length - a.values = a.values[beg:end] - } -} - -func (a *Timestamp) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - toTime, _ := a.DataType().(*arrow.TimestampType).GetToTimeFunc() - return toTime(a.values[i]).Format("2006-01-02 15:04:05.999999999Z0700") -} - -func (a *Timestamp) GetOneForMarshal(i int) interface{} { - if val := a.ValueStr(i); val != NullValueStr { - return val - } - return nil -} - -func (a *Timestamp) MarshalJSON() ([]byte, error) { - vals := make([]interface{}, a.Len()) - for i := range a.values { - vals[i] = a.GetOneForMarshal(i) - } - - return json.Marshal(vals) -} - -func arrayEqualTimestamp(left, right *Timestamp) bool { - for i := 0; i < left.Len(); i++ { - if left.IsNull(i) { - continue - } - if left.Value(i) != right.Value(i) { - return false - } - } - return true -} - -type TimestampBuilder struct { - builder - - dtype *arrow.TimestampType - data *memory.Buffer - rawData []arrow.Timestamp -} - -func NewTimestampBuilder(mem memory.Allocator, dtype *arrow.TimestampType) *TimestampBuilder { - return &TimestampBuilder{builder: builder{refCount: 1, mem: mem}, dtype: dtype} -} - -func (b *TimestampBuilder) Type() arrow.DataType { return b.dtype } - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. 
-func (b *TimestampBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - if b.nullBitmap != nil { - b.nullBitmap.Release() - b.nullBitmap = nil - } - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - } -} - -func (b *TimestampBuilder) AppendTime(t time.Time) { - ts, err := arrow.TimestampFromTime(t, b.dtype.Unit) - if err != nil { - panic(err) - } - b.Append(ts) -} - -func (b *TimestampBuilder) Append(v arrow.Timestamp) { - b.Reserve(1) - b.UnsafeAppend(v) -} - -func (b *TimestampBuilder) AppendNull() { - b.Reserve(1) - b.UnsafeAppendBoolToBitmap(false) -} - -func (b *TimestampBuilder) AppendNulls(n int) { - for i := 0; i < n; i++ { - b.AppendNull() - } -} - -func (b *TimestampBuilder) AppendEmptyValue() { - b.Append(0) -} - -func (b *TimestampBuilder) AppendEmptyValues(n int) { - for i := 0; i < n; i++ { - b.AppendEmptyValue() - } -} - -func (b *TimestampBuilder) UnsafeAppend(v arrow.Timestamp) { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - b.rawData[b.length] = v - b.length++ -} - -func (b *TimestampBuilder) UnsafeAppendBoolToBitmap(isValid bool) { - if isValid { - bitutil.SetBit(b.nullBitmap.Bytes(), b.length) - } else { - b.nulls++ - } - b.length++ -} - -// AppendValues will append the values in the v slice. The valid slice determines which values -// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty, -// all values in v are appended and considered valid. 
-func (b *TimestampBuilder) AppendValues(v []arrow.Timestamp, valid []bool) { - if len(v) != len(valid) && len(valid) != 0 { - panic("len(v) != len(valid) && len(valid) != 0") - } - - if len(v) == 0 { - return - } - - b.Reserve(len(v)) - arrow.TimestampTraits.Copy(b.rawData[b.length:], v) - b.builder.unsafeAppendBoolsToBitmap(valid, len(v)) -} - -func (b *TimestampBuilder) init(capacity int) { - b.builder.init(capacity) - - b.data = memory.NewResizableBuffer(b.mem) - bytesN := arrow.TimestampTraits.BytesRequired(capacity) - b.data.Resize(bytesN) - b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) -} - -// Reserve ensures there is enough space for appending n elements -// by checking the capacity and calling Resize if necessary. -func (b *TimestampBuilder) Reserve(n int) { - b.builder.reserve(n, b.Resize) -} - -// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(), -// additional memory will be allocated. If n is smaller, the allocated memory may reduced. -func (b *TimestampBuilder) Resize(n int) { - nBuilder := n - if n < minBuilderCapacity { - n = minBuilderCapacity - } - - if b.capacity == 0 { - b.init(n) - } else { - b.builder.resize(nBuilder, b.init) - b.data.Resize(arrow.TimestampTraits.BytesRequired(n)) - b.rawData = arrow.TimestampTraits.CastFromBytes(b.data.Bytes()) - } -} - -// NewArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder -// so it can be used to build a new array. -func (b *TimestampBuilder) NewArray() arrow.Array { - return b.NewTimestampArray() -} - -// NewTimestampArray creates a Timestamp array from the memory buffers used by the builder and resets the TimestampBuilder -// so it can be used to build a new array. 
-func (b *TimestampBuilder) NewTimestampArray() (a *Timestamp) { - data := b.newData() - a = NewTimestampData(data) - data.Release() - return -} - -func (b *TimestampBuilder) newData() (data *Data) { - bytesRequired := arrow.TimestampTraits.BytesRequired(b.length) - if bytesRequired > 0 && bytesRequired < b.data.Len() { - // trim buffers - b.data.Resize(bytesRequired) - } - data = NewData(b.dtype, b.length, []*memory.Buffer{b.nullBitmap, b.data}, nil, b.nulls, 0) - b.reset() - - if b.data != nil { - b.data.Release() - b.data = nil - b.rawData = nil - } - - return -} - -func (b *TimestampBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - b.AppendNull() - return nil - } - - loc, err := b.dtype.GetZone() - if err != nil { - return err - } - - v, _, err := arrow.TimestampFromStringInLocation(s, b.dtype.Unit, loc) - if err != nil { - b.AppendNull() - return err - } - b.Append(v) - return nil -} - -func (b *TimestampBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch v := t.(type) { - case nil: - b.AppendNull() - case string: - loc, _ := b.dtype.GetZone() - tm, _, err := arrow.TimestampFromStringInLocation(v, b.dtype.Unit, loc) - if err != nil { - return &json.UnmarshalTypeError{ - Value: v, - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - - b.Append(tm) - case json.Number: - n, err := v.Int64() - if err != nil { - return &json.UnmarshalTypeError{ - Value: v.String(), - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - b.Append(arrow.Timestamp(n)) - case float64: - b.Append(arrow.Timestamp(v)) - - default: - return &json.UnmarshalTypeError{ - Value: fmt.Sprint(t), - Type: reflect.TypeOf(arrow.Timestamp(0)), - Offset: dec.InputOffset(), - } - } - - return nil -} - -func (b *TimestampBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - 
return nil -} - -func (b *TimestampBuilder) UnmarshalJSON(data []byte) error { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("binary builder must unpack from json array, found %s", delim) - } - - return b.Unmarshal(dec) -} - -var ( - _ arrow.Array = (*Timestamp)(nil) - _ Builder = (*TimestampBuilder)(nil) -) diff --git a/go/arrow/array/timestamp_test.go b/go/arrow/array/timestamp_test.go deleted file mode 100644 index cb9f957d3f255..0000000000000 --- a/go/arrow/array/timestamp_test.go +++ /dev/null @@ -1,300 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array_test - -import ( - "testing" - "time" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestTimestampStringRoundTrip(t *testing.T) { - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.TimestampType{Unit: arrow.Second} - b := array.NewTimestampBuilder(mem, dt) - defer b.Release() - - b.Append(1) - b.Append(2) - b.Append(3) - b.AppendNull() - b.Append(5) - b.Append(6) - b.AppendNull() - b.Append(8) - b.Append(9) - b.Append(10) - - arr := b.NewArray().(*array.Timestamp) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewTimestampBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - assert.NoError(t, b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.Timestamp) - defer arr1.Release() - - assert.True(t, array.Equal(arr, arr1)) -} - -func TestNewTimestampBuilder(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - timestamp := time.Now() - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - ab.Retain() - ab.Release() - - ab.Append(1) - ab.Append(2) - ab.Append(3) - ab.AppendNull() - ab.Append(5) - ab.Append(6) - ab.AppendNull() - ab.Append(8) - ab.Append(9) - ab.Append(10) - ab.AppendTime(timestamp) - - // check state of builder before NewTimestampArray - assert.Equal(t, 11, ab.Len(), "unexpected Len()") - assert.Equal(t, 2, ab.NullN(), "unexpected NullN()") - - a := ab.NewTimestampArray() - - // check state of builder after NewTimestampArray - assert.Zero(t, ab.Len(), "unexpected ArrayBuilder.Len(), NewTimestampArray did not reset state") - assert.Zero(t, ab.Cap(), "unexpected ArrayBuilder.Cap(), NewTimestampArray did not reset state") - assert.Zero(t, ab.NullN(), "unexpected ArrayBuilder.NullN(), NewTimestampArray did not reset state") - - // check state of array - assert.Equal(t, 2, a.NullN(), "unexpected null count") - assert.Equal(t, []arrow.Timestamp{1, 2, 3, 0, 5, 6, 0, 8, 9, 10, arrow.Timestamp(timestamp.Unix())}, 
a.TimestampValues(), "unexpected TimestampValues") - assert.Equal(t, []byte{0xb7}, a.NullBitmapBytes()[:1]) // 4 bytes due to minBuilderCapacity - assert.Len(t, a.TimestampValues(), 11, "unexpected length of TimestampValues") - - a.Release() - - ab.Append(7) - ab.Append(8) - - a = ab.NewTimestampArray() - - assert.Equal(t, 0, a.NullN()) - assert.Equal(t, []arrow.Timestamp{7, 8}, a.TimestampValues()) - assert.Len(t, a.TimestampValues(), 2) - - a.Release() - - var ( - want = []arrow.Timestamp{1, 2, 3, 4} - valids = []bool{true, true, false, true} - ) - - ab.AppendValues(want, valids) - a = ab.NewTimestampArray() - - sub := array.MakeFromData(a.Data()) - defer sub.Release() - - if got, want := sub.DataType().ID(), a.DataType().ID(); got != want { - t.Fatalf("invalid type: got=%q, want=%q", got, want) - } - - if _, ok := sub.(*array.Timestamp); !ok { - t.Fatalf("could not type-assert to array.Timestamp") - } - - if got, want := a.String(), `[1 2 (null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - slice := array.NewSliceData(a.Data(), 2, 4) - defer slice.Release() - - sub1 := array.MakeFromData(slice) - defer sub1.Release() - - v, ok := sub1.(*array.Timestamp) - if !ok { - t.Fatalf("could not type-assert to array.Timestamp") - } - - if got, want := v.String(), `[(null) 4]`; got != want { - t.Fatalf("got=%q, want=%q", got, want) - } - - a.Release() -} - -func TestTimestampBuilder_AppendValues(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Timestamp{0, 1, 2, 3} - ab.AppendValues(exp, nil) - a := ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - - a.Release() -} - -func TestTimestampBuilder_Empty(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := 
&arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - exp := []arrow.Timestamp{0, 1, 2, 3} - - ab.AppendValues([]arrow.Timestamp{}, nil) - a := ab.NewTimestampArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues(nil, nil) - a = ab.NewTimestampArray() - assert.Zero(t, a.Len()) - a.Release() - - ab.AppendValues([]arrow.Timestamp{}, nil) - ab.AppendValues(exp, nil) - a = ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - a.Release() - - ab.AppendValues(exp, nil) - ab.AppendValues([]arrow.Timestamp{}, nil) - a = ab.NewTimestampArray() - assert.Equal(t, exp, a.TimestampValues()) - a.Release() -} - -func TestTimestampBuilder_Resize(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dtype := &arrow.TimestampType{Unit: arrow.Second} - ab := array.NewTimestampBuilder(mem, dtype) - defer ab.Release() - - assert.Equal(t, 0, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - ab.Reserve(63) - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 0, ab.Len()) - - for i := 0; i < 63; i++ { - ab.Append(0) - } - assert.Equal(t, 64, ab.Cap()) - assert.Equal(t, 63, ab.Len()) - - ab.Resize(5) - assert.Equal(t, 5, ab.Len()) - - ab.Resize(32) - assert.Equal(t, 5, ab.Len()) -} - -func TestTimestampValueStr(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - dt := &arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"} - b := array.NewTimestampBuilder(mem, dt) - defer b.Release() - - b.Append(-34226955) - b.Append(1456767743) - - arr := b.NewArray() - defer arr.Release() - - assert.Equal(t, "1968-11-30 13:30:45-0700", arr.ValueStr(0)) - assert.Equal(t, "2016-02-29 10:42:23-0700", arr.ValueStr(1)) -} - -func TestTimestampEquality(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - tsDatatypes := []*arrow.TimestampType{ - 
{Unit: arrow.Second}, - {Unit: arrow.Second, TimeZone: "UTC"}, - {Unit: arrow.Second, TimeZone: "America/Phoenix"}, - } - - arrs := make([]*array.Timestamp, 0, len(tsDatatypes)) - for _, dt := range tsDatatypes { - bldr := array.NewTimestampBuilder(mem, dt) - defer bldr.Release() - - bldr.Append(-34226955) - bldr.Append(1456767743) - - arr := bldr.NewTimestampArray() - defer arr.Release() - - arrs = append(arrs, arr) - } - - // No timezone, "wall clock" semantics - // These timestamps have no actual timezone, but we still represent as UTC per Go conventions - assert.Equal(t, "1968-11-30 20:30:45Z", arrs[0].ValueStr(0)) - assert.Equal(t, "2016-02-29 17:42:23Z", arrs[0].ValueStr(1)) - - // UTC timezone, "instant" semantics - assert.Equal(t, "1968-11-30 20:30:45Z", arrs[1].ValueStr(0)) - assert.Equal(t, "2016-02-29 17:42:23Z", arrs[1].ValueStr(1)) - - // America/Phoenix timezone, "instant" semantics - assert.Equal(t, "1968-11-30 13:30:45-0700", arrs[2].ValueStr(0)) - assert.Equal(t, "2016-02-29 10:42:23-0700", arrs[2].ValueStr(1)) - - // Despite timezone and semantics, the physical values are equivalent - assert.Equal(t, arrs[0].Value(0), arrs[1].Value(0)) - assert.Equal(t, arrs[0].Value(0), arrs[2].Value(0)) - assert.Equal(t, arrs[1].Value(0), arrs[2].Value(0)) - - assert.Equal(t, arrs[0].Value(1), arrs[1].Value(1)) - assert.Equal(t, arrs[0].Value(1), arrs[2].Value(1)) - assert.Equal(t, arrs[1].Value(1), arrs[2].Value(1)) -} diff --git a/go/arrow/array/union.go b/go/arrow/array/union.go deleted file mode 100644 index 5d2a8b8ecb2f0..0000000000000 --- a/go/arrow/array/union.go +++ /dev/null @@ -1,1370 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "bytes" - "errors" - "fmt" - "math" - "reflect" - "strings" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/bitutils" - "github.com/apache/arrow/go/v18/internal/json" -) - -// Union is a convenience interface to encompass both Sparse and Dense -// union array types. -type Union interface { - arrow.Array - // NumFields returns the number of child fields in this union. - // Equivalent to len(UnionType().Fields()) - NumFields() int - // Validate returns an error if there are any issues with the lengths - // or types of the children arrays mismatching with the Type of the - // Union Array. nil is returned if there are no problems. - Validate() error - // ValidateFull runs the same checks that Validate() does, but additionally - // checks that all childIDs are valid (>= 0 || ==InvalidID) and for - // dense unions validates that all offsets are within the bounds of their - // respective child. - ValidateFull() error - // TypeCodes returns the type id buffer for the union Array, equivalent to - // Data().Buffers()[1]. Note: This will not account for any slice offset. 
TypeCodes() *memory.Buffer
	// RawTypeCodes returns a slice of UnionTypeCodes properly accounting for
	// any slice offset.
	RawTypeCodes() []arrow.UnionTypeCode
	// TypeCode returns the logical type code of the value at the requested index
	TypeCode(i int) arrow.UnionTypeCode
	// ChildID returns the index of the physical child containing the value
	// at the requested index. Equivalent to:
	//
	//	arr.UnionType().ChildIDs()[arr.RawTypeCodes()[i]]
	//
	// (RawTypeCodes is already adjusted for the array's slice offset.)
	ChildID(i int) int
	// UnionType is a convenience function to retrieve the properly typed UnionType
	// instead of having to call DataType() and manually assert the type.
	UnionType() arrow.UnionType
	// Mode returns the union mode of the underlying Array, either arrow.SparseMode
	// or arrow.DenseMode.
	Mode() arrow.UnionMode
	// Field returns the requested child array for this union. Returns nil if a
	// nonexistent position is passed in.
	//
	// The appropriate child for an index can be retrieved with Field(ChildID(index))
	Field(pos int) arrow.Array
}

// kMaxElems bounds how many elements a single dense-union child may hold,
// since value offsets are stored as signed 32-bit integers.
const kMaxElems = math.MaxInt32

// union is the common implementation embedded by both SparseUnion and
// DenseUnion.
type union struct {
	array

	unionType arrow.UnionType
	// raw type id slice over buffer 1; NOT adjusted for the array offset
	typecodes []arrow.UnionTypeCode

	children []arrow.Array
}

// Retain increases the reference count by 1 on the array and all of its children.
func (a *union) Retain() {
	a.array.Retain()
	for _, c := range a.children {
		c.Retain()
	}
}

// Release decreases the reference count by 1 on the array and all of its children.
func (a *union) Release() {
	a.array.Release()
	for _, c := range a.children {
		c.Release()
	}
}

func (a *union) NumFields() int { return len(a.unionType.Fields()) }

func (a *union) Mode() arrow.UnionMode { return a.unionType.Mode() }

func (a *union) UnionType() arrow.UnionType { return a.unionType }

// TypeCodes returns the buffer of logical type ids (buffer index 1).
func (a *union) TypeCodes() *memory.Buffer {
	return a.data.buffers[1]
}

// RawTypeCodes returns the type id slice adjusted for the array's offset.
func (a *union) RawTypeCodes() []arrow.UnionTypeCode {
	if a.data.length > 0 {
		return a.typecodes[a.data.offset:]
	}
	return []arrow.UnionTypeCode{}
}

func (a *union) TypeCode(i int) arrow.UnionTypeCode {
	return a.typecodes[i+a.data.offset]
}

func (a *union) ChildID(i int) int {
	return a.unionType.ChildIDs()[a.typecodes[i+a.data.offset]]
}

// setData initializes the union from data, materializing the child arrays.
// For sparse unions, children are re-sliced so they line up with the union's
// own offset and length.
func (a *union) setData(data *Data) {
	a.unionType = data.dtype.(arrow.UnionType)
	debug.Assert(len(data.buffers) >= 2, "arrow/array: invalid number of union array buffers")

	if data.length > 0 {
		a.typecodes = arrow.Int8Traits.CastFromBytes(data.buffers[1].Bytes())
	} else {
		a.typecodes = []int8{}
	}
	a.children = make([]arrow.Array, len(data.childData))
	for i, child := range data.childData {
		if a.unionType.Mode() == arrow.SparseMode && (data.offset != 0 || child.Len() != data.length) {
			child = NewSliceData(child, int64(data.offset), int64(data.offset+data.length))
			// MakeFromData retains child, so drop the extra reference created
			// by NewSliceData when this function returns.
			defer child.Release()
		}
		a.children[i] = MakeFromData(child)
	}
	a.array.setData(data)
}

// Field returns the requested child array, or nil for an out-of-range position.
func (a *union) Field(pos int) (result arrow.Array) {
	if pos < 0 || pos >= len(a.children) {
		return nil
	}

	return a.children[pos]
}

// Validate performs cheap structural validation: sparse child lengths and
// child datatypes against the union type's fields.
func (a *union) Validate() error {
	fields := a.unionType.Fields()
	for i, f := range fields {
		fieldData := a.data.childData[i]
		if a.unionType.Mode() == arrow.SparseMode && fieldData.Len() < a.data.length+a.data.offset {
			return fmt.Errorf("arrow/array: sparse union child array #%d has length smaller than expected for union array (%d < %d)",
				i, fieldData.Len(), a.data.length+a.data.offset)
		}

		if !arrow.TypeEqual(f.Type, fieldData.DataType()) {
			return fmt.Errorf("arrow/array: union child array #%d does not match type field %s vs %s",
				i, fieldData.DataType(), f.Type)
		}
	}
	return nil
}

// ValidateFull runs Validate plus O(n) validation of every type code and,
// for dense mode, every value offset.
func (a *union) ValidateFull() error {
	if err := a.Validate(); err != nil {
		return err
	}

	childIDs := a.unionType.ChildIDs()
	codesMap := a.unionType.TypeCodes()
	codes := a.RawTypeCodes()

	for i := 0; i < a.data.length; i++ {
		code := codes[i]
		if code < 0 || childIDs[code] == arrow.InvalidUnionChildID {
			return fmt.Errorf("arrow/array: union value at position %d has invalid type id %d", i, code)
		}
	}

	if a.unionType.Mode() == arrow.DenseMode {
		// validate offsets

		// map logical typeid to child length
		var childLengths [256]int64
		for i := range a.unionType.Fields() {
			childLengths[codesMap[i]] = int64(a.data.childData[i].Len())
		}

		// check offsets are in bounds and monotonic per type code
		var lastOffsets [256]int64
		offsets := arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes())[a.data.offset:]
		for i := int64(0); i < int64(a.data.length); i++ {
			code := codes[i]
			offset := offsets[i]
			switch {
			case offset < 0:
				return fmt.Errorf("arrow/array: union value at position %d has negative offset %d", i, offset)
			case offset >= int32(childLengths[code]):
				return fmt.Errorf("arrow/array: union value at position %d has offset larger than child length (%d >= %d)",
					i, offset, childLengths[code])
			case offset < int32(lastOffsets[code]):
				return fmt.Errorf("arrow/array: union value at position %d has non-monotonic offset %d", i, offset)
			}
			lastOffsets[code] = int64(offset)
		}
	}

	return nil
}

// SparseUnion represents an array where each logical value is taken from
// a single child. A buffer of 8-bit type ids indicates which child a given
// logical value is to be taken from. This is represented as the ChildID,
// which is the index into the list of children.
//
// In a sparse union, each child array will have the same length as the
// union array itself, regardless of how many values in the union actually
// refer to it.
//
// Unlike most other arrays, unions do not have a top-level validity bitmap.
type SparseUnion struct {
	union
}

// NewSparseUnion constructs a union array using the given type, length, list of
// children and buffer of typeIDs with the given offset.
func NewSparseUnion(dt *arrow.SparseUnionType, length int, children []arrow.Array, typeIDs *memory.Buffer, offset int) *SparseUnion {
	childData := make([]arrow.ArrayData, len(children))
	for i, c := range children {
		childData[i] = c.Data()
	}
	// buffer 0 (validity) is always nil for unions
	data := NewData(dt, length, []*memory.Buffer{nil, typeIDs}, childData, 0, offset)
	defer data.Release()
	return NewSparseUnionData(data)
}

// NewSparseUnionData constructs a SparseUnion array from the given ArrayData object.
func NewSparseUnionData(data arrow.ArrayData) *SparseUnion {
	a := &SparseUnion{}
	a.refCount = 1
	a.setData(data.(*Data))
	return a
}

// NewSparseUnionFromArrays constructs a new SparseUnion array with the provided
// values.
//
// typeIDs *must* be an INT8 array with no nulls
// len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0,
// the type codes used will be sequentially numeric starting at 0.
func NewSparseUnionFromArrays(typeIDs arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*SparseUnion, error) {
	return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, []string{}, codes)
}

// NewSparseUnionFromArrayWithFields constructs a new SparseUnion array like
// NewSparseUnionFromArrays, but allows specifying the field names. Type codes
// will be auto-generated sequentially starting at 0.
//
// typeIDs *must* be an INT8 array with no nulls.
// len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0,
// then the fields will be named sequentially starting at "0".
func NewSparseUnionFromArraysWithFields(typeIDs arrow.Array, children []arrow.Array, fields []string) (*SparseUnion, error) {
	return NewSparseUnionFromArraysWithFieldCodes(typeIDs, children, fields, []arrow.UnionTypeCode{})
}

// NewSparseUnionFromArraysWithFieldCodes combines the other constructors
// for constructing a new SparseUnion array with the provided field names
// and type codes, along with children and type ids.
//
// All the requirements mentioned in NewSparseUnionFromArrays and
// NewSparseUnionFromArraysWithFields apply.
func NewSparseUnionFromArraysWithFieldCodes(typeIDs arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*SparseUnion, error) {
	// validate the inputs before constructing anything
	switch {
	case typeIDs.DataType().ID() != arrow.INT8:
		return nil, errors.New("arrow/array: union array type ids must be signed int8")
	case typeIDs.NullN() != 0:
		return nil, errors.New("arrow/array: union type ids may not have nulls")
	case len(fields) > 0 && len(fields) != len(children):
		return nil, errors.New("arrow/array: field names must have the same length as children")
	case len(codes) > 0 && len(codes) != len(children):
		return nil, errors.New("arrow/array: type codes must have same length as children")
	}

	buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1]}
	ty := arrow.SparseUnionFromArrays(children, fields, codes)

	childData := make([]arrow.ArrayData, len(children))
	for i, c := range children {
		childData[i] = c.Data()
		// sparse unions require every child to be exactly as long as the union
		if c.Len() != typeIDs.Len() {
			return nil, errors.New("arrow/array: sparse union array must have len(child) == len(typeids) for all children")
		}
	}

	data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset())
	defer data.Release()
	return NewSparseUnionData(data), nil
}

// setData asserts the sparse-union invariants on top of union.setData.
func (a *SparseUnion) setData(data *Data) {
	a.union.setData(data)
	debug.Assert(a.data.dtype.ID() == arrow.SPARSE_UNION, "arrow/array: invalid data type for SparseUnion")
	debug.Assert(len(a.data.buffers) == 2, "arrow/array: sparse unions should have exactly 2 buffers")
	debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for sparse unions should be nil")
}

// GetOneForMarshal returns the JSON-marshalable representation of element i:
// nil when the referenced child value is null, otherwise [typeID, value].
func (a *SparseUnion) GetOneForMarshal(i int) interface{} {
	typeID := a.RawTypeCodes()[i]

	childID := a.ChildID(i)
	data := a.Field(childID)

	if data.IsNull(i) {
		return nil
	}

	return []interface{}{typeID, data.GetOneForMarshal(i)}
}
-func (a *SparseUnion) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func (a *SparseUnion) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - val := a.GetOneForMarshal(i) - if val == nil { - // child is nil - return NullValueStr - } - - data, err := json.Marshal(val) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *SparseUnion) String() string { - var b strings.Builder - b.WriteByte('[') - - fieldList := a.unionType.Fields() - for i := 0; i < a.Len(); i++ { - if i > 0 { - b.WriteString(" ") - } - - field := fieldList[a.ChildID(i)] - f := a.Field(a.ChildID(i)) - fmt.Fprintf(&b, "{%s=%v}", field.Name, f.GetOneForMarshal(i)) - } - b.WriteByte(']') - return b.String() -} - -// GetFlattenedField returns a child array, adjusting its validity bitmap -// where the union array type codes don't match. -// -// ie: the returned array will have a null in every index that it is -// not referenced by union. 
func (a *SparseUnion) GetFlattenedField(mem memory.Allocator, index int) (arrow.Array, error) {
	if index < 0 || index >= a.NumFields() {
		return nil, fmt.Errorf("arrow/array: index out of range: %d", index)
	}

	childData := a.data.childData[index]
	if a.data.offset != 0 || a.data.length != childData.Len() {
		childData = NewSliceData(childData, int64(a.data.offset), int64(a.data.offset+a.data.length))
		// NewSliceData doesn't break the slice reference for buffers
		// since we're going to replace the null bitmap buffer we need to break the
		// slice reference so that we don't affect a.children's references
		newBufs := make([]*memory.Buffer, len(childData.Buffers()))
		copy(newBufs, childData.(*Data).buffers)
		childData.(*Data).buffers = newBufs
	} else {
		childData = childData.(*Data).Copy()
	}
	defer childData.Release()

	// synthesize a null bitmap based on the union discriminant
	// make sure the bitmap has extra bits corresponding to the child's offset
	flattenedNullBitmap := memory.NewResizableBuffer(mem)
	flattenedNullBitmap.Resize(childData.Len() + childData.Offset())

	var (
		childNullBitmap       = childData.Buffers()[0]
		childOffset           = childData.Offset()
		typeCode              = a.unionType.TypeCodes()[index]
		codes                 = a.RawTypeCodes()
		offset          int64 = 0
	)
	// set bit i iff the union's type code at i selects this child
	bitutils.GenerateBitsUnrolled(flattenedNullBitmap.Bytes(), int64(childOffset), int64(a.data.length),
		func() bool {
			b := codes[offset] == typeCode
			offset++
			return b
		})

	if childNullBitmap != nil {
		// the copied/sliced childData held a reference to the original bitmap;
		// it is replaced below, so release that reference on return
		defer childNullBitmap.Release()
		// AND with the child's own validity so values null in the child stay null
		bitutil.BitmapAnd(flattenedNullBitmap.Bytes(), childNullBitmap.Bytes(),
			int64(childOffset), int64(childOffset), flattenedNullBitmap.Bytes(),
			int64(childOffset), int64(childData.Len()))
	}
	childData.(*Data).buffers[0] = flattenedNullBitmap
	childData.(*Data).nulls = childData.Len() - bitutil.CountSetBits(flattenedNullBitmap.Bytes(), childOffset, childData.Len())
	return MakeFromData(childData), nil
}

// arraySparseUnionEqual reports element-wise equality of two sparse unions
// of equal length and type.
func arraySparseUnionEqual(l, r *SparseUnion) bool {
	childIDs := l.unionType.ChildIDs()
	leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes()

	for i := 0; i < l.data.length; i++ {
		typeID := leftCodes[i]
		if typeID != rightCodes[i] {
			return false
		}

		childNum := childIDs[typeID]
		// children were already re-sliced for the union's offset in setData,
		// so i indexes them directly
		eq := SliceEqual(l.children[childNum], int64(i), int64(i+1),
			r.children[childNum], int64(i), int64(i+1))
		if !eq {
			return false
		}
	}
	return true
}

// arraySparseUnionApproxEqual is like arraySparseUnionEqual but compares
// child values with the provided approximate-equality options.
func arraySparseUnionApproxEqual(l, r *SparseUnion, opt equalOption) bool {
	childIDs := l.unionType.ChildIDs()
	leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes()

	for i := 0; i < l.data.length; i++ {
		typeID := leftCodes[i]
		if typeID != rightCodes[i] {
			return false
		}

		childNum := childIDs[typeID]
		// NOTE(review): this indexes children with i+data.offset, while
		// arraySparseUnionEqual above uses plain i even though setData already
		// slices sparse children by the offset — verify one of the two is not
		// double-applying the offset when the union is sliced.
		eq := sliceApproxEqual(l.children[childNum], int64(i+l.data.offset), int64(i+l.data.offset+1),
			r.children[childNum], int64(i+r.data.offset), int64(i+r.data.offset+1), opt)
		if !eq {
			return false
		}
	}
	return true
}

// DenseUnion represents an array where each logical value is taken from
// a single child, at a specific offset. A buffer of 8-bit type ids
// indicates which child a given logical value is to be taken from and
// a buffer of 32-bit offsets indicating which physical position in the
// given child array has the logical value for that index.
//
// Unlike a sparse union, a dense union allows encoding only the child values
// which are actually referred to by the union array. This is counterbalanced
// by the additional footprint of the offsets buffer, and the additional
// indirection cost when looking up values.
//
// Unlike most other arrays, unions do not have a top-level validity bitmap.
type DenseUnion struct {
	union
	// value offsets over buffer 2; NOT adjusted for the array offset
	offsets []int32
}

// NewDenseUnion constructs a union array using the given type, length, list of
// children and buffers of typeIDs and offsets, with the given array offset.
-func NewDenseUnion(dt *arrow.DenseUnionType, length int, children []arrow.Array, typeIDs, valueOffsets *memory.Buffer, offset int) *DenseUnion { - childData := make([]arrow.ArrayData, len(children)) - for i, c := range children { - childData[i] = c.Data() - } - - data := NewData(dt, length, []*memory.Buffer{nil, typeIDs, valueOffsets}, childData, 0, offset) - defer data.Release() - return NewDenseUnionData(data) -} - -// NewDenseUnionData constructs a DenseUnion array from the given ArrayData object. -func NewDenseUnionData(data arrow.ArrayData) *DenseUnion { - a := &DenseUnion{} - a.refCount = 1 - a.setData(data.(*Data)) - return a -} - -// NewDenseUnionFromArrays constructs a new DenseUnion array with the provided -// values. -// -// typeIDs *must* be an INT8 array with no nulls -// offsets *must* be an INT32 array with no nulls -// len(codes) *must* be either 0 or equal to len(children). If len(codes) is 0, -// the type codes used will be sequentially numeric starting at 0. -func NewDenseUnionFromArrays(typeIDs, offsets arrow.Array, children []arrow.Array, codes ...arrow.UnionTypeCode) (*DenseUnion, error) { - return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, []string{}, codes) -} - -// NewDenseUnionFromArrayWithFields constructs a new DenseUnion array like -// NewDenseUnionFromArrays, but allows specifying the field names. Type codes -// will be auto-generated sequentially starting at 0. -// -// typeIDs *must* be an INT8 array with no nulls. -// offsets *must* be an INT32 array with no nulls. -// len(fields) *must* either be 0 or equal to len(children). If len(fields) is 0, -// then the fields will be named sequentially starting at "0". 
-func NewDenseUnionFromArraysWithFields(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string) (*DenseUnion, error) { - return NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets, children, fields, []arrow.UnionTypeCode{}) -} - -// NewDenseUnionFromArraysWithFieldCodes combines the other constructors -// for constructing a new DenseUnion array with the provided field names -// and type codes, along with children and type ids. -// -// All the requirements mentioned in NewDenseUnionFromArrays and -// NewDenseUnionFromArraysWithFields apply. -func NewDenseUnionFromArraysWithFieldCodes(typeIDs, offsets arrow.Array, children []arrow.Array, fields []string, codes []arrow.UnionTypeCode) (*DenseUnion, error) { - switch { - case offsets.DataType().ID() != arrow.INT32: - return nil, errors.New("arrow/array: union offsets must be signed int32") - case typeIDs.DataType().ID() != arrow.INT8: - return nil, errors.New("arrow/array: union type_ids must be signed int8") - case typeIDs.NullN() != 0: - return nil, errors.New("arrow/array: union typeIDs may not have nulls") - case offsets.NullN() != 0: - return nil, errors.New("arrow/array: nulls are not allowed in offsets for NewDenseUnionFromArrays*") - case len(fields) > 0 && len(fields) != len(children): - return nil, errors.New("arrow/array: fields must be the same length as children") - case len(codes) > 0 && len(codes) != len(children): - return nil, errors.New("arrow/array: typecodes must have the same length as children") - } - - ty := arrow.DenseUnionFromArrays(children, fields, codes) - buffers := []*memory.Buffer{nil, typeIDs.Data().Buffers()[1], offsets.Data().Buffers()[1]} - - childData := make([]arrow.ArrayData, len(children)) - for i, c := range children { - childData[i] = c.Data() - } - - data := NewData(ty, typeIDs.Len(), buffers, childData, 0, typeIDs.Data().Offset()) - defer data.Release() - return NewDenseUnionData(data), nil -} - -func (a *DenseUnion) ValueOffsets() *memory.Buffer { return 
a.data.buffers[2] } - -func (a *DenseUnion) ValueOffset(i int) int32 { return a.offsets[i+a.data.offset] } - -func (a *DenseUnion) RawValueOffsets() []int32 { return a.offsets[a.data.offset:] } - -func (a *DenseUnion) setData(data *Data) { - a.union.setData(data) - debug.Assert(a.data.dtype.ID() == arrow.DENSE_UNION, "arrow/array: invalid data type for DenseUnion") - debug.Assert(len(a.data.buffers) == 3, "arrow/array: dense unions should have exactly 3 buffers") - debug.Assert(a.data.buffers[0] == nil, "arrow/array: validity bitmap for dense unions should be nil") - - if data.length > 0 { - a.offsets = arrow.Int32Traits.CastFromBytes(a.data.buffers[2].Bytes()) - } else { - a.offsets = []int32{} - } -} - -func (a *DenseUnion) GetOneForMarshal(i int) interface{} { - typeID := a.RawTypeCodes()[i] - - childID := a.ChildID(i) - data := a.Field(childID) - - offset := int(a.RawValueOffsets()[i]) - if data.IsNull(offset) { - return nil - } - - return []interface{}{typeID, data.GetOneForMarshal(offset)} -} - -func (a *DenseUnion) MarshalJSON() ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - - buf.WriteByte('[') - for i := 0; i < a.Len(); i++ { - if i != 0 { - buf.WriteByte(',') - } - if err := enc.Encode(a.GetOneForMarshal(i)); err != nil { - return nil, err - } - } - buf.WriteByte(']') - return buf.Bytes(), nil -} - -func (a *DenseUnion) ValueStr(i int) string { - if a.IsNull(i) { - return NullValueStr - } - - val := a.GetOneForMarshal(i) - if val == nil { - // child in nil - return NullValueStr - } - - data, err := json.Marshal(val) - if err != nil { - panic(err) - } - return string(data) -} - -func (a *DenseUnion) String() string { - var b strings.Builder - b.WriteByte('[') - - offsets := a.RawValueOffsets() - - fieldList := a.unionType.Fields() - for i := 0; i < a.Len(); i++ { - if i > 0 { - b.WriteString(" ") - } - - field := fieldList[a.ChildID(i)] - f := a.Field(a.ChildID(i)) - fmt.Fprintf(&b, "{%s=%v}", field.Name, 
f.GetOneForMarshal(int(offsets[i]))) - } - b.WriteByte(']') - return b.String() -} - -func arrayDenseUnionEqual(l, r *DenseUnion) bool { - childIDs := l.unionType.ChildIDs() - leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() - leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets() - - for i := 0; i < l.data.length; i++ { - typeID := leftCodes[i] - if typeID != rightCodes[i] { - return false - } - - childNum := childIDs[typeID] - eq := SliceEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1), - r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1)) - if !eq { - return false - } - } - return true -} - -func arrayDenseUnionApproxEqual(l, r *DenseUnion, opt equalOption) bool { - childIDs := l.unionType.ChildIDs() - leftCodes, rightCodes := l.RawTypeCodes(), r.RawTypeCodes() - leftOffsets, rightOffsets := l.RawValueOffsets(), r.RawValueOffsets() - - for i := 0; i < l.data.length; i++ { - typeID := leftCodes[i] - if typeID != rightCodes[i] { - return false - } - - childNum := childIDs[typeID] - eq := sliceApproxEqual(l.children[childNum], int64(leftOffsets[i]), int64(leftOffsets[i]+1), - r.children[childNum], int64(rightOffsets[i]), int64(rightOffsets[i]+1), opt) - if !eq { - return false - } - } - return true -} - -// UnionBuilder is a convenience interface for building Union arrays of -// either Dense or Sparse mode. -type UnionBuilder interface { - Builder - // AppendChild allows constructing the union type on the fly by making new - // new array builder available to the union builder. The type code (index) - // of the new child is returned, which should be passed to the Append method - // when adding a new element to the union array. - AppendChild(newChild Builder, fieldName string) (newCode arrow.UnionTypeCode) - // Append adds an element to the UnionArray indicating which typecode the - // new element should use. This *must* be followed up by an append to the - // appropriate child builder. 
- Append(arrow.UnionTypeCode) - // Mode returns what kind of Union is being built, either arrow.SparseMode - // or arrow.DenseMode - Mode() arrow.UnionMode - // Child returns the builder for the requested child index. - // If an invalid index is requested (e.g. <0 or >len(children)) - // then this will panic. - Child(idx int) Builder -} - -type unionBuilder struct { - builder - - childFields []arrow.Field - codes []arrow.UnionTypeCode - mode arrow.UnionMode - - children []Builder - typeIDtoBuilder []Builder - typeIDtoChildID []int - // for all typeID < denseTypeID, typeIDtoBuilder[typeID] != nil - denseTypeID arrow.UnionTypeCode - typesBuilder *int8BufferBuilder -} - -func newUnionBuilder(mem memory.Allocator, children []Builder, typ arrow.UnionType) unionBuilder { - if children == nil { - children = make([]Builder, 0) - } - b := unionBuilder{ - builder: builder{refCount: 1, mem: mem}, - mode: typ.Mode(), - codes: typ.TypeCodes(), - children: children, - typeIDtoChildID: make([]int, int(typ.MaxTypeCode())+1), // convert to int as int8(127) +1 panics - typeIDtoBuilder: make([]Builder, int(typ.MaxTypeCode())+1), // convert to int as int8(127) +1 panics - childFields: make([]arrow.Field, len(children)), - typesBuilder: newInt8BufferBuilder(mem), - } - - b.typeIDtoChildID[0] = arrow.InvalidUnionChildID - for i := 1; i < len(b.typeIDtoChildID); i *= 2 { - copy(b.typeIDtoChildID[i:], b.typeIDtoChildID[:i]) - } - - debug.Assert(len(children) == len(typ.TypeCodes()), "mismatched typecodes and children") - debug.Assert(len(b.typeIDtoBuilder)-1 <= int(arrow.MaxUnionTypeCode), "too many typeids") - - copy(b.childFields, typ.Fields()) - for i, c := range children { - c.Retain() - typeID := typ.TypeCodes()[i] - b.typeIDtoChildID[typeID] = i - b.typeIDtoBuilder[typeID] = c - } - - return b -} - -func (b *unionBuilder) NumChildren() int { - return len(b.children) -} - -func (b *unionBuilder) Child(idx int) Builder { - if idx < 0 || idx > len(b.children) { - panic("arrow/array: 
invalid child index for union builder") - } - return b.children[idx] -} - -// Len returns the current number of elements in the builder. -func (b *unionBuilder) Len() int { return b.typesBuilder.Len() } - -func (b *unionBuilder) Mode() arrow.UnionMode { return b.mode } - -func (b *unionBuilder) reserve(elements int, resize func(int)) { - // union has no null bitmap, ever so we can skip that handling - if b.length+elements > b.capacity { - b.capacity = bitutil.NextPowerOf2(b.length + elements) - resize(b.capacity) - } -} - -func (b *unionBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - for _, c := range b.children { - c.Release() - } - b.typesBuilder.Release() - } -} - -func (b *unionBuilder) Type() arrow.DataType { - fields := make([]arrow.Field, len(b.childFields)) - for i, f := range b.childFields { - fields[i] = f - fields[i].Type = b.children[i].Type() - } - - switch b.mode { - case arrow.SparseMode: - return arrow.SparseUnionOf(fields, b.codes) - case arrow.DenseMode: - return arrow.DenseUnionOf(fields, b.codes) - default: - panic("invalid union builder mode") - } -} - -func (b *unionBuilder) AppendChild(newChild Builder, fieldName string) arrow.UnionTypeCode { - newChild.Retain() - b.children = append(b.children, newChild) - newType := b.nextTypeID() - - b.typeIDtoChildID[newType] = len(b.children) - 1 - b.typeIDtoBuilder[newType] = newChild - b.childFields = append(b.childFields, arrow.Field{Name: fieldName, Nullable: true}) - b.codes = append(b.codes, newType) - - return newType -} - -func (b *unionBuilder) nextTypeID() arrow.UnionTypeCode { - // find typeID such that typeIDtoBuilder[typeID] == nil - // use that for the new child. 
Start searching at denseTypeID - // since typeIDtoBuilder is densely packed up at least to denseTypeID - for ; int(b.denseTypeID) < len(b.typeIDtoBuilder); b.denseTypeID++ { - if b.typeIDtoBuilder[b.denseTypeID] == nil { - id := b.denseTypeID - b.denseTypeID++ - return id - } - } - - debug.Assert(len(b.typeIDtoBuilder) < int(arrow.MaxUnionTypeCode), "too many children typeids") - // typeIDtoBuilder is already densely packed, so just append the new child - b.typeIDtoBuilder = append(b.typeIDtoBuilder, nil) - b.typeIDtoChildID = append(b.typeIDtoChildID, arrow.InvalidUnionChildID) - id := b.denseTypeID - b.denseTypeID++ - return id - -} - -func (b *unionBuilder) newData() *Data { - length := b.typesBuilder.Len() - typesBuffer := b.typesBuilder.Finish() - defer typesBuffer.Release() - childData := make([]arrow.ArrayData, len(b.children)) - for i, b := range b.children { - childData[i] = b.newData() - defer childData[i].Release() - } - - return NewData(b.Type(), length, []*memory.Buffer{nil, typesBuffer}, childData, 0, 0) -} - -// SparseUnionBuilder is used to build a Sparse Union array using the Append -// methods. You can also add new types to the union on the fly by using -// AppendChild. -// -// Keep in mind: All children of a SparseUnion should be the same length -// as the union itself. If you add new children with AppendChild, ensure -// that they have the correct number of preceding elements that have been -// added to the builder beforehand. -type SparseUnionBuilder struct { - unionBuilder -} - -// NewEmptySparseUnionBuilder is a helper to construct a SparseUnionBuilder -// without having to predefine the union types. It creates a builder with no -// children and AppendChild will have to be called before appending any -// elements to this builder. 
-func NewEmptySparseUnionBuilder(mem memory.Allocator) *SparseUnionBuilder { - return &SparseUnionBuilder{ - unionBuilder: newUnionBuilder(mem, nil, arrow.SparseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})), - } -} - -// NewSparseUnionBuilder constructs a new SparseUnionBuilder with the provided -// children and type codes. Builders will be constructed for each child -// using the fields in typ -func NewSparseUnionBuilder(mem memory.Allocator, typ *arrow.SparseUnionType) *SparseUnionBuilder { - children := make([]Builder, typ.NumFields()) - for i, f := range typ.Fields() { - children[i] = NewBuilder(mem, f.Type) - defer children[i].Release() - } - return NewSparseUnionBuilderWithBuilders(mem, typ, children) -} - -// NewSparseUnionWithBuilders returns a new SparseUnionBuilder using the -// provided type and builders. -func NewSparseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.SparseUnionType, children []Builder) *SparseUnionBuilder { - return &SparseUnionBuilder{ - unionBuilder: newUnionBuilder(mem, children, typ), - } -} - -func (b *SparseUnionBuilder) Reserve(n int) { - b.reserve(n, b.Resize) -} - -func (b *SparseUnionBuilder) Resize(n int) { - b.typesBuilder.resize(n) -} - -// AppendNull will append a null to the first child and an empty value -// (implementation-defined) to the rest of the children. -func (b *SparseUnionBuilder) AppendNull() { - firstChildCode := b.codes[0] - b.typesBuilder.AppendValue(firstChildCode) - b.typeIDtoBuilder[firstChildCode].AppendNull() - for _, c := range b.codes[1:] { - b.typeIDtoBuilder[c].AppendEmptyValue() - } -} - -// AppendNulls is identical to calling AppendNull() n times, except -// it will pre-allocate with reserve for all the nulls beforehand. 
-func (b *SparseUnionBuilder) AppendNulls(n int) { - firstChildCode := b.codes[0] - b.Reserve(n) - for _, c := range b.codes { - b.typeIDtoBuilder[c].Reserve(n) - } - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - b.typeIDtoBuilder[firstChildCode].AppendNull() - for _, c := range b.codes[1:] { - b.typeIDtoBuilder[c].AppendEmptyValue() - } - } -} - -// AppendEmptyValue appends an empty value (implementation defined) -// to each child, and appends the type of the first typecode to the typeid -// buffer. -func (b *SparseUnionBuilder) AppendEmptyValue() { - b.typesBuilder.AppendValue(b.codes[0]) - for _, c := range b.codes { - b.typeIDtoBuilder[c].AppendEmptyValue() - } -} - -// AppendEmptyValues is identical to calling AppendEmptyValue() n times, -// except it pre-allocates first so it is more efficient. -func (b *SparseUnionBuilder) AppendEmptyValues(n int) { - b.Reserve(n) - firstChildCode := b.codes[0] - for _, c := range b.codes { - b.typeIDtoBuilder[c].Reserve(n) - } - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - for _, c := range b.codes { - b.typeIDtoBuilder[c].AppendEmptyValue() - } - } -} - -// Append appends an element to the UnionArray and must be followed up -// by an append to the appropriate child builder. The parameter should -// be the type id of the child to which the next value will be appended. -// -// After appending to the corresponding child builder, all other child -// builders should have a null or empty value appended to them (although -// this is not enforced and any value is theoretically allowed and will be -// ignored). 
func (b *SparseUnionBuilder) Append(nextType arrow.UnionTypeCode) {
	b.typesBuilder.AppendValue(nextType)
}

func (b *SparseUnionBuilder) NewArray() arrow.Array {
	return b.NewSparseUnionArray()
}

// NewSparseUnionArray snapshots the builder contents into a new SparseUnion.
func (b *SparseUnionBuilder) NewSparseUnionArray() (a *SparseUnion) {
	data := b.newData()
	a = NewSparseUnionData(data)
	data.Release()
	return
}

// UnmarshalJSON populates the builder from a JSON array of
// [typeID, value]/null entries.
func (b *SparseUnionBuilder) UnmarshalJSON(data []byte) (err error) {
	dec := json.NewDecoder(bytes.NewReader(data))
	t, err := dec.Token()
	if err != nil {
		return err
	}

	if delim, ok := t.(json.Delim); !ok || delim != '[' {
		return fmt.Errorf("sparse union builder must unpack from json array, found %s", t)
	}
	return b.Unmarshal(dec)
}

// Unmarshal appends one element per remaining value in the decoder.
func (b *SparseUnionBuilder) Unmarshal(dec *json.Decoder) error {
	for dec.More() {
		if err := b.UnmarshalOne(dec); err != nil {
			return err
		}
	}
	return nil
}

// AppendValueFromString appends the value parsed from its JSON string form,
// or a null for NullValueStr.
func (b *SparseUnionBuilder) AppendValueFromString(s string) error {
	if s == NullValueStr {
		b.AppendNull()
		return nil
	}
	dec := json.NewDecoder(strings.NewReader(s))
	return b.UnmarshalOne(dec)
}

// UnmarshalOne decodes a single element: either null, or a two-element
// JSON array of [type_id, value].
func (b *SparseUnionBuilder) UnmarshalOne(dec *json.Decoder) error {
	t, err := dec.Token()
	if err != nil {
		return err
	}

	switch t {
	case json.Delim('['):
		// should be [type_id, Value]
		typeID, err := dec.Token()
		if err != nil {
			return err
		}

		var typeCode int8

		// the type id may arrive as a json.Number or a float64 depending on
		// decoder configuration; reject non-integral floats
		switch tid := typeID.(type) {
		case json.Number:
			id, err := tid.Int64()
			if err != nil {
				return err
			}
			typeCode = int8(id)
		case float64:
			if tid != float64(int64(tid)) {
				return &json.UnmarshalTypeError{
					Offset: dec.InputOffset(),
					Type:   reflect.TypeOf(int8(0)),
					Struct: fmt.Sprint(b.Type()),
					Value:  "float",
				}
			}
			typeCode = int8(tid)
		}

		childNum := b.typeIDtoChildID[typeCode]
		if childNum == arrow.InvalidUnionChildID {
			return &json.UnmarshalTypeError{
				Offset: dec.InputOffset(),
				Value:  "invalid type code",
			}
		}

		// sparse union: all non-selected children receive a null for this slot
		for i, c := range b.children {
			if i != childNum {
				c.AppendNull()
			}
		}

		b.Append(typeCode)
		if err := b.children[childNum].UnmarshalOne(dec); err != nil {
			return err
		}

		endArr, err := dec.Token()
		if err != nil {
			return err
		}

		if endArr != json.Delim(']') {
			return &json.UnmarshalTypeError{
				Offset: dec.InputOffset(),
				Value:  "union value array should have exactly 2 elements",
			}
		}
	case nil:
		b.AppendNull()
	default:
		return &json.UnmarshalTypeError{
			Offset: dec.InputOffset(),
			Value:  fmt.Sprint(t),
			Struct: fmt.Sprint(b.Type()),
		}
	}
	return nil
}

// DenseUnionBuilder is used to build a Dense Union array using the Append
// methods. You can also add new types to the union on the fly by using
// AppendChild.
type DenseUnionBuilder struct {
	unionBuilder

	offsetsBuilder *int32BufferBuilder
}

// NewEmptyDenseUnionBuilder is a helper to construct a DenseUnionBuilder
// without having to predefine the union types. It creates a builder with no
// children and AppendChild will have to be called before appending any
// elements to this builder.
func NewEmptyDenseUnionBuilder(mem memory.Allocator) *DenseUnionBuilder {
	return &DenseUnionBuilder{
		unionBuilder:   newUnionBuilder(mem, nil, arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})),
		offsetsBuilder: newInt32BufferBuilder(mem),
	}
}

// NewDenseUnionBuilder constructs a new DenseUnionBuilder with the provided
// children and type codes. Builders will be constructed for each child
// using the fields in typ
func NewDenseUnionBuilder(mem memory.Allocator, typ *arrow.DenseUnionType) *DenseUnionBuilder {
	children := make([]Builder, 0, typ.NumFields())
	// newUnionBuilder retains each child; release the construction
	// references once the union builder has been created
	defer func() {
		for _, child := range children {
			child.Release()
		}
	}()

	for _, f := range typ.Fields() {
		children = append(children, NewBuilder(mem, f.Type))
	}
	return NewDenseUnionBuilderWithBuilders(mem, typ, children)
}

// NewDenseUnionWithBuilders returns a new DenseUnionBuilder using the
// provided type and builders.
func NewDenseUnionBuilderWithBuilders(mem memory.Allocator, typ *arrow.DenseUnionType, children []Builder) *DenseUnionBuilder {
	return &DenseUnionBuilder{
		unionBuilder:   newUnionBuilder(mem, children, typ),
		offsetsBuilder: newInt32BufferBuilder(mem),
	}
}

// Reserve ensures capacity for n additional elements.
func (b *DenseUnionBuilder) Reserve(n int) {
	b.reserve(n, b.Resize)
}

// Resize adjusts the types and offsets buffers to hold n elements.
func (b *DenseUnionBuilder) Resize(n int) {
	b.typesBuilder.resize(n)
	b.offsetsBuilder.resize(n * arrow.Int32SizeBytes)
}

// AppendNull will only append a null value arbitrarily to the first child
// and use that offset for this element of the array.
func (b *DenseUnionBuilder) AppendNull() {
	firstChildCode := b.codes[0]
	childBuilder := b.typeIDtoBuilder[firstChildCode]
	b.typesBuilder.AppendValue(firstChildCode)
	b.offsetsBuilder.AppendValue(int32(childBuilder.Len()))
	childBuilder.AppendNull()
}

// AppendNulls will only append a single null arbitrarily to the first child
// and use the same offset multiple times to point to it. The result is that
// for a DenseUnion this is more efficient than calling AppendNull multiple
// times in a loop
func (b *DenseUnionBuilder) AppendNulls(n int) {
	// only append 1 null to the child builder; every offset points to it
	firstChildCode := b.codes[0]
	childBuilder := b.typeIDtoBuilder[firstChildCode]
	b.Reserve(n)
	for i := 0; i < n; i++ {
		b.typesBuilder.AppendValue(firstChildCode)
		b.offsetsBuilder.AppendValue(int32(childBuilder.Len()))
	}
	// only append a single null to the child builder, the offsets all refer to the same value
	childBuilder.AppendNull()
}

// AppendEmptyValue only appends an empty value arbitrarily to the first child,
// and then uses that offset to identify the value.
func (b *DenseUnionBuilder) AppendEmptyValue() {
	firstChildCode := b.codes[0]
	childBuilder := b.typeIDtoBuilder[firstChildCode]
	b.typesBuilder.AppendValue(firstChildCode)
	b.offsetsBuilder.AppendValue(int32(childBuilder.Len()))
	childBuilder.AppendEmptyValue()
}

// AppendEmptyValues, like AppendNulls, will only append a single empty value
// (implementation defined) to the first child arbitrarily, and then point
// at that value using the offsets n times. That makes this more efficient
// than calling AppendEmptyValue multiple times.
-func (b *DenseUnionBuilder) AppendEmptyValues(n int) { - // only append 1 null to the child builder, use the same offset twice - firstChildCode := b.codes[0] - childBuilder := b.typeIDtoBuilder[firstChildCode] - b.Reserve(n) - for i := 0; i < n; i++ { - b.typesBuilder.AppendValue(firstChildCode) - b.offsetsBuilder.AppendValue(int32(childBuilder.Len())) - } - // only append a single empty value to the child builder, the offsets all - // refer to the same value - childBuilder.AppendEmptyValue() -} - -// Append appends the necessary offset and type code to the builder -// and must be followed up with an append to the appropriate child builder -func (b *DenseUnionBuilder) Append(nextType arrow.UnionTypeCode) { - b.typesBuilder.AppendValue(nextType) - bldr := b.typeIDtoBuilder[nextType] - if bldr.Len() == kMaxElems { - panic("a dense UnionArray cannot contain more than 2^31 - 1 elements from a single child") - } - - b.offsetsBuilder.AppendValue(int32(bldr.Len())) -} - -func (b *DenseUnionBuilder) Release() { - debug.Assert(atomic.LoadInt64(&b.refCount) > 0, "too many releases") - - if atomic.AddInt64(&b.refCount, -1) == 0 { - for _, c := range b.children { - c.Release() - } - b.typesBuilder.Release() - b.offsetsBuilder.Release() - } -} - -func (b *DenseUnionBuilder) newData() *Data { - data := b.unionBuilder.newData() - data.buffers = append(data.buffers, b.offsetsBuilder.Finish()) - return data -} - -func (b *DenseUnionBuilder) NewArray() arrow.Array { - return b.NewDenseUnionArray() -} - -func (b *DenseUnionBuilder) NewDenseUnionArray() (a *DenseUnion) { - data := b.newData() - a = NewDenseUnionData(data) - data.Release() - return -} - -func (b *DenseUnionBuilder) UnmarshalJSON(data []byte) (err error) { - dec := json.NewDecoder(bytes.NewReader(data)) - t, err := dec.Token() - if err != nil { - return err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return fmt.Errorf("dense union builder must unpack from json array, found %s", t) - } - return 
b.Unmarshal(dec) -} - -func (b *DenseUnionBuilder) Unmarshal(dec *json.Decoder) error { - for dec.More() { - if err := b.UnmarshalOne(dec); err != nil { - return err - } - } - return nil -} - -func (d *DenseUnionBuilder) AppendValueFromString(s string) error { - if s == NullValueStr { - d.AppendNull() - return nil - } - dec := json.NewDecoder(strings.NewReader(s)) - return d.UnmarshalOne(dec) -} - -func (b *DenseUnionBuilder) UnmarshalOne(dec *json.Decoder) error { - t, err := dec.Token() - if err != nil { - return err - } - - switch t { - case json.Delim('['): - // should be [type_id, Value] - typeID, err := dec.Token() - if err != nil { - return err - } - - var typeCode int8 - - switch tid := typeID.(type) { - case json.Number: - id, err := tid.Int64() - if err != nil { - return err - } - typeCode = int8(id) - case float64: - if tid != float64(int64(tid)) { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Type: reflect.TypeOf(int8(0)), - Struct: fmt.Sprint(b.Type()), - Value: "float", - } - } - typeCode = int8(tid) - } - - childNum := b.typeIDtoChildID[typeCode] - if childNum == arrow.InvalidUnionChildID { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: "invalid type code", - } - } - - b.Append(typeCode) - if err := b.children[childNum].UnmarshalOne(dec); err != nil { - return err - } - - endArr, err := dec.Token() - if err != nil { - return err - } - - if endArr != json.Delim(']') { - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: "union value array should have exactly 2 elements", - } - } - case nil: - b.AppendNull() - default: - return &json.UnmarshalTypeError{ - Offset: dec.InputOffset(), - Value: fmt.Sprint(t), - Struct: fmt.Sprint(b.Type()), - } - } - return nil -} - -var ( - _ arrow.Array = (*SparseUnion)(nil) - _ arrow.Array = (*DenseUnion)(nil) - _ Union = (*SparseUnion)(nil) - _ Union = (*DenseUnion)(nil) - _ Builder = (*SparseUnionBuilder)(nil) - _ Builder = (*DenseUnionBuilder)(nil) - _ 
UnionBuilder = (*SparseUnionBuilder)(nil) - _ UnionBuilder = (*DenseUnionBuilder)(nil) -) diff --git a/go/arrow/array/union_test.go b/go/arrow/array/union_test.go deleted file mode 100644 index 43e7afd693b6c..0000000000000 --- a/go/arrow/array/union_test.go +++ /dev/null @@ -1,1117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "fmt" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" -) - -func uint8ArrFromSlice(ids ...uint8) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Uint8, len(ids), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes(ids))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func int32ArrFromSlice(offsets ...int32) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func TestUnionSliceEquals(t *testing.T) { - unionFields := []arrow.Field{ - {Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, - } - - typeCodes := []arrow.UnionTypeCode{5, 10} - sparseType := arrow.SparseUnionOf(unionFields, typeCodes) - denseType := arrow.DenseUnionOf(unionFields, typeCodes) - - schema := arrow.NewSchema([]arrow.Field{ - {Name: "sparse", Type: sparseType, Nullable: true}, - {Name: "dense", Type: denseType, Nullable: true}, - }, nil) - - sparseChildren := make([]arrow.Array, 2) - denseChildren := make([]arrow.Array, 2) - - const length = 7 - - typeIDsBuffer := memory.NewBufferBytes(arrow.Uint8Traits.CastToBytes([]uint8{5, 10, 5, 5, 10, 10, 5})) - sparseChildren[0] = int32ArrFromSlice(0, 1, 2, 3, 4, 5, 6) - defer sparseChildren[0].Release() - sparseChildren[1] = uint8ArrFromSlice(10, 11, 12, 13, 14, 15, 16) - defer sparseChildren[1].Release() - - denseChildren[0] = int32ArrFromSlice(0, 2, 3, 7) - defer denseChildren[0].Release() - denseChildren[1] = uint8ArrFromSlice(11, 14, 15) - defer 
denseChildren[1].Release() - - offsetsBuffer := memory.NewBufferBytes(arrow.Int32Traits.CastToBytes([]int32{0, 0, 1, 2, 1, 2, 3})) - sparse := array.NewSparseUnion(sparseType, length, sparseChildren, typeIDsBuffer, 0) - dense := array.NewDenseUnion(denseType, length, denseChildren, typeIDsBuffer, offsetsBuffer, 0) - - defer sparse.Release() - defer dense.Release() - - batch := array.NewRecord(schema, []arrow.Array{sparse, dense}, -1) - defer batch.Release() - - checkUnion := func(arr arrow.Array) { - size := arr.Len() - slice := array.NewSlice(arr, 2, int64(size)) - defer slice.Release() - assert.EqualValues(t, size-2, slice.Len()) - - slice2 := array.NewSlice(arr, 2, int64(arr.Len())) - defer slice2.Release() - assert.EqualValues(t, size-2, slice2.Len()) - - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 2, int64(arr.Len()), slice, 0, int64(slice.Len()))) - - // chain slices - slice2 = array.NewSlice(arr, 1, int64(arr.Len())) - defer slice2.Release() - slice2 = array.NewSlice(slice2, 1, int64(slice2.Len())) - defer slice2.Release() - assert.True(t, array.Equal(slice, slice2)) - - slice, slice2 = array.NewSlice(arr, 1, 6), array.NewSlice(arr, 1, 6) - defer slice.Release() - defer slice2.Release() - assert.EqualValues(t, 5, slice.Len()) - - assert.True(t, array.Equal(slice, slice2)) - assert.True(t, array.SliceEqual(arr, 1, 6, slice, 0, 5)) - } - - checkUnion(batch.Column(0)) - checkUnion(batch.Column(1)) -} - -func TestSparseUnionGetFlattenedField(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - ty := arrow.SparseUnionOf([]arrow.Field{ - {Name: "ints", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "strs", Type: arrow.BinaryTypes.String, Nullable: true}, - }, []arrow.UnionTypeCode{2, 7}) - ints, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, 1, 2, 3]`)) - defer ints.Release() - strs, _, _ := array.FromJSON(mem, 
arrow.BinaryTypes.String, strings.NewReader(`["a", null, "c", "d"]`)) - defer strs.Release() - idsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[2, 7, 2, 7]`)) - defer idsArr.Release() - ids := idsArr.Data().Buffers()[1] - - const length = 4 - - t.Run("flattened", func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - arr := array.NewSparseUnion(ty, length, []arrow.Array{ints, strs}, ids, 0) - defer arr.Release() - - flattened, err := arr.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[0, null, 2, null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = arr.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null, null, "d"]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - sliced := array.NewSlice(arr, 1, 3).(*array.SparseUnion) - defer sliced.Release() - - flattened, err = sliced.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null, 2]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = sliced.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - _, err = arr.GetFlattenedField(mem, -1) - assert.Error(t, err) 
- _, err = arr.GetFlattenedField(mem, 2) - assert.Error(t, err) - }) - - t.Run("offset children", func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - strSlice, intSlice := array.NewSlice(strs, 1, 3), array.NewSlice(ints, 1, 3) - defer strSlice.Release() - defer intSlice.Release() - - arr := array.NewSparseUnion(ty, length-2, []arrow.Array{intSlice, strSlice}, ids, 0) - defer arr.Release() - - flattened, err := arr.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[1, null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = arr.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[null, "c"]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - sliced := array.NewSlice(arr, 1, 2).(*array.SparseUnion) - defer sliced.Release() - - flattened, err = sliced.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[null]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = sliced.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["c"]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - }) - - t.Run("empty flattened", func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - strSlice, 
intSlice := array.NewSlice(strs, length, length), array.NewSlice(ints, length, length) - defer strSlice.Release() - defer intSlice.Release() - - arr := array.NewSparseUnion(ty, 0, []arrow.Array{intSlice, strSlice}, ids, 0) - defer arr.Release() - - flattened, err := arr.GetFlattenedField(mem, 0) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64, strings.NewReader(`[]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - - flattened, err = arr.GetFlattenedField(mem, 1) - assert.NoError(t, err) - defer flattened.Release() - expected, _, _ = array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`)) - defer expected.Release() - - assert.Truef(t, array.Equal(flattened, expected), "expected: %s, got: %s", expected, flattened) - }) -} - -func TestSparseUnionValidate(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - a, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[4, 5]`)) - defer a.Release() - dt := arrow.SparseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0}) - children := []arrow.Array{a} - - typeIDsArr, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[0, 0, 0]`)) - defer typeIDsArr.Release() - typeIDs := typeIDsArr.Data().Buffers()[1] - - arr := array.NewSparseUnion(dt, 2, children, typeIDs, 0) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - arr = array.NewSparseUnion(dt, 1, children, typeIDs, 1) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - arr = array.NewSparseUnion(dt, 0, children, typeIDs, 2) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - // length + offset < child length but that's ok! 
- arr = array.NewSparseUnion(dt, 1, children, typeIDs, 0) - assert.NoError(t, arr.ValidateFull()) - arr.Release() - - // length + offset > child length! BAD! - assert.Panics(t, func() { - arr = array.NewSparseUnion(dt, 1, children, typeIDs, 2) - }) - - // offset > child length - assert.Panics(t, func() { - arr = array.NewSparseUnion(dt, 0, children, typeIDs, 3) - }) -} - -type UnionFactorySuite struct { - suite.Suite - - mem *memory.CheckedAllocator - codes []arrow.UnionTypeCode - typeIDs arrow.Array - logicalTypeIDs arrow.Array - invalidTypeIDs arrow.Array - invalidTypeIDs2 arrow.Array -} - -func (s *UnionFactorySuite) typeidsFromSlice(ids ...int8) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Int8, len(ids), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(ids))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func (s *UnionFactorySuite) offsetsFromSlice(offsets ...int32) arrow.Array { - data := array.NewData(arrow.PrimitiveTypes.Int32, len(offsets), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(offsets))}, nil, 0, 0) - defer data.Release() - return array.MakeFromData(data) -} - -func (s *UnionFactorySuite) SetupTest() { - s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - s.codes = []arrow.UnionTypeCode{1, 2, 4, 127} - s.typeIDs = s.typeidsFromSlice(0, 1, 2, 0, 1, 3, 2, 0, 2, 1) - s.logicalTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, 2, 127, 4, 1, 4, 2) - s.invalidTypeIDs = s.typeidsFromSlice(1, 2, 4, 1, -2, 127, 4, 1, 4, 2) - s.invalidTypeIDs2 = s.typeidsFromSlice(1, 2, 4, 1, 3, 127, 4, 1, 4, 2) -} - -func (s *UnionFactorySuite) TearDownTest() { - s.typeIDs.Release() - s.logicalTypeIDs.Release() - s.invalidTypeIDs.Release() - s.invalidTypeIDs2.Release() - s.mem.AssertSize(s.T(), 0) -} - -func (s *UnionFactorySuite) checkFields(arr array.Union, fields []string) { - ty := arr.DataType().(arrow.UnionType) - s.Len(ty.Fields(), len(fields)) - for i, f := range 
ty.Fields() { - s.Equal(fields[i], f.Name) - } -} - -func (s *UnionFactorySuite) checkCodes(arr array.Union, codes []arrow.UnionTypeCode) { - ty := arr.DataType().(arrow.UnionType) - s.Equal(codes, ty.TypeCodes()) -} - -func (s *UnionFactorySuite) checkUnion(arr array.Union, mode arrow.UnionMode, fields []string, codes []arrow.UnionTypeCode) { - s.Equal(mode, arr.Mode()) - s.checkFields(arr, fields) - s.checkCodes(arr, codes) - typeIDs := s.typeIDs.(*array.Int8) - for i := 0; i < typeIDs.Len(); i++ { - s.EqualValues(typeIDs.Value(i), arr.ChildID(i)) - } - s.Nil(arr.Field(-1)) - s.Nil(arr.Field(typeIDs.Len())) -} - -func (s *UnionFactorySuite) TestMakeDenseUnions() { - // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} - offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) - defer offsets.Release() - - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) - defer children[0].Release() - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) - defer children[1].Release() - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) - defer children[2].Release() - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) - defer children[3].Release() - - fieldNames := []string{"str", "int1", "real", "int2"} - - s.Run("without fields and codes", func() { - result, err := array.NewDenseUnionFromArrays(s.typeIDs, offsets, children) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with fields", func() { - _, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, children, []string{"one"}) - s.Error(err) - result, err := array.NewDenseUnionFromArraysWithFields(s.typeIDs, offsets, 
children, fieldNames) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with codes", func() { - _, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, 0) - s.Error(err) - result, err := array.NewDenseUnionFromArrays(s.logicalTypeIDs, offsets, children, s.codes...) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, []string{"0", "1", "2", "3"}, s.codes) - }) - - s.Run("with fields and codes", func() { - _, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, []string{"one"}, s.codes) - s.Error(err) - result, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fieldNames, s.codes) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.DenseMode, fieldNames, s.codes) - }) - - s.Run("invalid type codes", func() { - result, err := array.NewDenseUnionFromArrays(s.invalidTypeIDs, offsets, children, s.codes...) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - result, err = array.NewDenseUnionFromArrays(s.invalidTypeIDs2, offsets, children, s.codes...) 
- s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - }) - - s.Run("invalid offsets", func() { - // offset out of bounds at index 5 - invalidOffsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 1, 1, 2, 1, 2) - defer invalidOffsets.Release() - result, err := array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - - // negative offset at index 5 - invalidOffsets = s.offsetsFromSlice(0, 0, 0, 1, 1, -1, 1, 2, 1, 2) - defer invalidOffsets.Release() - result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - - // non-monotonic offset at index 3 - invalidOffsets = s.offsetsFromSlice(1, 0, 0, 0, 1, 0, 1, 2, 1, 2) - defer invalidOffsets.Release() - result, err = array.NewDenseUnionFromArrays(s.typeIDs, invalidOffsets, children) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - }) -} - -func (s *UnionFactorySuite) TestDenseUnionStringRoundTrip() { - // typeIDs: {0, 1, 2, 0, 1, 3, 2, 0, 2, 1} - offsets := s.offsetsFromSlice(0, 0, 0, 1, 1, 0, 1, 2, 1, 2) - defer offsets.Release() - - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "def", "xyz"]`)) - defer children[0].Release() - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, strings.NewReader(`[10, 20, 30]`)) - defer children[1].Release() - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.618, 2.718, 3.142]`)) - defer children[2].Release() - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[-12]`)) - defer children[3].Release() - - fields := []string{"str", "int1", "real", "int2"} - - // 1. 
create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(s.T(), 0) - - dt := arrow.DenseUnionFromArrays(children, fields, s.codes) - arr, err := array.NewDenseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, offsets, children, fields, s.codes) - s.NoError(err) - defer arr.Release() - - // 2. create array via AppendValueFromString - b1 := array.NewDenseUnionBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - s.NoError(b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.DenseUnion) - defer arr1.Release() - - s.True(array.Equal(arr, arr1)) -} - -func (s *UnionFactorySuite) TestMakeSparse() { - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, - strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) - for _, c := range children { - defer c.Release() - } - - fieldNames := []string{"str", "int1", "real", "int2"} - - s.Run("without fields and codes", func() { - result, err := array.NewSparseUnionFromArrays(s.typeIDs, children) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with fields", func() { - _, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, []string{"one"}) - s.Error(err) - result, err := array.NewSparseUnionFromArraysWithFields(s.typeIDs, children, fieldNames) - s.NoError(err) - defer result.Release() - 
s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, fieldNames, []arrow.UnionTypeCode{0, 1, 2, 3}) - }) - - s.Run("with codes", func() { - _, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, 0) - s.Error(err) - result, err := array.NewSparseUnionFromArrays(s.logicalTypeIDs, children, s.codes...) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, []string{"0", "1", "2", "3"}, s.codes) - }) - - s.Run("with fields and codes", func() { - _, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, []string{"one"}, s.codes) - s.Error(err) - result, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fieldNames, s.codes) - s.NoError(err) - defer result.Release() - s.NoError(result.ValidateFull()) - s.checkUnion(result, arrow.SparseMode, fieldNames, s.codes) - }) - - s.Run("invalid type codes", func() { - result, err := array.NewSparseUnionFromArrays(s.invalidTypeIDs, children, s.codes...) - s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - result, err = array.NewSparseUnionFromArrays(s.invalidTypeIDs2, children, s.codes...) 
- s.NoError(err) - defer result.Release() - s.Error(result.ValidateFull()) - }) - - s.Run("invalid child length", func() { - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0]`)) - defer children[3].Release() - - _, err := array.NewSparseUnionFromArrays(s.typeIDs, children) - s.Error(err) - }) -} - -func (s *UnionFactorySuite) TestSparseUnionStringRoundTrip() { - children := make([]arrow.Array, 4) - children[0], _, _ = array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`["abc", "", "", "def", "", "", "", "xyz", "", ""]`)) - defer children[0].Release() - children[1], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Uint8, - strings.NewReader(`[0, 10, 0, 0, 20, 0, 0, 0, 0, 30]`)) - defer children[1].Release() - children[2], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[0.0, 0.0, 1.618, 0.0, 0.0, 0.0, 2.718, 0.0, 3.142, 0.0]`)) - defer children[2].Release() - children[3], _, _ = array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, 0, 0, 0, 0, -12, 0, 0, 0, 0]`)) - defer children[3].Release() - - fields := []string{"str", "int1", "real", "int2"} - - // 1. create array - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(s.T(), 0) - - dt := arrow.SparseUnionFromArrays(children, fields, s.codes) - - arr, err := array.NewSparseUnionFromArraysWithFieldCodes(s.logicalTypeIDs, children, fields, s.codes) - s.NoError(err) - defer arr.Release() - - // 2. 
create array via AppendValueFromString - b1 := array.NewSparseUnionBuilder(mem, dt) - defer b1.Release() - - for i := 0; i < arr.Len(); i++ { - s.NoError(b1.AppendValueFromString(arr.ValueStr(i))) - } - - arr1 := b1.NewArray().(*array.SparseUnion) - defer arr1.Release() - - s.True(array.Equal(arr, arr1)) -} - -type UnionBuilderSuite struct { - suite.Suite - - I8 arrow.UnionTypeCode - STR arrow.UnionTypeCode - DBL arrow.UnionTypeCode - - mem *memory.CheckedAllocator - expectedTypes []arrow.UnionTypeCode - expectedTypesArr arrow.Array - i8Bldr *array.Int8Builder - strBldr *array.StringBuilder - dblBldr *array.Float64Builder - unionBldr array.UnionBuilder - actual array.Union -} - -func (s *UnionBuilderSuite) SetupTest() { - s.I8, s.STR, s.DBL = 8, 13, 7 - - s.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - s.expectedTypes = make([]arrow.UnionTypeCode, 0) - - s.i8Bldr = array.NewInt8Builder(s.mem) - s.strBldr = array.NewStringBuilder(s.mem) - s.dblBldr = array.NewFloat64Builder(s.mem) -} - -func (s *UnionBuilderSuite) TearDownTest() { - if s.expectedTypesArr != nil { - s.expectedTypesArr.Release() - s.expectedTypesArr = nil - } - s.i8Bldr.Release() - s.strBldr.Release() - s.dblBldr.Release() - if s.actual != nil { - s.actual.Release() - s.actual = nil - } - - s.mem.AssertSize(s.T(), 0) -} - -func (s *UnionBuilderSuite) createExpectedTypesArr() { - data := array.NewData(arrow.PrimitiveTypes.Int8, len(s.expectedTypes), - []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int8Traits.CastToBytes(s.expectedTypes))}, nil, 0, 0) - defer data.Release() - s.expectedTypesArr = array.MakeFromData(data) -} - -func (s *UnionBuilderSuite) appendInt(i int8) { - s.expectedTypes = append(s.expectedTypes, s.I8) - s.unionBldr.Append(s.I8) - s.i8Bldr.Append(i) - if s.unionBldr.Mode() == arrow.SparseMode { - s.strBldr.AppendEmptyValue() - s.dblBldr.AppendEmptyValue() - } -} - -func (s *UnionBuilderSuite) appendString(str string) { - s.expectedTypes = append(s.expectedTypes, 
s.STR) - s.unionBldr.Append(s.STR) - s.strBldr.Append(str) - if s.unionBldr.Mode() == arrow.SparseMode { - s.i8Bldr.AppendEmptyValue() - s.dblBldr.AppendEmptyValue() - } -} - -func (s *UnionBuilderSuite) appendDbl(dbl float64) { - s.expectedTypes = append(s.expectedTypes, s.DBL) - s.unionBldr.Append(s.DBL) - s.dblBldr.Append(dbl) - if s.unionBldr.Mode() == arrow.SparseMode { - s.strBldr.AppendEmptyValue() - s.i8Bldr.AppendEmptyValue() - } -} - -func (s *UnionBuilderSuite) appendBasics() { - s.appendInt(33) - s.appendString("abc") - s.appendDbl(1.0) - s.appendDbl(-1.0) - s.appendString("") - s.appendInt(10) - s.appendString("def") - s.appendInt(-10) - s.appendDbl(0.5) - - s.Equal(9, s.unionBldr.Len()) - - s.actual = s.unionBldr.NewArray().(array.Union) - s.NoError(s.actual.ValidateFull()) - s.createExpectedTypesArr() -} - -func (s *UnionBuilderSuite) appendNullsAndEmptyValues() { - s.appendString("abc") - s.unionBldr.AppendNull() - s.unionBldr.AppendEmptyValue() - s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) - s.appendInt(42) - s.unionBldr.AppendNulls(2) - s.unionBldr.AppendEmptyValues(2) - s.expectedTypes = append(s.expectedTypes, s.I8, s.I8, s.I8) - - s.Equal(8, s.unionBldr.Len()) - - s.actual = s.unionBldr.NewArray().(array.Union) - s.NoError(s.actual.ValidateFull()) - s.createExpectedTypesArr() -} - -func (s *UnionBuilderSuite) appendInferred() { - s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") - s.EqualValues(0, s.I8) - s.appendInt(33) - s.appendInt(10) - - s.STR = s.unionBldr.AppendChild(s.strBldr, "str") - s.EqualValues(1, s.STR) - s.appendString("abc") - s.appendString("") - s.appendString("def") - s.appendInt(-10) - - s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") - s.EqualValues(2, s.DBL) - s.appendDbl(1.0) - s.appendDbl(-1.0) - s.appendDbl(0.5) - - s.Equal(9, s.unionBldr.Len()) - - s.actual = s.unionBldr.NewArray().(array.Union) - s.NoError(s.actual.ValidateFull()) - s.createExpectedTypesArr() - - s.EqualValues(0, s.I8) - 
s.EqualValues(1, s.STR) - s.EqualValues(2, s.DBL) -} - -func (s *UnionBuilderSuite) appendListOfInferred(utyp arrow.UnionType) *array.List { - listBldr := array.NewListBuilder(s.mem, utyp) - defer listBldr.Release() - - s.unionBldr = listBldr.ValueBuilder().(array.UnionBuilder) - - listBldr.Append(true) - s.I8 = s.unionBldr.AppendChild(s.i8Bldr, "i8") - s.EqualValues(0, s.I8) - s.appendInt(10) - - listBldr.Append(true) - s.STR = s.unionBldr.AppendChild(s.strBldr, "str") - s.EqualValues(1, s.STR) - s.appendString("abc") - s.appendInt(-10) - - listBldr.Append(true) - s.DBL = s.unionBldr.AppendChild(s.dblBldr, "dbl") - s.EqualValues(2, s.DBL) - s.appendDbl(0.5) - - s.Equal(4, s.unionBldr.Len()) - - s.createExpectedTypesArr() - return listBldr.NewListArray() -} - -func (s *UnionBuilderSuite) assertArraysEqual(expected, actual arrow.Array) { - s.Truef(array.Equal(expected, actual), "expected: %s, got: %s", expected, actual) -} - -func (s *UnionBuilderSuite) TestDenseUnionBasics() { - s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, - arrow.DenseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendBasics() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) - expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 0, 1, 1, 1, 2, 2, 2]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - 
expectedOffsets.Release() - }() - - expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - expectedOffsets, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestDenseBuilderNullsAndEmpty() { - s.unionBldr = array.NewDenseUnionBuilderWithBuilders(s.mem, - arrow.DenseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendNullsAndEmptyValues() - - // four null / empty values (the latter implementation-defined) appended to I8 - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[null, 0, 42, null, 0]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc"]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[]`)) - expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 0, 1, 2, 3, 3, 4, 4]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - expectedOffsets.Release() - }() - - expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - expectedOffsets, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) - 
- // physical arrays must be as expected - s.assertArraysEqual(expectedI8, s.actual.Field(0)) - s.assertArraysEqual(expectedStr, s.actual.Field(1)) - s.assertArraysEqual(expectedDbl, s.actual.Field(2)) -} - -func (s *UnionBuilderSuite) TestDenseUnionInferredTyped() { - s.unionBldr = array.NewEmptyDenseUnionBuilder(s.mem) - defer s.unionBldr.Release() - - s.appendInferred() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[33, 10, -10]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, strings.NewReader(`["abc", "", "def"]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, strings.NewReader(`[1.0, -1.0, 0.5]`)) - expectedOffsets, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, 1, 0, 1, 2, 2, 0, 1, 2]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - expectedOffsets.Release() - }() - - expected, err := array.NewDenseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - expectedOffsets, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestDenseUnionListOfInferredType() { - actual := s.appendListOfInferred(arrow.DenseUnionOf([]arrow.Field{}, []arrow.UnionTypeCode{})) - defer actual.Release() - - expectedType := arrow.ListOf(arrow.DenseUnionOf( - []arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL})) - s.Equal(expectedType.String(), actual.DataType().String()) -} - -func (s *UnionBuilderSuite) TestSparseUnionBasics() { - s.unionBldr 
= array.NewSparseUnionBuilderWithBuilders(s.mem, - arrow.SparseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendBasics() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[33, null, null, null, null, 10, null, -10, null]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`[null, "abc", null, null, "", null, "def", null, null]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[null, null, 1.0, -1.0, null, null, null, null, 0.5]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - }() - - expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestSparseBuilderNullsAndEmpty() { - s.unionBldr = array.NewSparseUnionBuilderWithBuilders(s.mem, - arrow.SparseUnionOf([]arrow.Field{ - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "dbl", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}), - []array.Builder{s.i8Bldr, s.strBldr, s.dblBldr}) - defer s.unionBldr.Release() - - s.appendNullsAndEmptyValues() - - // "abc", null, 0, 42, null, null, 0, 0 - // getting 0 for empty values is implementation-defined - 
expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[0, null, 0, 42, null, null, 0, 0]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`["abc", "", "", "", "", "", "", ""]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[0, 0, 0, 0, 0, 0, 0, 0]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - }() - - expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) - - // physical arrays must be as expected - s.assertArraysEqual(expectedI8, s.actual.Field(0)) - s.assertArraysEqual(expectedStr, s.actual.Field(1)) - s.assertArraysEqual(expectedDbl, s.actual.Field(2)) -} - -func (s *UnionBuilderSuite) TestSparseUnionInferredType() { - s.unionBldr = array.NewEmptySparseUnionBuilder(s.mem) - defer s.unionBldr.Release() - - s.appendInferred() - - expectedI8, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Int8, - strings.NewReader(`[33, 10, null, null, null, -10, null, null, null]`)) - expectedStr, _, _ := array.FromJSON(s.mem, arrow.BinaryTypes.String, - strings.NewReader(`[null, null, "abc", "", "def", null, null, null, null]`)) - expectedDbl, _, _ := array.FromJSON(s.mem, arrow.PrimitiveTypes.Float64, - strings.NewReader(`[null, null, null, null, null, null,1.0, -1.0, 0.5]`)) - - defer func() { - expectedI8.Release() - expectedStr.Release() - expectedDbl.Release() - }() - - expected, err := array.NewSparseUnionFromArraysWithFieldCodes(s.expectedTypesArr, - []arrow.Array{expectedI8, expectedStr, expectedDbl}, - []string{"i8", "str", "dbl"}, - []arrow.UnionTypeCode{s.I8, s.STR, s.DBL}) - 
s.NoError(err) - defer expected.Release() - - s.Equal(expected.DataType().String(), s.actual.DataType().String()) - s.assertArraysEqual(expected, s.actual) -} - -func (s *UnionBuilderSuite) TestSparseUnionStructWithUnion() { - bldr := array.NewStructBuilder(s.mem, arrow.StructOf(arrow.Field{Name: "u", Type: arrow.SparseUnionFromArrays(nil, nil, nil)})) - defer bldr.Release() - - unionBldr := bldr.FieldBuilder(0).(array.UnionBuilder) - int32Bldr := array.NewInt32Builder(s.mem) - defer int32Bldr.Release() - - s.EqualValues(0, unionBldr.AppendChild(int32Bldr, "i")) - expectedType := arrow.StructOf(arrow.Field{Name: "u", - Type: arrow.SparseUnionOf([]arrow.Field{{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}}, []arrow.UnionTypeCode{0})}) - s.Truef(arrow.TypeEqual(expectedType, bldr.Type()), "expected: %s, got: %s", expectedType, bldr.Type()) -} - -func ExampleSparseUnionBuilder() { - dt1 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "c", Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.BinaryTypes.String}}, - }, []arrow.UnionTypeCode{0}) - dt2 := arrow.StructOf(arrow.Field{Name: "a", Type: dt1}) - - pool := memory.DefaultAllocator - bldr := array.NewStructBuilder(pool, dt2) - defer bldr.Release() - - bldrDt1 := bldr.FieldBuilder(0).(*array.SparseUnionBuilder) - binDictBldr := bldrDt1.Child(0).(*array.BinaryDictionaryBuilder) - - bldr.Append(true) - bldrDt1.Append(0) - binDictBldr.AppendString("foo") - - bldr.Append(true) - bldrDt1.Append(0) - binDictBldr.AppendString("bar") - - out := bldr.NewArray().(*array.Struct) - defer out.Release() - - fmt.Println(out) - - // Output: - // {[{c=foo} {c=bar}]} -} - -func TestUnions(t *testing.T) { - suite.Run(t, new(UnionFactorySuite)) - suite.Run(t, new(UnionBuilderSuite)) -} - -func TestNestedUnionStructDict(t *testing.T) { - // ARROW-18274 - dt1 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "c", Type: &arrow.DictionaryType{ - IndexType: arrow.PrimitiveTypes.Uint16, - 
ValueType: arrow.BinaryTypes.String, - Ordered: false, - }}, - }, []arrow.UnionTypeCode{0}) - dt2 := arrow.StructOf( - arrow.Field{Name: "b", Type: dt1}, - ) - dt3 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "a", Type: dt2}, - }, []arrow.UnionTypeCode{0}) - pool := memory.NewGoAllocator() - - builder := array.NewSparseUnionBuilder(pool, dt3) - defer builder.Release() - arr := builder.NewArray() - defer arr.Release() - assert.Equal(t, 0, arr.Len()) -} - -func TestNestedUnionDictUnion(t *testing.T) { - dt1 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "c", Type: &arrow.DictionaryType{ - IndexType: arrow.PrimitiveTypes.Uint16, - ValueType: arrow.BinaryTypes.String, - Ordered: false, - }}, - }, []arrow.UnionTypeCode{0}) - dt2 := arrow.SparseUnionOf([]arrow.Field{ - {Name: "a", Type: dt1}, - }, []arrow.UnionTypeCode{0}) - pool := memory.NewGoAllocator() - - builder := array.NewSparseUnionBuilder(pool, dt2) - defer builder.Release() - arr := builder.NewArray() - defer arr.Release() - assert.Equal(t, 0, arr.Len()) -} diff --git a/go/arrow/array/util.go b/go/arrow/array/util.go deleted file mode 100644 index 2b41dadaf4bfc..0000000000000 --- a/go/arrow/array/util.go +++ /dev/null @@ -1,523 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package array - -import ( - "errors" - "fmt" - "io" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/hashing" - "github.com/apache/arrow/go/v18/internal/json" -) - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -type fromJSONCfg struct { - multiDocument bool - startOffset int64 - useNumber bool -} - -type FromJSONOption func(*fromJSONCfg) - -func WithMultipleDocs() FromJSONOption { - return func(c *fromJSONCfg) { - c.multiDocument = true - } -} - -// WithStartOffset attempts to start decoding from the reader at the offset -// passed in. If using this option the reader must fulfill the io.ReadSeeker -// interface, or else an error will be returned. -// -// It will call Seek(off, io.SeekStart) on the reader -func WithStartOffset(off int64) FromJSONOption { - return func(c *fromJSONCfg) { - c.startOffset = off - } -} - -// WithUseNumber enables the 'UseNumber' option on the json decoder, using -// the json.Number type instead of assuming float64 for numbers. This is critical -// if you have numbers that are larger than what can fit into the 53 bits of -// an IEEE float64 mantissa and want to preserve its value. -func WithUseNumber() FromJSONOption { - return func(c *fromJSONCfg) { - c.useNumber = true - } -} - -// FromJSON creates an arrow.Array from a corresponding JSON stream and defined data type. If the types in the -// json do not match the type provided, it will return errors. This is *not* the integration test format -// and should not be used as such. This intended to be used by consumers more similarly to the current exposing of -// the csv reader/writer. 
It also returns the input offset in the reader where it finished decoding since buffering -// by the decoder could leave the reader's cursor past where the parsing finished if attempting to parse multiple json -// arrays from one stream. -// -// All the Array types implement json.Marshaller and thus can be written to json -// using the json.Marshal function -// -// The JSON provided must be formatted in one of two ways: -// -// Default: the top level of the json must be a list which matches the type specified exactly -// Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], null, [], .....]` for a List type -// Struct arrays are represented a list of objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]` -// -// Using WithMultipleDocs: -// If the JSON provided is multiple newline separated json documents, then use this option -// and each json document will be treated as a single row of the array. This is most useful for record batches -// and interacting with other processes that use json. For example: -// `{"col1": 1, "col2": "row1", "col3": ...}\n{"col1": 2, "col2": "row2", "col3": ...}\n.....` -// -// Duration values get formated upon marshalling as a string consisting of their numeric -// value followed by the unit suffix such as "10s" for a value of 10 and unit of Seconds. -// with "ms" for millisecond, "us" for microsecond, and "ns" for nanosecond as the suffixes. -// Unmarshalling duration values is more permissive since it first tries to use Go's -// time.ParseDuration function which means it allows values in the form 3h25m0.3s in addition -// to the same values which are output. 
-// -// Interval types are marshalled / unmarshalled as follows: -// -// MonthInterval is marshalled as an object with the format: -// { "months": #} -// DayTimeInterval is marshalled using Go's regular marshalling of structs: -// { "days": #, "milliseconds": # } -// MonthDayNanoInterval values are marshalled the same as DayTime using Go's struct marshalling: -// { "months": #, "days": #, "nanoseconds": # } -// -// Times use a format of HH:MM or HH:MM:SS[.zzz] where the fractions of a second cannot -// exceed the precision allowed by the time unit, otherwise unmarshalling will error. -// -// # Dates use YYYY-MM-DD format -// -// Timestamps use RFC3339Nano format except without a timezone, all of the following are valid: -// -// YYYY-MM-DD -// YYYY-MM-DD[T]HH -// YYYY-MM-DD[T]HH:MM -// YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzzzz] -// -// The fractions of a second cannot exceed the precision allowed by the timeunit of the datatype. -// -// When processing structs as objects order of keys does not matter, but keys cannot be repeated. 
-func FromJSON(mem memory.Allocator, dt arrow.DataType, r io.Reader, opts ...FromJSONOption) (arr arrow.Array, offset int64, err error) { - var cfg fromJSONCfg - for _, o := range opts { - o(&cfg) - } - - if cfg.startOffset != 0 { - seeker, ok := r.(io.ReadSeeker) - if !ok { - return nil, 0, errors.New("using StartOffset option requires reader to be a ReadSeeker, cannot seek") - } - - seeker.Seek(cfg.startOffset, io.SeekStart) - } - - bldr := NewBuilder(mem, dt) - defer bldr.Release() - - dec := json.NewDecoder(r) - defer func() { - if errors.Is(err, io.EOF) { - err = fmt.Errorf("failed parsing json: %w", io.ErrUnexpectedEOF) - } - }() - - if cfg.useNumber { - dec.UseNumber() - } - - if !cfg.multiDocument { - t, err := dec.Token() - if err != nil { - return nil, dec.InputOffset(), err - } - - if delim, ok := t.(json.Delim); !ok || delim != '[' { - return nil, dec.InputOffset(), fmt.Errorf("json doc must be an array, found %s", delim) - } - } - - if err = bldr.Unmarshal(dec); err != nil { - return nil, dec.InputOffset(), err - } - - if !cfg.multiDocument { - // consume the last ']' - if _, err = dec.Token(); err != nil { - return nil, dec.InputOffset(), err - } - } - - return bldr.NewArray(), dec.InputOffset(), nil -} - -// RecordToStructArray constructs a struct array from the columns of the record batch -// by referencing them, zero-copy. -func RecordToStructArray(rec arrow.Record) *Struct { - cols := make([]arrow.ArrayData, rec.NumCols()) - for i, c := range rec.Columns() { - cols[i] = c.Data() - } - - data := NewData(arrow.StructOf(rec.Schema().Fields()...), int(rec.NumRows()), []*memory.Buffer{nil}, cols, 0, 0) - defer data.Release() - - return NewStructData(data) -} - -// RecordFromStructArray is a convenience function for converting a struct array into -// a record batch without copying the data. If the passed in schema is nil, the fields -// of the struct will be used to define the record batch. 
Otherwise the passed in -// schema will be used to create the record batch. If passed in, the schema must match -// the fields of the struct column. -func RecordFromStructArray(in *Struct, schema *arrow.Schema) arrow.Record { - if schema == nil { - schema = arrow.NewSchema(in.DataType().(*arrow.StructType).Fields(), nil) - } - - return NewRecord(schema, in.fields, int64(in.Len())) -} - -// RecordFromJSON creates a record batch from JSON data. See array.FromJSON for the details -// of formatting and logic. -// -// A record batch from JSON is equivalent to reading a struct array in from json and then -// converting it to a record batch. -func RecordFromJSON(mem memory.Allocator, schema *arrow.Schema, r io.Reader, opts ...FromJSONOption) (arrow.Record, int64, error) { - st := arrow.StructOf(schema.Fields()...) - arr, off, err := FromJSON(mem, st, r, opts...) - if err != nil { - return nil, off, err - } - defer arr.Release() - - return RecordFromStructArray(arr.(*Struct), schema), off, nil -} - -// RecordToJSON writes out the given record following the format of each row is a single object -// on a single line of the output. -func RecordToJSON(rec arrow.Record, w io.Writer) error { - enc := json.NewEncoder(w) - - fields := rec.Schema().Fields() - - cols := make(map[string]interface{}) - for i := 0; int64(i) < rec.NumRows(); i++ { - for j, c := range rec.Columns() { - cols[fields[j].Name] = c.GetOneForMarshal(i) - } - if err := enc.Encode(cols); err != nil { - return err - } - } - return nil -} - -func TableFromJSON(mem memory.Allocator, sc *arrow.Schema, recJSON []string, opt ...FromJSONOption) (arrow.Table, error) { - batches := make([]arrow.Record, len(recJSON)) - for i, batchJSON := range recJSON { - batch, _, err := RecordFromJSON(mem, sc, strings.NewReader(batchJSON), opt...) 
- if err != nil { - return nil, err - } - defer batch.Release() - batches[i] = batch - } - return NewTableFromRecords(sc, batches), nil -} - -func GetDictArrayData(mem memory.Allocator, valueType arrow.DataType, memoTable hashing.MemoTable, startOffset int) (*Data, error) { - dictLen := memoTable.Size() - startOffset - buffers := []*memory.Buffer{nil, nil} - - buffers[1] = memory.NewResizableBuffer(mem) - defer buffers[1].Release() - - switch tbl := memoTable.(type) { - case hashing.NumericMemoTable: - nbytes := tbl.TypeTraits().BytesRequired(dictLen) - buffers[1].Resize(nbytes) - tbl.WriteOutSubset(startOffset, buffers[1].Bytes()) - case *hashing.BinaryMemoTable: - switch valueType.ID() { - case arrow.BINARY, arrow.STRING: - buffers = append(buffers, memory.NewResizableBuffer(mem)) - defer buffers[2].Release() - - buffers[1].Resize(arrow.Int32Traits.BytesRequired(dictLen + 1)) - offsets := arrow.Int32Traits.CastFromBytes(buffers[1].Bytes()) - tbl.CopyOffsetsSubset(startOffset, offsets) - - valuesz := offsets[len(offsets)-1] - offsets[0] - buffers[2].Resize(int(valuesz)) - tbl.CopyValuesSubset(startOffset, buffers[2].Bytes()) - case arrow.LARGE_BINARY, arrow.LARGE_STRING: - buffers = append(buffers, memory.NewResizableBuffer(mem)) - defer buffers[2].Release() - - buffers[1].Resize(arrow.Int64Traits.BytesRequired(dictLen + 1)) - offsets := arrow.Int64Traits.CastFromBytes(buffers[1].Bytes()) - tbl.CopyLargeOffsetsSubset(startOffset, offsets) - - valuesz := offsets[len(offsets)-1] - offsets[0] - buffers[2].Resize(int(valuesz)) - tbl.CopyValuesSubset(startOffset, buffers[2].Bytes()) - default: // fixed size - bw := int(bitutil.BytesForBits(int64(valueType.(arrow.FixedWidthDataType).BitWidth()))) - buffers[1].Resize(dictLen * bw) - tbl.CopyFixedWidthValues(startOffset, bw, buffers[1].Bytes()) - } - default: - return nil, fmt.Errorf("arrow/array: dictionary unifier unimplemented type: %s", valueType) - } - - var nullcount int - if idx, ok := memoTable.GetNull(); ok && 
idx >= startOffset { - buffers[0] = memory.NewResizableBuffer(mem) - defer buffers[0].Release() - nullcount = 1 - buffers[0].Resize(int(bitutil.BytesForBits(int64(dictLen)))) - memory.Set(buffers[0].Bytes(), 0xFF) - bitutil.ClearBit(buffers[0].Bytes(), idx) - } - - return NewData(valueType, dictLen, buffers, nil, nullcount, 0), nil -} - -func DictArrayFromJSON(mem memory.Allocator, dt *arrow.DictionaryType, indicesJSON, dictJSON string) (arrow.Array, error) { - indices, _, err := FromJSON(mem, dt.IndexType, strings.NewReader(indicesJSON)) - if err != nil { - return nil, err - } - defer indices.Release() - - dict, _, err := FromJSON(mem, dt.ValueType, strings.NewReader(dictJSON)) - if err != nil { - return nil, err - } - defer dict.Release() - - return NewDictionaryArray(dt, indices, dict), nil -} - -func ChunkedFromJSON(mem memory.Allocator, dt arrow.DataType, chunkStrs []string, opts ...FromJSONOption) (*arrow.Chunked, error) { - chunks := make([]arrow.Array, len(chunkStrs)) - defer func() { - for _, c := range chunks { - if c != nil { - c.Release() - } - } - }() - - var err error - for i, c := range chunkStrs { - chunks[i], _, err = FromJSON(mem, dt, strings.NewReader(c), opts...) 
- if err != nil { - return nil, err - } - } - - return arrow.NewChunked(dt, chunks), nil -} - -func getMaxBufferLen(dt arrow.DataType, length int) int { - bufferLen := int(bitutil.BytesForBits(int64(length))) - - maxOf := func(bl int) int { - if bl > bufferLen { - return bl - } - return bufferLen - } - - switch dt := dt.(type) { - case *arrow.DictionaryType: - bufferLen = maxOf(getMaxBufferLen(dt.ValueType, length)) - return maxOf(getMaxBufferLen(dt.IndexType, length)) - case *arrow.FixedSizeBinaryType: - return maxOf(dt.ByteWidth * length) - case arrow.FixedWidthDataType: - return maxOf(int(bitutil.BytesForBits(int64(dt.BitWidth()))) * length) - case *arrow.StructType: - for _, f := range dt.Fields() { - bufferLen = maxOf(getMaxBufferLen(f.Type, length)) - } - return bufferLen - case *arrow.SparseUnionType: - // type codes - bufferLen = maxOf(length) - // creates children of the same length of the union - for _, f := range dt.Fields() { - bufferLen = maxOf(getMaxBufferLen(f.Type, length)) - } - return bufferLen - case *arrow.DenseUnionType: - // type codes - bufferLen = maxOf(length) - // offsets - bufferLen = maxOf(arrow.Int32SizeBytes * length) - // create children of length 1 - for _, f := range dt.Fields() { - bufferLen = maxOf(getMaxBufferLen(f.Type, 1)) - } - return bufferLen - case arrow.OffsetsDataType: - return maxOf(dt.OffsetTypeTraits().BytesRequired(length + 1)) - case *arrow.FixedSizeListType: - return maxOf(getMaxBufferLen(dt.Elem(), int(dt.Len())*length)) - case arrow.ExtensionType: - return maxOf(getMaxBufferLen(dt.StorageType(), length)) - default: - panic(fmt.Errorf("arrow/array: arrayofnull not implemented for type %s", dt)) - } -} - -type nullArrayFactory struct { - mem memory.Allocator - dt arrow.DataType - len int - buf *memory.Buffer -} - -func (n *nullArrayFactory) create() *Data { - if n.buf == nil { - bufLen := getMaxBufferLen(n.dt, n.len) - n.buf = memory.NewResizableBuffer(n.mem) - n.buf.Resize(bufLen) - defer n.buf.Release() - } - - 
var ( - dt = n.dt - bufs = []*memory.Buffer{memory.SliceBuffer(n.buf, 0, int(bitutil.BytesForBits(int64(n.len))))} - childData []arrow.ArrayData - dictData arrow.ArrayData - ) - defer bufs[0].Release() - - if ex, ok := dt.(arrow.ExtensionType); ok { - dt = ex.StorageType() - } - - if nf, ok := dt.(arrow.NestedType); ok { - childData = make([]arrow.ArrayData, nf.NumFields()) - } - - switch dt := dt.(type) { - case *arrow.NullType: - case *arrow.DictionaryType: - bufs = append(bufs, n.buf) - arr := MakeArrayOfNull(n.mem, dt.ValueType, 0) - defer arr.Release() - dictData = arr.Data() - case arrow.FixedWidthDataType: - bufs = append(bufs, n.buf) - case arrow.BinaryDataType: - bufs = append(bufs, n.buf, n.buf) - case arrow.OffsetsDataType: - bufs = append(bufs, n.buf) - childData[0] = n.createChild(dt, 0, 0) - defer childData[0].Release() - case *arrow.FixedSizeListType: - childData[0] = n.createChild(dt, 0, n.len*int(dt.Len())) - defer childData[0].Release() - case *arrow.StructType: - for i := range dt.Fields() { - childData[i] = n.createChild(dt, i, n.len) - defer childData[i].Release() - } - case *arrow.RunEndEncodedType: - bldr := NewBuilder(n.mem, dt.RunEnds()) - defer bldr.Release() - - switch b := bldr.(type) { - case *Int16Builder: - b.Append(int16(n.len)) - case *Int32Builder: - b.Append(int32(n.len)) - case *Int64Builder: - b.Append(int64(n.len)) - } - - childData[0] = bldr.newData() - defer childData[0].Release() - childData[1] = n.createChild(dt.Encoded(), 1, 1) - defer childData[1].Release() - case arrow.UnionType: - bufs[0].Release() - bufs[0] = nil - bufs = append(bufs, n.buf) - // buffer is zeroed, but 0 may not be a valid type code - if dt.TypeCodes()[0] != 0 { - bufs[1] = memory.NewResizableBuffer(n.mem) - bufs[1].Resize(n.len) - defer bufs[1].Release() - memory.Set(bufs[1].Bytes(), byte(dt.TypeCodes()[0])) - } - - // for sparse unions we create children with the same length - childLen := n.len - if dt.Mode() == arrow.DenseMode { - // for dense 
unions, offsets are all 0 and make children - // with length 1 - bufs = append(bufs, n.buf) - childLen = 1 - } - for i := range dt.Fields() { - childData[i] = n.createChild(dt, i, childLen) - defer childData[i].Release() - } - } - - out := NewData(n.dt, n.len, bufs, childData, n.len, 0) - if dictData != nil { - out.SetDictionary(dictData) - } - return out -} - -func (n *nullArrayFactory) createChild(dt arrow.DataType, i, length int) *Data { - childFactory := &nullArrayFactory{ - mem: n.mem, dt: n.dt.(arrow.NestedType).Fields()[i].Type, - len: length, buf: n.buf} - return childFactory.create() -} - -// MakeArrayOfNull creates an array of size length which is all null of the given data type. -func MakeArrayOfNull(mem memory.Allocator, dt arrow.DataType, length int) arrow.Array { - if dt.ID() == arrow.NULL { - return NewNull(length) - } - - data := (&nullArrayFactory{mem: mem, dt: dt, len: length}).create() - defer data.Release() - return MakeFromData(data) -} diff --git a/go/arrow/array/util_test.go b/go/arrow/array/util_test.go deleted file mode 100644 index 114ea6e546649..0000000000000 --- a/go/arrow/array/util_test.go +++ /dev/null @@ -1,545 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package array_test - -import ( - "bufio" - "bytes" - "fmt" - "io" - "reflect" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/internal/json" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var typemap = map[arrow.DataType]reflect.Type{ - arrow.PrimitiveTypes.Int8: reflect.TypeOf(int8(0)), - arrow.PrimitiveTypes.Uint8: reflect.TypeOf(uint8(0)), - arrow.PrimitiveTypes.Int16: reflect.TypeOf(int16(0)), - arrow.PrimitiveTypes.Uint16: reflect.TypeOf(uint16(0)), - arrow.PrimitiveTypes.Int32: reflect.TypeOf(int32(0)), - arrow.PrimitiveTypes.Uint32: reflect.TypeOf(uint32(0)), - arrow.PrimitiveTypes.Int64: reflect.TypeOf(int64(0)), - arrow.PrimitiveTypes.Uint64: reflect.TypeOf(uint64(0)), -} - -func TestIntegerArrsJSON(t *testing.T) { - const N = 10 - types := []arrow.DataType{ - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int64, - arrow.PrimitiveTypes.Uint64, - } - - for _, tt := range types { - t.Run(fmt.Sprint(tt), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsontest := make([]int, N) - vals := reflect.MakeSlice(reflect.SliceOf(typemap[tt]), N, N) - for i := 0; i < N; i++ { - vals.Index(i).Set(reflect.ValueOf(i).Convert(typemap[tt])) - jsontest[i] = i - } - - data, _ := json.Marshal(jsontest) - arr, _, err := array.FromJSON(mem, tt, bytes.NewReader(data)) - assert.NoError(t, err) - defer arr.Release() - - assert.EqualValues(t, N, 
arr.Len()) - assert.Zero(t, arr.NullN()) - - output, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, string(data), string(output)) - }) - t.Run(fmt.Sprint(tt)+" errors", func(t *testing.T) { - _, _, err := array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("[")) - assert.ErrorIs(t, err, io.ErrUnexpectedEOF) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("0")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("{}")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, tt, strings.NewReader("[[0]]")) - assert.EqualError(t, err, "json: cannot unmarshal [ into Go value of type "+tt.Name()) - }) - } -} - -func TestStringsJSON(t *testing.T) { - tests := []struct { - jsonstring string - values []string - valids []bool - }{ - {"[]", []string{}, []bool{}}, - {`["", "foo"]`, []string{"", "foo"}, nil}, - {`["", null]`, []string{"", ""}, []bool{true, false}}, - // NUL character in string - {`["", "some\u0000char"]`, []string{"", "some\x00char"}, nil}, - // utf8 sequence in string - {"[\"\xc3\xa9\"]", []string{"\xc3\xa9"}, nil}, - // bytes < 0x20 can be represented as JSON unicode escapes - {`["\u0000\u001f"]`, []string{"\x00\x1f"}, nil}, - } - - for _, tt := range tests { - t.Run("json "+tt.jsonstring, func(t *testing.T) { - bldr := array.NewStringBuilder(memory.DefaultAllocator) - defer bldr.Release() - - bldr.AppendValues(tt.values, tt.valids) - expected := bldr.NewStringArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(tt.jsonstring)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, 
tt.jsonstring, string(data)) - }) - } - - for _, tt := range tests { - t.Run("large json "+tt.jsonstring, func(t *testing.T) { - bldr := array.NewLargeStringBuilder(memory.DefaultAllocator) - defer bldr.Release() - - bldr.AppendValues(tt.values, tt.valids) - expected := bldr.NewLargeStringArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.LargeString, strings.NewReader(tt.jsonstring)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, tt.jsonstring, string(data)) - }) - } - - t.Run("errors", func(t *testing.T) { - _, _, err := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[0]")) - assert.Error(t, err) - - _, _, err = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader("[[]]")) - assert.Error(t, err) - }) -} - -func TestStructArrayFromJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsonStr := `[{"hello": 3.5, "world": true, "yo": "foo"},{"hello": 3.25, "world": false, "yo": "bar"}]` - - arr, _, err := array.FromJSON(mem, arrow.StructOf( - arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean}, - arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String}, - ), strings.NewReader(jsonStr)) - assert.NoError(t, err) - defer arr.Release() - - output, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonStr, string(output)) -} - -func TestArrayFromJSONMulti(t *testing.T) { - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.StructOf( - arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean}, - arrow.Field{Name: "yo", Type: 
arrow.BinaryTypes.String}, - ), strings.NewReader("{\"hello\": 3.5, \"world\": true, \"yo\": \"foo\"}\n{\"hello\": 3.25, \"world\": false, \"yo\": \"bar\"}\n"), - array.WithMultipleDocs()) - assert.NoError(t, err) - defer arr.Release() - - assert.EqualValues(t, 2, arr.Len()) - assert.Zero(t, arr.NullN()) -} - -func TestNestedJSONArrs(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsonStr := `[{"hello": 1.5, "world": [1, 2, 3, 4], "yo": [{"foo": "2005-05-06", "bar": "15:02:04.123"},{"foo": "1956-01-02", "bar": "02:10:00"}]}]` - - arr, _, err := array.FromJSON(mem, arrow.StructOf( - arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64}, - arrow.Field{Name: "world", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32)}, - arrow.Field{Name: "yo", Type: arrow.FixedSizeListOf(2, arrow.StructOf( - arrow.Field{Name: "foo", Type: arrow.FixedWidthTypes.Date32}, - arrow.Field{Name: "bar", Type: arrow.FixedWidthTypes.Time32ms}, - ))}, - ), strings.NewReader(jsonStr)) - assert.NoError(t, err) - defer arr.Release() - - v, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonStr, string(v)) -} - -func TestGetNullsFromJSON(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - jsonStr := `[ - {"yo": "thing", "arr": null, "nuf": {"ps": "今日は"}}, - {"yo": null, "nuf": {"ps": null}, "arr": []}, - { "nuf": null, "yo": "今日は", "arr": [1,2,3]} - ]` - - rec, _, err := array.RecordFromJSON(mem, arrow.NewSchema([]arrow.Field{ - {Name: "yo", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "arr", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}, - {Name: "nuf", Type: arrow.StructOf(arrow.Field{Name: "ps", Type: arrow.BinaryTypes.String, Nullable: true}), Nullable: true}, - }, nil), strings.NewReader(jsonStr)) - assert.NoError(t, err) - defer rec.Release() - - assert.EqualValues(t, 3, rec.NumCols()) - assert.EqualValues(t, 3, 
rec.NumRows()) - - data, err := json.Marshal(rec) - assert.NoError(t, err) - assert.JSONEq(t, jsonStr, string(data)) -} - -func TestDurationsJSON(t *testing.T) { - tests := []struct { - unit arrow.TimeUnit - jsonstr string - values []arrow.Duration - }{ - {arrow.Second, `["1s", "2s", "3s", "4s", "5s"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - {arrow.Millisecond, `["1ms", "2ms", "3ms", "4ms", "5ms"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - {arrow.Microsecond, `["1us", "2us", "3us", "4us", "5us"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - {arrow.Nanosecond, `["1ns", "2ns", "3ns", "4ns", "5ns"]`, []arrow.Duration{1, 2, 3, 4, 5}}, - } - for _, tt := range tests { - dtype := &arrow.DurationType{Unit: tt.unit} - bldr := array.NewDurationBuilder(memory.DefaultAllocator, dtype) - defer bldr.Release() - - bldr.AppendValues(tt.values, nil) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, strings.NewReader(tt.jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - } -} - -func TestTimestampsJSON(t *testing.T) { - tests := []struct { - unit arrow.TimeUnit - jsonstr string - values []arrow.Timestamp - }{ - {arrow.Second, `["1970-01-01", "2000-02-29", "3989-07-14", "1900-02-28"]`, []arrow.Timestamp{0, 951782400, 63730281600, -2203977600}}, - {arrow.Nanosecond, `["1970-01-01", "2000-02-29", "1900-02-28"]`, []arrow.Timestamp{0, 951782400000000000, -2203977600000000000}}, - } - - for _, tt := range tests { - dtype := &arrow.TimestampType{Unit: tt.unit} - bldr := array.NewTimestampBuilder(memory.DefaultAllocator, dtype) - defer bldr.Release() - - bldr.AppendValues(tt.values, nil) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, strings.NewReader(tt.jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), 
"expected: %s\ngot: %s\n", expected, arr) - } -} - -func TestDateJSON(t *testing.T) { - t.Run("date32", func(t *testing.T) { - bldr := array.NewDate32Builder(memory.DefaultAllocator) - defer bldr.Release() - - jsonstr := `["1970-01-06", null, "1970-02-12", 0]` - jsonExp := `["1970-01-06", null, "1970-02-12", "1970-01-01"]` - - bldr.AppendValues([]arrow.Date32{5, 0, 42, 0}, []bool{true, false, true, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.FixedWidthTypes.Date32, strings.NewReader(jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonExp, string(data)) - }) - t.Run("date64", func(t *testing.T) { - bldr := array.NewDate64Builder(memory.DefaultAllocator) - defer bldr.Release() - - jsonstr := `["1970-01-02", null, "2286-11-20", 86400000]` - jsonExp := `["1970-01-02", null, "2286-11-20", "1970-01-02"]` - - bldr.AppendValues([]arrow.Date64{86400000, 0, 9999936000000, 86400000}, []bool{true, false, true, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.FixedWidthTypes.Date64, strings.NewReader(jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, jsonExp, string(data)) - }) -} - -func TestTimeJSON(t *testing.T) { - tententen := 60*(60*(10)+10) + 10 - tests := []struct { - dt arrow.DataType - jsonstr string - jsonexp string - valueadd int - }{ - {arrow.FixedWidthTypes.Time32s, `[null, "10:10:10", 36610]`, `[null, "10:10:10", "10:10:10"]`, 123}, - {arrow.FixedWidthTypes.Time32ms, `[null, "10:10:10.123", 36610123]`, `[null, "10:10:10.123", "10:10:10.123"]`, 456}, - 
{arrow.FixedWidthTypes.Time64us, `[null, "10:10:10.123456", 36610123456]`, `[null, "10:10:10.123456", "10:10:10.123456"]`, 789}, - {arrow.FixedWidthTypes.Time64ns, `[null, "10:10:10.123456789", 36610123456789]`, `[null, "10:10:10.123456789", "10:10:10.123456789"]`, 0}, - } - - for _, tt := range tests { - t.Run(fmt.Sprint(tt.dt), func(t *testing.T) { - defer func() { - tententen = 1000*tententen + tt.valueadd - }() - - bldr := array.NewBuilder(memory.DefaultAllocator, tt.dt) - defer bldr.Release() - - switch tt.dt.ID() { - case arrow.TIME32: - bldr.(*array.Time32Builder).AppendValues([]arrow.Time32{0, arrow.Time32(tententen), arrow.Time32(tententen)}, []bool{false, true, true}) - case arrow.TIME64: - bldr.(*array.Time64Builder).AppendValues([]arrow.Time64{0, arrow.Time64(tententen), arrow.Time64(tententen)}, []bool{false, true, true}) - } - - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, tt.dt, strings.NewReader(tt.jsonstr)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, tt.jsonexp, string(data)) - }) - } -} - -func TestDecimal128JSON(t *testing.T) { - dt := &arrow.Decimal128Type{Precision: 10, Scale: 4} - bldr := array.NewDecimal128Builder(memory.DefaultAllocator, dt) - defer bldr.Release() - - bldr.AppendValues([]decimal128.Num{decimal128.FromU64(1234567), {}, decimal128.FromI64(-789000)}, []bool{true, false, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dt, strings.NewReader(`["123.4567", null, "-78.9000"]`)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, `["123.4567", null, "-78.9"]`, 
string(data)) -} - -func TestDecimal256JSON(t *testing.T) { - dt := &arrow.Decimal256Type{Precision: 10, Scale: 4} - bldr := array.NewDecimal256Builder(memory.DefaultAllocator, dt) - defer bldr.Release() - - bldr.AppendValues([]decimal256.Num{decimal256.FromU64(1234567), {}, decimal256.FromI64(-789000)}, []bool{true, false, true}) - expected := bldr.NewArray() - defer expected.Release() - - arr, _, err := array.FromJSON(memory.DefaultAllocator, dt, strings.NewReader(`["123.4567", null, "-78.9000"]`)) - assert.NoError(t, err) - defer arr.Release() - - assert.Truef(t, array.Equal(expected, arr), "expected: %s\ngot: %s\n", expected, arr) - - data, err := json.Marshal(arr) - assert.NoError(t, err) - assert.JSONEq(t, `["123.4567", null, "-78.9"]`, string(data)) -} - -func TestArrRecordsJSONRoundTrip(t *testing.T) { - for k, v := range arrdata.Records { - if k == "decimal128" || k == "decimal256" || k == "fixed_width_types" { - // test these separately since the sample data in the arrdata - // records doesn't lend itself to exactness when going to/from - // json. The fixed_width_types one uses negative values for - // time32 and time64 which correctly get interpreted into times, - // but re-encoding them in json produces the normalized positive - // values instead of re-creating negative ones. - // the decimal128/decimal256 values don't get parsed *exactly* due to fun - // float weirdness due to their size, so smaller tests will work fine. 
- continue - } - t.Run(k, func(t *testing.T) { - var buf bytes.Buffer - assert.NotPanics(t, func() { - enc := json.NewEncoder(&buf) - for _, r := range v { - if err := enc.Encode(r); err != nil { - panic(err) - } - } - }) - - rdr := bytes.NewReader(buf.Bytes()) - var cur int64 - - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - for _, r := range v { - rec, off, err := array.RecordFromJSON(mem, r.Schema(), rdr, array.WithStartOffset(cur)) - assert.NoError(t, err) - defer rec.Release() - - assert.Truef(t, array.RecordApproxEqual(r, rec), "expected: %s\ngot: %s\n", r, rec) - cur += off - } - }) - } -} - -func TestStructBuilderJSONUnknownNested(t *testing.T) { - dt := arrow.StructOf( - arrow.Field{Name: "region", Type: arrow.BinaryTypes.String}, - arrow.Field{Name: "model", Type: arrow.PrimitiveTypes.Int32}, - arrow.Field{Name: "sales", Type: arrow.PrimitiveTypes.Float32}) - - const data = `[ - {"region": "NY", "model": "3", "sales": 742.0}, - {"region": "CT", "model": "5", "sales": 742.0} - ]` - - const dataWithExtra = `[ - {"region": "NY", "model": "3", "sales": 742.0, "extra": 1234}, - {"region": "CT", "model": "5", "sales": 742.0, "extra_array": [1234], "extra_obj": {"nested": ["deeply"]}} - ]` - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arr, _, err := array.FromJSON(mem, dt, strings.NewReader(data)) - require.NoError(t, err) - require.NotNil(t, arr) - defer arr.Release() - - arr2, _, err := array.FromJSON(mem, dt, strings.NewReader(dataWithExtra)) - require.NoError(t, err) - require.NotNil(t, arr2) - defer arr2.Release() - - assert.Truef(t, array.Equal(arr, arr2), "expected: %s\n actual: %s", arr, arr2) -} - -func TestRecordBuilderUnmarshalJSONExtraFields(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema([]arrow.Field{ - {Name: "region", Type: arrow.BinaryTypes.String}, - {Name: 
"model", Type: arrow.PrimitiveTypes.Int32}, - {Name: "sales", Type: arrow.PrimitiveTypes.Float32}, - }, nil) - - bldr := array.NewRecordBuilder(mem, schema) - defer bldr.Release() - - const data = `{"region": "NY", "model": "3", "sales": 742.0, "extra": 1234} - {"region": "NY", "model": "3", "sales": 742.0, "extra_array": [1234], "extra_obj": {"nested": ["deeply"]}}` - - s := bufio.NewScanner(strings.NewReader(data)) - require.True(t, s.Scan()) - require.NoError(t, bldr.UnmarshalJSON(s.Bytes())) - - rec1 := bldr.NewRecord() - defer rec1.Release() - - require.True(t, s.Scan()) - require.NoError(t, bldr.UnmarshalJSON(s.Bytes())) - - rec2 := bldr.NewRecord() - defer rec2.Release() - - assert.Truef(t, array.RecordEqual(rec1, rec2), "expected: %s\nactual: %s", rec1, rec2) -} diff --git a/go/arrow/arrio/arrio.go b/go/arrow/arrio/arrio.go deleted file mode 100644 index 53215c81f75eb..0000000000000 --- a/go/arrow/arrio/arrio.go +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package arrio exposes functions to manipulate records, exposing and using -// interfaces not unlike the ones defined in the stdlib io package. 
-package arrio - -import ( - "errors" - "io" - - "github.com/apache/arrow/go/v18/arrow" -) - -// Reader is the interface that wraps the Read method. -type Reader interface { - // Read reads the current record from the underlying stream and an error, if any. - // When the Reader reaches the end of the underlying stream, it returns (nil, io.EOF). - Read() (arrow.Record, error) -} - -// ReaderAt is the interface that wraps the ReadAt method. -type ReaderAt interface { - // ReadAt reads the i-th record from the underlying stream and an error, if any. - ReadAt(i int64) (arrow.Record, error) -} - -// Writer is the interface that wraps the Write method. -type Writer interface { - Write(rec arrow.Record) error -} - -// Copy copies all the records available from src to dst. -// Copy returns the number of records copied and the first error -// encountered while copying, if any. -// -// A successful Copy returns err == nil, not err == EOF. Because Copy is -// defined to read from src until EOF, it does not treat an EOF from Read as an -// error to be reported. -func Copy(dst Writer, src Reader) (n int64, err error) { - for { - rec, err := src.Read() - if err != nil { - if errors.Is(err, io.EOF) { - return n, nil - } - return n, err - } - err = dst.Write(rec) - if err != nil { - return n, err - } - n++ - } -} - -// CopyN copies n records (or until an error) from src to dst. It returns the -// number of records copied and the earliest error encountered while copying. On -// return, written == n if and only if err == nil. 
-func CopyN(dst Writer, src Reader, n int64) (written int64, err error) { - for ; written < n; written++ { - rec, err := src.Read() - if err != nil { - if errors.Is(err, io.EOF) && written == n { - return written, nil - } - return written, err - } - err = dst.Write(rec) - if err != nil { - return written, err - } - } - - if written != n && err == nil { - err = io.EOF - } - return written, err -} diff --git a/go/arrow/arrio/arrio_test.go b/go/arrow/arrio/arrio_test.go deleted file mode 100644 index 26863ec252bf7..0000000000000 --- a/go/arrow/arrio/arrio_test.go +++ /dev/null @@ -1,197 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package arrio_test - -import ( - "fmt" - "io" - "os" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/arrio" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/ipc" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type copyKind int - -const ( - fileKind copyKind = iota - streamKind -) - -func (k copyKind) write(t *testing.T, f *os.File, mem memory.Allocator, schema *arrow.Schema, recs []arrow.Record) { - t.Helper() - - switch k { - case fileKind: - arrdata.WriteFile(t, f, mem, schema, recs) - case streamKind: - arrdata.WriteStream(t, f, mem, schema, recs) - default: - panic("invalid copyKind") - } -} - -func (k copyKind) check(t *testing.T, f *os.File, mem memory.Allocator, schema *arrow.Schema, recs []arrow.Record) { - t.Helper() - - switch k { - case fileKind: - arrdata.CheckArrowFile(t, f, mem, schema, recs) - case streamKind: - arrdata.CheckArrowStream(t, f, mem, schema, recs) - default: - panic("invalid copyKind") - } -} - -func TestCopy(t *testing.T) { - tempDir := t.TempDir() - - for _, tc := range []struct { - name string - src, dst copyKind - }{ - {name: "file2file", src: fileKind, dst: fileKind}, - {name: "file2stream", src: fileKind, dst: streamKind}, - {name: "stream2file", src: streamKind, dst: fileKind}, - {name: "stream2stream", src: streamKind, dst: streamKind}, - } { - t.Run(tc.name, func(t *testing.T) { - for name, recs := range arrdata.Records { - t.Run(name, func(t *testing.T) { - for _, tcopy := range []struct { - n int - want int - err error - }{ - {-1, len(recs), nil}, - {1, 1, nil}, - {0, 0, nil}, - {len(recs), len(recs), nil}, - {len(recs) + 1, len(recs), io.EOF}, - } { - t.Run(fmt.Sprintf("-copy-n=%d", tcopy.n), func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - f, err := os.CreateTemp(tempDir, "go-arrow-copy-") - if err != nil { - 
t.Fatal(err) - } - defer f.Close() - - o, err := os.CreateTemp(tempDir, "go-arrow-copy-") - if err != nil { - t.Fatal(err) - } - defer o.Close() - - tc.src.write(t, f, mem, recs[0].Schema(), recs) - tc.src.check(t, f, mem, recs[0].Schema(), recs) - - _, err = f.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - var r arrio.Reader - switch tc.src { - case fileKind: - rr, err := ipc.NewFileReader(f, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - if err != nil { - t.Fatal(err) - } - defer rr.Close() - r = rr - case streamKind: - rr, err := ipc.NewReader(f, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - if err != nil { - t.Fatal(err) - } - defer rr.Release() - r = rr - default: - t.Fatalf("invalid src type %v", tc.src) - } - - var w interface { - arrio.Writer - io.Closer - } - - switch tc.dst { - case fileKind: - w, err = ipc.NewFileWriter(o, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - if err != nil { - t.Fatal(err) - } - case streamKind: - w = ipc.NewWriter(o, ipc.WithSchema(recs[0].Schema()), ipc.WithAllocator(mem)) - default: - t.Fatalf("invalid dst type %v", tc.dst) - } - defer w.Close() - - var ( - n int64 - ) - switch tcopy.n { - case -1: - n, err = arrio.Copy(w, r) - case len(recs) + 1: - n, err = arrio.CopyN(w, r, int64(tcopy.n)) - default: - n, err = arrio.CopyN(w, r, int64(tcopy.n)) - } - - switch err { - case nil: - if tcopy.err != nil { - t.Fatalf("got a nil error, want=%v", tcopy.err) - } - default: - switch tcopy.err { - case nil: - t.Fatalf("invalid error: got=%v, want=%v", err, tcopy.err) - default: - if tcopy.err.Error() != err.Error() { - t.Fatalf("invalid error: got=%v, want=%v", err, tcopy.err) - } - } - } - - if got, want := n, int64(tcopy.want); got != want { - t.Fatalf("invalid number of records copied: got=%d, want=%d", got, want) - } - - err = w.Close() - if err != nil { - t.Fatal(err) - } - - tc.dst.check(t, o, mem, recs[0].Schema(), recs[:tcopy.want]) - }) - } - }) - } - }) - } -} diff 
--git a/go/arrow/avro/avro2parquet/main.go b/go/arrow/avro/avro2parquet/main.go deleted file mode 100644 index ae514c5ed1fda..0000000000000 --- a/go/arrow/avro/avro2parquet/main.go +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "bufio" - "bytes" - "flag" - "fmt" - "log" - "os" - "runtime/pprof" - "time" - - "github.com/apache/arrow/go/v18/arrow/avro" - "github.com/apache/arrow/go/v18/parquet" - "github.com/apache/arrow/go/v18/parquet/compress" - pq "github.com/apache/arrow/go/v18/parquet/pqarrow" -) - -var ( - cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") - filepath = flag.String("file", "", "avro ocf to convert") -) - -func main() { - flag.Parse() - if *cpuprofile != "" { - f, err := os.Create(*cpuprofile) - if err != nil { - log.Fatal("could not create CPU profile: ", err) - } - defer f.Close() // error handling omitted for example - if err := pprof.StartCPUProfile(f); err != nil { - log.Fatal("could not start CPU profile: ", err) - } - defer pprof.StopCPUProfile() - } - if *filepath == "" { - fmt.Println("no file specified") - } - chunk := 1024 * 8 - ts := time.Now() - log.Println("starting:") - info, err := os.Stat(*filepath) - if err != nil { - fmt.Println(err) - os.Exit(1) - } - filesize := info.Size() - data, err := os.ReadFile(*filepath) - if err != nil { - fmt.Println(err) - os.Exit(2) - } - fmt.Printf("file : %v\nsize: %v MB\n", filepath, float64(filesize)/1024/1024) - - r := bytes.NewReader(data) - ior := bufio.NewReaderSize(r, 4096*8) - av2arReader, err := avro.NewOCFReader(ior, avro.WithChunk(chunk)) - if err != nil { - fmt.Println(err) - os.Exit(3) - } - fp, err := os.OpenFile(*filepath+".parquet", os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644) - if err != nil { - fmt.Println(err) - os.Exit(4) - } - defer fp.Close() - pwProperties := parquet.NewWriterProperties(parquet.WithDictionaryDefault(true), - parquet.WithVersion(parquet.V2_LATEST), - parquet.WithCompression(compress.Codecs.Snappy), - parquet.WithBatchSize(1024*32), - parquet.WithDataPageSize(1024*1024), - parquet.WithMaxRowGroupLength(64*1024*1024), - ) - awProperties := pq.NewArrowWriterProperties(pq.WithStoreSchema()) - pr, err := 
pq.NewFileWriter(av2arReader.Schema(), fp, pwProperties, awProperties) - if err != nil { - fmt.Println(err) - os.Exit(5) - } - defer pr.Close() - fmt.Printf("parquet version: %v\n", pwProperties.Version()) - for av2arReader.Next() { - if av2arReader.Err() != nil { - fmt.Println(err) - os.Exit(6) - } - recs := av2arReader.Record() - err = pr.WriteBuffered(recs) - if err != nil { - fmt.Println(err) - os.Exit(7) - } - recs.Release() - } - if av2arReader.Err() != nil { - fmt.Println(av2arReader.Err()) - } - - pr.Close() - log.Printf("time to convert: %v\n", time.Since(ts)) -} diff --git a/go/arrow/avro/loader.go b/go/arrow/avro/loader.go deleted file mode 100644 index 26d8678e8e2be..0000000000000 --- a/go/arrow/avro/loader.go +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "errors" - "fmt" - "io" -) - -func (r *OCFReader) decodeOCFToChan() { - defer close(r.avroChan) - for r.r.HasNext() { - select { - case <-r.readerCtx.Done(): - r.err = fmt.Errorf("avro decoding cancelled, %d records read", r.avroDatumCount) - return - default: - var datum any - err := r.r.Decode(&datum) - if err != nil { - if errors.Is(err, io.EOF) { - r.err = nil - return - } - r.err = err - return - } - r.avroChan <- datum - r.avroDatumCount++ - } - } -} - -func (r *OCFReader) recordFactory() { - defer close(r.recChan) - r.primed = true - recChunk := 0 - switch { - case r.chunk < 1: - for data := range r.avroChan { - err := r.ldr.loadDatum(data) - if err != nil { - r.err = err - return - } - } - r.recChan <- r.bld.NewRecord() - r.bldDone <- struct{}{} - case r.chunk >= 1: - for data := range r.avroChan { - if recChunk == 0 { - r.bld.Reserve(r.chunk) - } - err := r.ldr.loadDatum(data) - if err != nil { - r.err = err - return - } - recChunk++ - if recChunk >= r.chunk { - r.recChan <- r.bld.NewRecord() - recChunk = 0 - } - } - if recChunk != 0 { - r.recChan <- r.bld.NewRecord() - } - r.bldDone <- struct{}{} - } -} diff --git a/go/arrow/avro/reader.go b/go/arrow/avro/reader.go deleted file mode 100644 index 1463041499de2..0000000000000 --- a/go/arrow/avro/reader.go +++ /dev/null @@ -1,337 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package avro - -import ( - "context" - "errors" - "fmt" - "io" - "sync/atomic" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/hamba/avro/v2/ocf" - "github.com/tidwall/sjson" - - avro "github.com/hamba/avro/v2" -) - -var ErrMismatchFields = errors.New("arrow/avro: number of records mismatch") - -// Option configures an Avro reader/writer. -type ( - Option func(config) - config *OCFReader -) - -type schemaEdit struct { - method string - path string - value any -} - -// Reader wraps goavro/OCFReader and creates array.Records from a schema. -type OCFReader struct { - r *ocf.Decoder - avroSchema string - avroSchemaEdits []schemaEdit - schema *arrow.Schema - - refs int64 - bld *array.RecordBuilder - bldMap *fieldPos - ldr *dataLoader - cur arrow.Record - err error - - primed bool - readerCtx context.Context - readCancel func() - maxOCF int - maxRec int - - avroChan chan any - avroDatumCount int64 - avroChanSize int - recChan chan arrow.Record - - bldDone chan struct{} - - recChanSize int - chunk int - mem memory.Allocator -} - -// NewReader returns a reader that reads from an Avro OCF file and creates -// arrow.Records from the converted avro data. 
-func NewOCFReader(r io.Reader, opts ...Option) (*OCFReader, error) { - ocfr, err := ocf.NewDecoder(r) - if err != nil { - return nil, fmt.Errorf("%w: could not create avro ocfreader", arrow.ErrInvalid) - } - - rr := &OCFReader{ - r: ocfr, - refs: 1, - chunk: 1, - avroChanSize: 500, - recChanSize: 10, - } - for _, opt := range opts { - opt(rr) - } - - rr.avroChan = make(chan any, rr.avroChanSize) - rr.recChan = make(chan arrow.Record, rr.recChanSize) - rr.bldDone = make(chan struct{}) - schema, err := avro.Parse(string(ocfr.Metadata()["avro.schema"])) - if err != nil { - return nil, fmt.Errorf("%w: could not parse avro header", arrow.ErrInvalid) - } - rr.avroSchema = schema.String() - if len(rr.avroSchemaEdits) > 0 { - // execute schema edits - for _, e := range rr.avroSchemaEdits { - err := rr.editAvroSchema(e) - if err != nil { - return nil, fmt.Errorf("%w: could not edit avro schema", arrow.ErrInvalid) - } - } - // validate edited schema - schema, err = avro.Parse(rr.avroSchema) - if err != nil { - return nil, fmt.Errorf("%w: could not parse modified avro schema", arrow.ErrInvalid) - } - } - rr.schema, err = ArrowSchemaFromAvro(schema) - if err != nil { - return nil, fmt.Errorf("%w: could not convert avro schema", arrow.ErrInvalid) - } - if rr.mem == nil { - rr.mem = memory.DefaultAllocator - } - rr.readerCtx, rr.readCancel = context.WithCancel(context.Background()) - go rr.decodeOCFToChan() - - rr.bld = array.NewRecordBuilder(rr.mem, rr.schema) - rr.bldMap = newFieldPos() - rr.ldr = newDataLoader() - for idx, fb := range rr.bld.Fields() { - mapFieldBuilders(fb, rr.schema.Field(idx), rr.bldMap) - } - rr.ldr.drawTree(rr.bldMap) - go rr.recordFactory() - return rr, nil -} - -// Reuse allows the OCFReader to be reused to read another Avro file provided the -// new Avro file has an identical schema. 
-func (rr *OCFReader) Reuse(r io.Reader, opts ...Option) error { - rr.Close() - rr.err = nil - ocfr, err := ocf.NewDecoder(r) - if err != nil { - return fmt.Errorf("%w: could not create avro ocfreader", arrow.ErrInvalid) - } - schema, err := avro.Parse(string(ocfr.Metadata()["avro.schema"])) - if err != nil { - return fmt.Errorf("%w: could not parse avro header", arrow.ErrInvalid) - } - if rr.avroSchema != schema.String() { - return fmt.Errorf("%w: avro schema mismatch", arrow.ErrInvalid) - } - - rr.r = ocfr - for _, opt := range opts { - opt(rr) - } - - rr.maxOCF = 0 - rr.maxRec = 0 - rr.avroDatumCount = 0 - rr.primed = false - - rr.avroChan = make(chan any, rr.avroChanSize) - rr.recChan = make(chan arrow.Record, rr.recChanSize) - rr.bldDone = make(chan struct{}) - - rr.readerCtx, rr.readCancel = context.WithCancel(context.Background()) - go rr.decodeOCFToChan() - go rr.recordFactory() - return nil -} - -// Err returns the last error encountered during the iteration over the -// underlying Avro file. -func (r *OCFReader) Err() error { return r.err } - -// AvroSchema returns the Avro schema of the Avro OCF -func (r *OCFReader) AvroSchema() string { return r.avroSchema } - -// Schema returns the converted Arrow schema of the Avro OCF -func (r *OCFReader) Schema() *arrow.Schema { return r.schema } - -// Record returns the current record that has been extracted from the -// underlying Avro OCF file. -// It is valid until the next call to Next. -func (r *OCFReader) Record() arrow.Record { return r.cur } - -// Metrics returns the maximum queue depth of the Avro record read cache and of the -// converted Arrow record cache. -func (r *OCFReader) Metrics() string { - return fmt.Sprintf("Max. OCF queue depth: %d/%d Max. record queue depth: %d/%d", r.maxOCF, r.avroChanSize, r.maxRec, r.recChanSize) -} - -// OCFRecordsReadCount returns the number of Avro datum that were read from the Avro file. 
-func (r *OCFReader) OCFRecordsReadCount() int64 { return r.avroDatumCount } - -// Close closes the OCFReader's Avro record read cache and converted Arrow record cache. OCFReader must -// be closed if the Avro OCF's records have not been read to completion. -func (r *OCFReader) Close() { - r.readCancel() - r.err = r.readerCtx.Err() -} - -func (r *OCFReader) editAvroSchema(e schemaEdit) error { - var err error - switch e.method { - case "set": - r.avroSchema, err = sjson.Set(r.avroSchema, e.path, e.value) - if err != nil { - return fmt.Errorf("%w: schema edit 'set %s = %v' failure - %v", arrow.ErrInvalid, e.path, e.value, err) - } - case "delete": - r.avroSchema, err = sjson.Delete(r.avroSchema, e.path) - if err != nil { - return fmt.Errorf("%w: schema edit 'delete' failure - %v", arrow.ErrInvalid, err) - } - default: - return fmt.Errorf("%w: schema edit method must be 'set' or 'delete'", arrow.ErrInvalid) - } - return nil -} - -// Next returns whether a Record can be received from the converted record queue. -// The user should check Err() after call to Next that return false to check -// if an error took place. -func (r *OCFReader) Next() bool { - if r.cur != nil { - r.cur.Release() - r.cur = nil - } - if r.maxOCF < len(r.avroChan) { - r.maxOCF = len(r.avroChan) - } - if r.maxRec < len(r.recChan) { - r.maxRec = len(r.recChan) - } - select { - case r.cur = <-r.recChan: - case <-r.bldDone: - if len(r.recChan) > 0 { - r.cur = <-r.recChan - } - } - if r.err != nil { - return false - } - - return r.cur != nil -} - -// WithAllocator specifies the Arrow memory allocator used while building records. -func WithAllocator(mem memory.Allocator) Option { - return func(cfg config) { - cfg.mem = mem - } -} - -// WithReadCacheSize specifies the size of the OCF record decode queue, default value -// is 500. 
-func WithReadCacheSize(n int) Option { - return func(cfg config) { - if n < 1 { - cfg.avroChanSize = 500 - } else { - cfg.avroChanSize = n - } - } -} - -// WithRecordCacheSize specifies the size of the converted Arrow record queue, default -// value is 1. -func WithRecordCacheSize(n int) Option { - return func(cfg config) { - if n < 1 { - cfg.recChanSize = 1 - } else { - cfg.recChanSize = n - } - } -} - -// WithSchemaEdit specifies modifications to the Avro schema. Supported methods are 'set' and -// 'delete'. Set sets the value for the specified path. Delete deletes the value for the specified path. -// A path is in dot syntax, such as "fields.1" or "fields.0.type". The modified Avro schema is -// validated before conversion to Arrow schema - NewOCFReader will return an error if the modified schema -// cannot be parsed. -func WithSchemaEdit(method, path string, value any) Option { - return func(cfg config) { - var e schemaEdit - e.method = method - e.path = path - e.value = value - cfg.avroSchemaEdits = append(cfg.avroSchemaEdits, e) - } -} - -// WithChunk specifies the chunk size used while reading Avro OCF files. -// -// If n is zero or 1, no chunking will take place and the reader will create -// one record per row. -// If n is greater than 1, chunks of n rows will be read. -// If n is negative, the reader will load the whole Avro OCF file into memory and -// create one big record with all the rows. -func WithChunk(n int) Option { - return func(cfg config) { - cfg.chunk = n - } -} - -// Retain increases the reference count by 1. -// Retain may be called simultaneously from multiple goroutines. -func (r *OCFReader) Retain() { - atomic.AddInt64(&r.refs, 1) -} - -// Release decreases the reference count by 1. -// When the reference count goes to zero, the memory is freed. -// Release may be called simultaneously from multiple goroutines. 
-func (r *OCFReader) Release() { - debug.Assert(atomic.LoadInt64(&r.refs) > 0, "too many releases") - - if atomic.AddInt64(&r.refs, -1) == 0 { - if r.cur != nil { - r.cur.Release() - } - } -} - -var _ array.RecordReader = (*OCFReader)(nil) diff --git a/go/arrow/avro/reader_test.go b/go/arrow/avro/reader_test.go deleted file mode 100644 index 2cb1a7caa801c..0000000000000 --- a/go/arrow/avro/reader_test.go +++ /dev/null @@ -1,364 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - hamba "github.com/hamba/avro/v2" -) - -func TestEditSchemaStringEqual(t *testing.T) { - tests := []struct { - avroSchema string - arrowSchema []arrow.Field - }{ - { - avroSchema: `{ - "fields": [ - { - "name": "inheritNull", - "type": { - "name": "Simple", - "symbols": [ - "a", - "b" - ], - "type": "enum" - } - }, - { - "name": "explicitNamespace", - "type": { - "name": "test", - "namespace": "org.hamba.avro", - "size": 12, - "type": "fixed" - } - }, - { - "name": "fullName", - "type": { - "type": "record", - "name": "fullName_data", - "namespace": "ignored", - "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", - "fields": [{ - "name": "inheritNamespace", - "type": { - "type": "enum", - "name": "Understanding", - "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. 
The fullname is 'a.full.Understanding'.", - "symbols": ["d", "e"] - } - }, { - "name": "md5", - "type": { - "name": "md5_data", - "type": "fixed", - "size": 16, - "namespace": "ignored" - } - } - ] - } - }, - { - "name": "id", - "type": "int" - }, - { - "name": "bigId", - "type": "long" - }, - { - "name": "temperature", - "type": [ - "null", - "float" - ] - }, - { - "name": "fraction", - "type": [ - "null", - "double" - ] - }, - { - "name": "is_emergency", - "type": "boolean" - }, - { - "name": "remote_ip", - "type": [ - "null", - "bytes" - ] - }, - { - "name": "person", - "type": { - "fields": [ - { - "name": "lastname", - "type": "string" - }, - { - "name": "address", - "type": { - "fields": [ - { - "name": "streetaddress", - "type": "string" - }, - { - "name": "city", - "type": "string" - } - ], - "name": "AddressUSRecord", - "type": "record" - } - }, - { - "name": "mapfield", - "type": { - "default": { - }, - "type": "map", - "values": "long" - } - }, - { - "name": "arrayField", - "type": { - "default": [ - ], - "items": "string", - "type": "array" - } - } - ], - "name": "person_data", - "type": "record" - } - }, - { - "name": "decimalField", - "type": { - "logicalType": "decimal", - "precision": 4, - "scale": 2, - "type": "bytes" - } - }, - { - "logicalType": "uuid", - "name": "uuidField", - "type": "string" - }, - { - "name": "timemillis", - "type": { - "type": "int", - "logicalType": "time-millis" - } - }, - { - "name": "timemicros", - "type": { - "type": "long", - "logicalType": "time-micros" - } - }, - { - "name": "timestampmillis", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "timestampmicros", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "duration", - "type": { - "name": "duration", - "namespace": "whyowhy", - "logicalType": "duration", - "size": 12, - "type": "fixed" - } - }, - { - "name": "date", - "type": { - "logicalType": "date", - "type": "int" - } - } - ], - 
"name": "Example", - "type": "record" - }`, - arrowSchema: []arrow.Field{ - { - Name: "explicitNamespace", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, - }, - { - Name: "fullName", - Type: arrow.StructOf( - arrow.Field{ - Name: "inheritNamespace", - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, - }, - arrow.Field{ - Name: "md5", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}, - }, - ), - }, - { - Name: "id", - Type: arrow.PrimitiveTypes.Int32, - }, - { - Name: "bigId", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "temperature", - Type: arrow.PrimitiveTypes.Float32, - Nullable: true, - }, - { - Name: "fraction", - Type: arrow.PrimitiveTypes.Float64, - Nullable: true, - }, - { - Name: "is_emergency", - Type: arrow.FixedWidthTypes.Boolean, - }, - { - Name: "remote_ip", - Type: arrow.BinaryTypes.Binary, - Nullable: true, - }, - { - Name: "person", - Type: arrow.StructOf( - arrow.Field{ - Name: "lastname", - Type: arrow.BinaryTypes.String, - }, - arrow.Field{ - Name: "address", - Type: arrow.StructOf( - arrow.Field{ - Name: "streetaddress", - Type: arrow.BinaryTypes.String, - }, - arrow.Field{ - Name: "city", - Type: arrow.BinaryTypes.String, - }, - ), - }, - arrow.Field{ - Name: "mapfield", - Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), - Nullable: true, - }, - arrow.Field{ - Name: "arrayField", - Type: arrow.ListOfNonNullable(arrow.BinaryTypes.String), - }, - ), - }, - { - Name: "decimalField", - Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}, - }, - { - Name: "uuidField", - Type: arrow.BinaryTypes.String, - }, - { - Name: "timemillis", - Type: arrow.FixedWidthTypes.Time32ms, - }, - { - Name: "timemicros", - Type: arrow.FixedWidthTypes.Time64us, - }, - { - Name: "timestampmillis", - Type: arrow.FixedWidthTypes.Timestamp_ms, - }, - { - Name: "timestampmicros", - Type: arrow.FixedWidthTypes.Timestamp_us, - }, - { - Name: "duration", - Type: 
arrow.FixedWidthTypes.MonthDayNanoInterval, - }, - { - Name: "date", - Type: arrow.FixedWidthTypes.Date32, - }, - }, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - want := arrow.NewSchema(test.arrowSchema, nil) - - schema, err := hamba.ParseBytes([]byte(test.avroSchema)) - if err != nil { - t.Fatalf("%v", err) - } - r := new(OCFReader) - r.avroSchema = schema.String() - r.editAvroSchema(schemaEdit{method: "delete", path: "fields.0"}) - schema, err = hamba.Parse(r.avroSchema) - if err != nil { - t.Fatalf("%v: could not parse modified avro schema", arrow.ErrInvalid) - } - got, err := ArrowSchemaFromAvro(schema) - if err != nil { - t.Fatalf("%v", err) - } - if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { - t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) - } else { - t.Logf("schema.String() comparison passed") - } - }) - } -} diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go deleted file mode 100644 index dab2b33dce601..0000000000000 --- a/go/arrow/avro/reader_types.go +++ /dev/null @@ -1,875 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "math/big" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -type dataLoader struct { - idx, depth int32 - list *fieldPos - item *fieldPos - mapField *fieldPos - mapKey *fieldPos - mapValue *fieldPos - fields []*fieldPos - children []*dataLoader -} - -var ( - ErrNullStructData = errors.New("null struct data") -) - -func newDataLoader() *dataLoader { return &dataLoader{idx: 0, depth: 0} } - -// drawTree takes the tree of field builders produced by mapFieldBuilders() -// and produces another tree structure and aggregates fields whose values can -// be retrieved from a `map[string]any` into a slice of builders, and creates a hierarchy to -// deal with nested types (lists and maps). -func (d *dataLoader) drawTree(field *fieldPos) { - for _, f := range field.children() { - if f.isList || f.isMap { - if f.isList { - c := d.newListChild(f) - if !f.childrens[0].isList { - c.item = f.childrens[0] - c.drawTree(f.childrens[0]) - } else { - c.drawTree(f.childrens[0].childrens[0]) - } - } - if f.isMap { - c := d.newMapChild(f) - if !arrow.IsNested(f.childrens[1].builder.Type().ID()) { - c.mapKey = f.childrens[0] - c.mapValue = f.childrens[1] - } else { - c.mapKey = f.childrens[0] - m := c.newChild() - m.mapValue = f.childrens[1] - m.drawTree(f.childrens[1]) - } - } - } else { - d.fields = append(d.fields, f) - if len(f.children()) > 0 { - d.drawTree(f) - } - } - } -} - -// loadDatum loads decoded Avro data to the schema fields' builder functions. 
-// Since array.StructBuilder.AppendNull() will recursively append null to all of the -// struct's fields, in the case of nil being passed to a struct's builderFunc it will -// return a ErrNullStructData error to signal that all its sub-fields can be skipped. -func (d *dataLoader) loadDatum(data any) error { - if d.list == nil && d.mapField == nil { - if d.mapValue != nil { - d.mapValue.appendFunc(data) - } - var NullParent *fieldPos - for _, f := range d.fields { - if f.parent == NullParent { - continue - } - if d.mapValue == nil { - err := f.appendFunc(f.getValue(data)) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } else { - switch dt := data.(type) { - case nil: - err := f.appendFunc(dt) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - case []any: - if len(d.children) < 1 { - for _, e := range dt { - err := f.appendFunc(e) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } - } else { - for _, e := range dt { - d.children[0].loadDatum(e) - } - } - case map[string]any: - err := f.appendFunc(f.getValue(dt)) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } - - } - } - for _, c := range d.children { - if c.list != nil { - c.loadDatum(c.list.getValue(data)) - } - if c.mapField != nil { - switch dt := data.(type) { - case nil: - c.loadDatum(dt) - case map[string]any: - c.loadDatum(c.mapField.getValue(dt)) - default: - c.loadDatum(c.mapField.getValue(data)) - } - } - } - } else { - if d.list != nil { - switch dt := data.(type) { - case nil: - d.list.appendFunc(dt) - case []any: - d.list.appendFunc(dt) - for _, e := range dt { - if d.item != nil { - d.item.appendFunc(e) - } - var NullParent *fieldPos - for _, f := range d.fields { - if f.parent == NullParent { - continue - } - err := f.appendFunc(f.getValue(e)) - if err != nil { - if err == ErrNullStructData { 
- NullParent = f - continue - } - return err - } - } - for _, c := range d.children { - if c.list != nil { - c.loadDatum(c.list.getValue(e)) - } - if c.mapField != nil { - c.loadDatum(c.mapField.getValue(e)) - } - } - } - case map[string]any: - d.list.appendFunc(dt["array"]) - for _, e := range dt["array"].([]any) { - if d.item != nil { - d.item.appendFunc(e) - } - var NullParent *fieldPos - for _, f := range d.fields { - if f.parent == NullParent { - continue - } - err := f.appendFunc(f.getValue(e)) - if err != nil { - if err == ErrNullStructData { - NullParent = f - continue - } - return err - } - } - for _, c := range d.children { - c.loadDatum(c.list.getValue(e)) - } - } - default: - d.list.appendFunc(data) - d.item.appendFunc(dt) - } - } - if d.mapField != nil { - switch dt := data.(type) { - case nil: - d.mapField.appendFunc(dt) - case map[string]any: - - d.mapField.appendFunc(dt) - for k, v := range dt { - d.mapKey.appendFunc(k) - if d.mapValue != nil { - d.mapValue.appendFunc(v) - } else { - d.children[0].loadDatum(v) - } - } - } - } - } - return nil -} - -func (d *dataLoader) newChild() *dataLoader { - var child *dataLoader = &dataLoader{ - depth: d.depth + 1, - } - d.children = append(d.children, child) - return child -} - -func (d *dataLoader) newListChild(list *fieldPos) *dataLoader { - var child *dataLoader = &dataLoader{ - list: list, - item: list.childrens[0], - depth: d.depth + 1, - } - d.children = append(d.children, child) - return child -} - -func (d *dataLoader) newMapChild(mapField *fieldPos) *dataLoader { - var child *dataLoader = &dataLoader{ - mapField: mapField, - depth: d.depth + 1, - } - d.children = append(d.children, child) - return child -} - -type fieldPos struct { - parent *fieldPos - fieldName string - builder array.Builder - path []string - isList bool - isItem bool - isStruct bool - isMap bool - typeName string - appendFunc func(val interface{}) error - metadatas arrow.Metadata - childrens []*fieldPos - index, depth int32 -} - 
-func newFieldPos() *fieldPos { return &fieldPos{index: -1} } - -func (f *fieldPos) children() []*fieldPos { return f.childrens } - -func (f *fieldPos) newChild(childName string, childBuilder array.Builder, meta arrow.Metadata) *fieldPos { - var child fieldPos = fieldPos{ - parent: f, - fieldName: childName, - builder: childBuilder, - metadatas: meta, - index: int32(len(f.childrens)), - depth: f.depth + 1, - } - if f.isList { - child.isItem = true - } - child.path = child.buildNamePath() - f.childrens = append(f.childrens, &child) - return &child -} - -func (f *fieldPos) buildNamePath() []string { - var path []string - var listPath []string - cur := f - for i := f.depth - 1; i >= 0; i-- { - if cur.typeName == "" { - path = append([]string{cur.fieldName}, path...) - } else { - path = append([]string{cur.fieldName, cur.typeName}, path...) - } - if !cur.parent.isMap { - cur = cur.parent - } - } - if f.parent.parent != nil && f.parent.parent.isList { - for i := len(path) - 1; i >= 0; i-- { - if path[i] != "item" { - listPath = append([]string{path[i]}, listPath...) - } else { - return listPath - } - } - } - if f.parent != nil && f.parent.fieldName == "value" { - for i := len(path) - 1; i >= 0; i-- { - if path[i] != "value" { - listPath = append([]string{path[i]}, listPath...) 
- } else { - return listPath - } - } - } - return path -} - -// NamePath returns a slice of keys making up the path to the field -func (f *fieldPos) namePath() []string { return f.path } - -// GetValue retrieves the value from the map[string]any -// by following the field's key path -func (f *fieldPos) getValue(m any) any { - if _, ok := m.(map[string]any); !ok { - return m - } - for _, key := range f.namePath() { - valueMap, ok := m.(map[string]any) - if !ok { - if key == "item" { - return m - } - return nil - } - m, ok = valueMap[key] - if !ok { - return nil - } - } - return m -} - -// Avro data is loaded to Arrow arrays using the following type mapping: -// -// Avro Go Arrow -// null nil Null -// boolean bool Boolean -// bytes []byte Binary -// float float32 Float32 -// double float64 Float64 -// long int64 Int64 -// int int32 Int32 -// string string String -// array []interface{} List -// enum string Dictionary -// fixed []byte FixedSizeBinary -// map and record map[string]any Struct -// -// mapFieldBuilders builds a tree of field builders matching the Arrow schema -func mapFieldBuilders(b array.Builder, field arrow.Field, parent *fieldPos) { - f := parent.newChild(field.Name, b, field.Metadata) - switch bt := b.(type) { - case *array.BinaryBuilder: - f.appendFunc = func(data interface{}) error { - appendBinaryData(bt, data) - return nil - } - case *array.BinaryDictionaryBuilder: - // has metadata for Avro enum symbols - f.appendFunc = func(data interface{}) error { - appendBinaryDictData(bt, data) - return nil - } - // add Avro enum symbols to builder - sb := array.NewStringBuilder(memory.DefaultAllocator) - for _, v := range field.Metadata.Values() { - sb.Append(v) - } - sa := sb.NewStringArray() - bt.InsertStringDictValues(sa) - case *array.BooleanBuilder: - f.appendFunc = func(data interface{}) error { - appendBoolData(bt, data) - return nil - } - case *array.Date32Builder: - f.appendFunc = func(data interface{}) error { - appendDate32Data(bt, data) - 
return nil - } - case *array.Decimal128Builder: - f.appendFunc = func(data interface{}) error { - err := appendDecimal128Data(bt, data) - if err != nil { - return err - } - return nil - } - case *array.Decimal256Builder: - f.appendFunc = func(data interface{}) error { - err := appendDecimal256Data(bt, data) - if err != nil { - return err - } - return nil - } - case *extensions.UUIDBuilder: - f.appendFunc = func(data interface{}) error { - switch dt := data.(type) { - case nil: - bt.AppendNull() - case string: - err := bt.AppendValueFromString(dt) - if err != nil { - return err - } - case []byte: - err := bt.AppendValueFromString(string(dt)) - if err != nil { - return err - } - } - return nil - } - case *array.FixedSizeBinaryBuilder: - f.appendFunc = func(data interface{}) error { - appendFixedSizeBinaryData(bt, data) - return nil - } - case *array.Float32Builder: - f.appendFunc = func(data interface{}) error { - appendFloat32Data(bt, data) - return nil - } - case *array.Float64Builder: - f.appendFunc = func(data interface{}) error { - appendFloat64Data(bt, data) - return nil - } - case *array.Int32Builder: - f.appendFunc = func(data interface{}) error { - appendInt32Data(bt, data) - return nil - } - case *array.Int64Builder: - f.appendFunc = func(data interface{}) error { - appendInt64Data(bt, data) - return nil - } - case *array.LargeListBuilder: - vb := bt.ValueBuilder() - f.isList = true - mapFieldBuilders(vb, field.Type.(*arrow.LargeListType).ElemField(), f) - f.appendFunc = func(data interface{}) error { - switch dt := data.(type) { - case nil: - bt.AppendNull() - case []interface{}: - if len(dt) == 0 { - bt.AppendEmptyValue() - } else { - bt.Append(true) - } - default: - bt.Append(true) - } - return nil - } - case *array.ListBuilder: - vb := bt.ValueBuilder() - f.isList = true - mapFieldBuilders(vb, field.Type.(*arrow.ListType).ElemField(), f) - f.appendFunc = func(data interface{}) error { - switch dt := data.(type) { - case nil: - bt.AppendNull() - case 
[]interface{}: - if len(dt) == 0 { - bt.AppendEmptyValue() - } else { - bt.Append(true) - } - default: - bt.Append(true) - } - return nil - } - case *array.MapBuilder: - // has metadata for objects in values - f.isMap = true - kb := bt.KeyBuilder() - ib := bt.ItemBuilder() - mapFieldBuilders(kb, field.Type.(*arrow.MapType).KeyField(), f) - mapFieldBuilders(ib, field.Type.(*arrow.MapType).ItemField(), f) - f.appendFunc = func(data interface{}) error { - switch data.(type) { - case nil: - bt.AppendNull() - default: - bt.Append(true) - } - return nil - } - case *array.MonthDayNanoIntervalBuilder: - f.appendFunc = func(data interface{}) error { - appendDurationData(bt, data) - return nil - } - case *array.StringBuilder: - f.appendFunc = func(data interface{}) error { - appendStringData(bt, data) - return nil - } - case *array.StructBuilder: - // has metadata for Avro Union named types - f.typeName, _ = field.Metadata.GetValue("typeName") - f.isStruct = true - // create children - for i, p := range field.Type.(*arrow.StructType).Fields() { - mapFieldBuilders(bt.FieldBuilder(i), p, f) - } - f.appendFunc = func(data interface{}) error { - switch data.(type) { - case nil: - bt.AppendNull() - return ErrNullStructData - default: - bt.Append(true) - } - return nil - } - case *array.Time32Builder: - f.appendFunc = func(data interface{}) error { - appendTime32Data(bt, data) - return nil - } - case *array.Time64Builder: - f.appendFunc = func(data interface{}) error { - appendTime64Data(bt, data) - return nil - } - case *array.TimestampBuilder: - f.appendFunc = func(data interface{}) error { - appendTimestampData(bt, data) - return nil - } - } -} - -func appendBinaryData(b *array.BinaryBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case map[string]any: - switch ct := dt["bytes"].(type) { - case nil: - b.AppendNull() - default: - b.Append(ct.([]byte)) - } - default: - b.Append(fmt.Append([]byte{}, data)) - } -} - -func 
appendBinaryDictData(b *array.BinaryDictionaryBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case string: - b.AppendString(dt) - case map[string]any: - switch v := dt["string"].(type) { - case nil: - b.AppendNull() - case string: - b.AppendString(v) - } - } -} - -func appendBoolData(b *array.BooleanBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case bool: - b.Append(dt) - case map[string]any: - switch v := dt["boolean"].(type) { - case nil: - b.AppendNull() - case bool: - b.Append(v) - } - } -} - -func appendDate32Data(b *array.Date32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Date32(dt)) - case map[string]any: - switch v := dt["int"].(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Date32(v)) - } - } -} - -func appendDecimal128Data(b *array.Decimal128Builder, data interface{}) error { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - buf := bytes.NewBuffer(dt) - if len(dt) <= 38 { - var intData int64 - err := binary.Read(buf, binary.BigEndian, &intData) - if err != nil { - return err - } - b.Append(decimal128.FromI64(intData)) - } else { - var bigIntData big.Int - b.Append(decimal128.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - } - case map[string]any: - buf := bytes.NewBuffer(dt["bytes"].([]byte)) - if len(dt["bytes"].([]byte)) <= 38 { - var intData int64 - err := binary.Read(buf, binary.BigEndian, &intData) - if err != nil { - return err - } - b.Append(decimal128.FromI64(intData)) - } else { - var bigIntData big.Int - b.Append(decimal128.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - } - } - return nil -} - -func appendDecimal256Data(b *array.Decimal256Builder, data interface{}) error { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - var bigIntData big.Int - buf := bytes.NewBuffer(dt) - 
b.Append(decimal256.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - case map[string]any: - var bigIntData big.Int - buf := bytes.NewBuffer(dt["bytes"].([]byte)) - b.Append(decimal256.FromBigInt(bigIntData.SetBytes(buf.Bytes()))) - } - return nil -} - -// Avro duration logical type annotates Avro fixed type of size 12, which stores three little-endian -// unsigned integers that represent durations at different granularities of time. The first stores -// a number in months, the second stores a number in days, and the third stores a number in milliseconds. -func appendDurationData(b *array.MonthDayNanoIntervalBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - dur := new(arrow.MonthDayNanoInterval) - dur.Months = int32(binary.LittleEndian.Uint16(dt[:3])) - dur.Days = int32(binary.LittleEndian.Uint16(dt[4:7])) - dur.Nanoseconds = int64(binary.LittleEndian.Uint32(dt[8:]) * 1000000) - b.Append(*dur) - case map[string]any: - switch dtb := dt["bytes"].(type) { - case nil: - b.AppendNull() - case []byte: - dur := new(arrow.MonthDayNanoInterval) - dur.Months = int32(binary.LittleEndian.Uint16(dtb[:3])) - dur.Days = int32(binary.LittleEndian.Uint16(dtb[4:7])) - dur.Nanoseconds = int64(binary.LittleEndian.Uint32(dtb[8:]) * 1000000) - b.Append(*dur) - } - } -} - -func appendFixedSizeBinaryData(b *array.FixedSizeBinaryBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case []byte: - b.Append(dt) - case map[string]any: - switch v := dt["bytes"].(type) { - case nil: - b.AppendNull() - case []byte: - b.Append(v) - } - } -} - -func appendFloat32Data(b *array.Float32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case float32: - b.Append(dt) - case map[string]any: - switch v := dt["float"].(type) { - case nil: - b.AppendNull() - case float32: - b.Append(v) - } - } -} - -func appendFloat64Data(b *array.Float64Builder, data interface{}) { - switch dt := 
data.(type) { - case nil: - b.AppendNull() - case float64: - b.Append(dt) - case map[string]any: - switch v := dt["double"].(type) { - case nil: - b.AppendNull() - case float64: - b.Append(v) - } - } -} - -func appendInt32Data(b *array.Int32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int: - b.Append(int32(dt)) - case int32: - b.Append(dt) - case map[string]any: - switch v := dt["int"].(type) { - case nil: - b.AppendNull() - case int: - b.Append(int32(v)) - case int32: - b.Append(v) - } - } -} - -func appendInt64Data(b *array.Int64Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int: - b.Append(int64(dt)) - case int64: - b.Append(dt) - case map[string]any: - switch v := dt["long"].(type) { - case nil: - b.AppendNull() - case int: - b.Append(int64(v)) - case int64: - b.Append(v) - } - } -} - -func appendStringData(b *array.StringBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case string: - b.Append(dt) - case map[string]any: - switch v := dt["string"].(type) { - case nil: - b.AppendNull() - case string: - b.Append(v) - } - default: - b.Append(fmt.Sprint(data)) - } -} - -func appendTime32Data(b *array.Time32Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Time32(dt)) - case map[string]any: - switch v := dt["int"].(type) { - case nil: - b.AppendNull() - case int32: - b.Append(arrow.Time32(v)) - } - } -} - -func appendTime64Data(b *array.Time64Builder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int64: - b.Append(arrow.Time64(dt)) - case map[string]any: - switch v := dt["long"].(type) { - case nil: - b.AppendNull() - case int64: - b.Append(arrow.Time64(v)) - } - } -} - -func appendTimestampData(b *array.TimestampBuilder, data interface{}) { - switch dt := data.(type) { - case nil: - b.AppendNull() - case int64: - 
b.Append(arrow.Timestamp(dt)) - case map[string]any: - switch v := dt["long"].(type) { - case nil: - b.AppendNull() - case int64: - b.Append(arrow.Timestamp(v)) - } - } -} diff --git a/go/arrow/avro/schema.go b/go/arrow/avro/schema.go deleted file mode 100644 index a6de3718d3ccf..0000000000000 --- a/go/arrow/avro/schema.go +++ /dev/null @@ -1,423 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Package avro reads Avro OCF files and presents the extracted data as records -package avro - -import ( - "fmt" - "math" - "strconv" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/internal/utils" - avro "github.com/hamba/avro/v2" -) - -type schemaNode struct { - name string - parent *schemaNode - schema avro.Schema - union bool - nullable bool - childrens []*schemaNode - arrowField arrow.Field - schemaCache *avro.SchemaCache - index, depth int32 -} - -func newSchemaNode() *schemaNode { - var schemaCache avro.SchemaCache - return &schemaNode{name: "", index: -1, schemaCache: &schemaCache} -} - -func (node *schemaNode) schemaPath() string { - var path string - n := node - for n.parent != nil { - path = "." + n.name + path - n = n.parent - } - return path -} - -func (node *schemaNode) newChild(n string, s avro.Schema) *schemaNode { - child := &schemaNode{ - name: n, - parent: node, - schema: s, - schemaCache: node.schemaCache, - index: int32(len(node.childrens)), - depth: node.depth + 1, - } - node.childrens = append(node.childrens, child) - return child -} -func (node *schemaNode) children() []*schemaNode { return node.childrens } - -// func (node *schemaNode) nodeName() string { return node.name } - -// ArrowSchemaFromAvro returns a new Arrow schema from an Avro schema -func ArrowSchemaFromAvro(schema avro.Schema) (s *arrow.Schema, err error) { - defer func() { - if r := recover(); r != nil { - s = nil - err = utils.FormatRecoveredError("invalid avro schema", r) - } - }() - n := newSchemaNode() - n.schema = schema - c := n.newChild(n.schema.(avro.NamedSchema).Name(), n.schema) - arrowSchemafromAvro(c) - var fields []arrow.Field - for _, g := range c.children() { - fields = append(fields, g.arrowField) - } - s = arrow.NewSchema(fields, nil) - return s, nil -} - -func arrowSchemafromAvro(n 
*schemaNode) { - if ns, ok := n.schema.(avro.NamedSchema); ok { - n.schemaCache.Add(ns.Name(), ns) - } - switch st := n.schema.Type(); st { - case "record": - iterateFields(n) - case "enum": - n.schemaCache.Add(n.schema.(avro.NamedSchema).Name(), n.schema.(*avro.EnumSchema)) - symbols := make(map[string]string) - for index, symbol := range n.schema.(avro.PropertySchema).(*avro.EnumSchema).Symbols() { - k := strconv.FormatInt(int64(index), 10) - symbols[k] = symbol - } - var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} - sl := int64(len(symbols)) - switch { - case sl <= math.MaxUint8: - dt.IndexType = arrow.PrimitiveTypes.Uint8 - case sl > math.MaxUint8 && sl <= math.MaxUint16: - dt.IndexType = arrow.PrimitiveTypes.Uint16 - case sl > math.MaxUint16 && sl <= math.MaxUint32: - dt.IndexType = arrow.PrimitiveTypes.Uint32 - } - n.arrowField = buildArrowField(n, &dt, arrow.MetadataFrom(symbols)) - case "array": - // logical items type - c := n.newChild(n.name, n.schema.(*avro.ArraySchema).Items()) - if isLogicalSchemaType(n.schema.(*avro.ArraySchema).Items()) { - avroLogicalToArrowField(c) - } else { - arrowSchemafromAvro(c) - } - switch c.arrowField.Nullable { - case true: - n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata} - case false: - n.arrowField = arrow.Field{Name: n.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata} - } - case "map": - n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), n.schema.(*avro.MapSchema).Values()) - c := n.newChild(n.name, n.schema.(*avro.MapSchema).Values()) - arrowSchemafromAvro(c) - n.arrowField = buildArrowField(n, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) - case "union": - if n.schema.(*avro.UnionSchema).Nullable() { - if len(n.schema.(*avro.UnionSchema).Types()) > 1 { - 
n.schema = n.schema.(*avro.UnionSchema).Types()[1] - n.union = true - n.nullable = true - arrowSchemafromAvro(n) - } - } - // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType - case "fixed": - n.schemaCache.Add(n.schema.(avro.NamedSchema).Name(), n.schema.(*avro.FixedSchema)) - if isLogicalSchemaType(n.schema) { - avroLogicalToArrowField(n) - } else { - n.arrowField = buildArrowField(n, &arrow.FixedSizeBinaryType{ByteWidth: n.schema.(*avro.FixedSchema).Size()}, arrow.Metadata{}) - } - case "string", "bytes", "int", "long": - if isLogicalSchemaType(n.schema) { - avroLogicalToArrowField(n) - } else { - n.arrowField = buildArrowField(n, avroPrimitiveToArrowType(string(st)), arrow.Metadata{}) - } - case "float", "double", "boolean": - n.arrowField = arrow.Field{Name: n.name, Type: avroPrimitiveToArrowType(string(st)), Nullable: n.nullable} - case "": - refSchema := n.schemaCache.Get(string(n.schema.(*avro.RefSchema).Schema().Name())) - if refSchema == nil { - panic(fmt.Errorf("could not find schema for '%v' in schema cache - %v", n.schemaPath(), n.schema.(*avro.RefSchema).Schema().Name())) - } - n.schema = refSchema - arrowSchemafromAvro(n) - case "null": - n.schemaCache.Add(n.schema.(*avro.MapSchema).Values().(avro.NamedSchema).Name(), &avro.NullSchema{}) - n.nullable = true - n.arrowField = buildArrowField(n, arrow.Null, arrow.Metadata{}) - } -} - -// iterate record Fields() -func iterateFields(n *schemaNode) { - for _, f := range n.schema.(*avro.RecordSchema).Fields() { - switch ft := f.Type().(type) { - // Avro "array" field type - case *avro.ArraySchema: - n.schemaCache.Add(f.Name(), ft.Items()) - // logical items type - c := n.newChild(f.Name(), ft.Items()) - if isLogicalSchemaType(ft.Items()) { - avroLogicalToArrowField(c) - } else { - arrowSchemafromAvro(c) - } - switch c.arrowField.Nullable { - case true: - c.arrowField = arrow.Field{Name: c.name, Type: arrow.ListOfField(c.arrowField), Metadata: c.arrowField.Metadata} - case false: - c.arrowField 
= arrow.Field{Name: c.name, Type: arrow.ListOfNonNullable(c.arrowField.Type), Metadata: c.arrowField.Metadata} - } - // Avro "enum" field type = Arrow dictionary type - case *avro.EnumSchema: - n.schemaCache.Add(f.Type().(*avro.EnumSchema).Name(), f.Type()) - c := n.newChild(f.Name(), f.Type()) - symbols := make(map[string]string) - for index, symbol := range ft.Symbols() { - k := strconv.FormatInt(int64(index), 10) - symbols[k] = symbol - } - var dt arrow.DictionaryType = arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint64, ValueType: arrow.BinaryTypes.String, Ordered: false} - sl := len(symbols) - switch { - case sl <= math.MaxUint8: - dt.IndexType = arrow.PrimitiveTypes.Uint8 - case sl > math.MaxUint8 && sl <= math.MaxUint16: - dt.IndexType = arrow.PrimitiveTypes.Uint16 - case sl > math.MaxUint16 && sl <= math.MaxInt: - dt.IndexType = arrow.PrimitiveTypes.Uint32 - } - c.arrowField = buildArrowField(c, &dt, arrow.MetadataFrom(symbols)) - // Avro "fixed" field type = Arrow FixedSize Primitive BinaryType - case *avro.FixedSchema: - n.schemaCache.Add(f.Name(), f.Type()) - c := n.newChild(f.Name(), f.Type()) - if isLogicalSchemaType(f.Type()) { - avroLogicalToArrowField(c) - } else { - arrowSchemafromAvro(c) - } - case *avro.RecordSchema: - n.schemaCache.Add(f.Name(), f.Type()) - c := n.newChild(f.Name(), f.Type()) - iterateFields(c) - // Avro "map" field type - KVP with value of one type - keys are strings - case *avro.MapSchema: - n.schemaCache.Add(f.Name(), ft.Values()) - c := n.newChild(f.Name(), ft.Values()) - arrowSchemafromAvro(c) - c.arrowField = buildArrowField(c, arrow.MapOf(arrow.BinaryTypes.String, c.arrowField.Type), c.arrowField.Metadata) - case *avro.UnionSchema: - if ft.Nullable() { - if len(ft.Types()) > 1 { - n.schemaCache.Add(f.Name(), ft.Types()[1]) - c := n.newChild(f.Name(), ft.Types()[1]) - c.union = true - c.nullable = true - arrowSchemafromAvro(c) - } - } - default: - n.schemaCache.Add(f.Name(), f.Type()) - if 
isLogicalSchemaType(f.Type()) { - c := n.newChild(f.Name(), f.Type()) - avroLogicalToArrowField(c) - } else { - c := n.newChild(f.Name(), f.Type()) - arrowSchemafromAvro(c) - } - - } - } - var fields []arrow.Field - for _, child := range n.children() { - fields = append(fields, child.arrowField) - } - - namedSchema, ok := isNamedSchema(n.schema) - - var md arrow.Metadata - if ok && namedSchema != n.name+"_data" && n.union { - md = arrow.NewMetadata([]string{"typeName"}, []string{namedSchema}) - } - n.arrowField = buildArrowField(n, arrow.StructOf(fields...), md) -} - -func isLogicalSchemaType(s avro.Schema) bool { - lts, ok := s.(avro.LogicalTypeSchema) - if !ok { - return false - } - if lts.Logical() != nil { - return true - } - return false -} - -func isNamedSchema(s avro.Schema) (string, bool) { - if ns, ok := s.(avro.NamedSchema); ok { - return ns.FullName(), ok - } - return "", false -} - -func buildArrowField(n *schemaNode, t arrow.DataType, m arrow.Metadata) arrow.Field { - return arrow.Field{ - Name: n.name, - Type: t, - Metadata: m, - Nullable: n.nullable, - } -} - -// Avro primitive type. -// -// NOTE: Arrow Binary type is used as a catchall to avoid potential data loss. 
-func avroPrimitiveToArrowType(avroFieldType string) arrow.DataType { - switch avroFieldType { - // int: 32-bit signed integer - case "int": - return arrow.PrimitiveTypes.Int32 - // long: 64-bit signed integer - case "long": - return arrow.PrimitiveTypes.Int64 - // float: single precision (32-bit) IEEE 754 floating-point number - case "float": - return arrow.PrimitiveTypes.Float32 - // double: double precision (64-bit) IEEE 754 floating-point number - case "double": - return arrow.PrimitiveTypes.Float64 - // bytes: sequence of 8-bit unsigned bytes - case "bytes": - return arrow.BinaryTypes.Binary - // boolean: a binary value - case "boolean": - return arrow.FixedWidthTypes.Boolean - // string: unicode character sequence - case "string": - return arrow.BinaryTypes.String - } - return nil -} - -func avroLogicalToArrowField(n *schemaNode) { - var dt arrow.DataType - // Avro logical types - switch lt := n.schema.(avro.LogicalTypeSchema).Logical(); lt.Type() { - // The decimal logical type represents an arbitrary-precision signed decimal number of the form unscaled × 10-scale. - // A decimal logical type annotates Avro bytes or fixed types. The byte array must contain the two’s-complement - // representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified - // using an attribute. - // - // The following attributes are supported: - // scale, a JSON integer representing the scale (optional). If not specified the scale is 0. - // precision, a JSON integer representing the (maximum) precision of decimals stored in this type (required). - case "decimal": - id := arrow.DECIMAL128 - if lt.(*avro.DecimalLogicalSchema).Precision() > decimal128.MaxPrecision { - id = arrow.DECIMAL256 - } - dt, _ = arrow.NewDecimalType(id, int32(lt.(*avro.DecimalLogicalSchema).Precision()), int32(lt.(*avro.DecimalLogicalSchema).Scale())) - - // The uuid logical type represents a random generated universally unique identifier (UUID). 
- // A uuid logical type annotates an Avro string. The string has to conform with RFC-4122 - case "uuid": - dt = extensions.NewUUIDType() - - // The date logical type represents a date within the calendar, with no reference to a particular - // time zone or time of day. - // A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, - // 1 January 1970 (ISO calendar). - case "date": - dt = arrow.FixedWidthTypes.Date32 - - // The time-millis logical type represents a time of day, with no reference to a particular calendar, - // time zone or date, with a precision of one millisecond. - // A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds - // after midnight, 00:00:00.000. - case "time-millis": - dt = arrow.FixedWidthTypes.Time32ms - - // The time-micros logical type represents a time of day, with no reference to a particular calendar, - // time zone or date, with a precision of one microsecond. - // A time-micros logical type annotates an Avro long, where the long stores the number of microseconds - // after midnight, 00:00:00.000000. - case "time-micros": - dt = arrow.FixedWidthTypes.Time64us - - // The timestamp-millis logical type represents an instant on the global timeline, independent of a - // particular time zone or calendar, with a precision of one millisecond. Please note that time zone - // information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, - // but not the original representation. In practice, such timestamps are typically displayed to users in - // their local time zones, therefore they may be displayed differently depending on the execution environment. - // A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds - // from the unix epoch, 1 January 1970 00:00:00.000 UTC. 
- case "timestamp-millis": - dt = arrow.FixedWidthTypes.Timestamp_ms - - // The timestamp-micros logical type represents an instant on the global timeline, independent of a - // particular time zone or calendar, with a precision of one microsecond. Please note that time zone - // information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, - // but not the original representation. In practice, such timestamps are typically displayed to users - // in their local time zones, therefore they may be displayed differently depending on the execution environment. - // A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds - // from the unix epoch, 1 January 1970 00:00:00.000000 UTC. - case "timestamp-micros": - dt = arrow.FixedWidthTypes.Timestamp_us - - // The local-timestamp-millis logical type represents a timestamp in a local timezone, regardless of - // what specific time zone is considered local, with a precision of one millisecond. - // A local-timestamp-millis logical type annotates an Avro long, where the long stores the number of - // milliseconds, from 1 January 1970 00:00:00.000. - // Note: not implemented in hamba/avro - // case "local-timestamp-millis": - // dt = &arrow.TimestampType{Unit: arrow.Millisecond} - - // The local-timestamp-micros logical type represents a timestamp in a local timezone, regardless of - // what specific time zone is considered local, with a precision of one microsecond. - // A local-timestamp-micros logical type annotates an Avro long, where the long stores the number of - // microseconds, from 1 January 1970 00:00:00.000000. - // case "local-timestamp-micros": - // Note: not implemented in hamba/avro - // dt = &arrow.TimestampType{Unit: arrow.Microsecond} - - // The duration logical type represents an amount of time defined by a number of months, days and milliseconds. 
- // This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the - // duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other - // standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. - - // A duration logical type annotates Avro fixed type of size 12, which stores three little-endian unsigned integers - // that represent durations at different granularities of time. The first stores a number in months, the second - // stores a number in days, and the third stores a number in milliseconds. - case "duration": - dt = arrow.FixedWidthTypes.MonthDayNanoInterval - } - n.arrowField = buildArrowField(n, dt, arrow.Metadata{}) -} diff --git a/go/arrow/avro/schema_test.go b/go/arrow/avro/schema_test.go deleted file mode 100644 index 395abcb694d84..0000000000000 --- a/go/arrow/avro/schema_test.go +++ /dev/null @@ -1,362 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package avro - -import ( - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - hamba "github.com/hamba/avro/v2" -) - -func TestSchemaStringEqual(t *testing.T) { - tests := []struct { - avroSchema string - arrowSchema []arrow.Field - }{ - { - avroSchema: `{ - "fields": [ - { - "name": "inheritNull", - "type": { - "name": "Simple", - "symbols": [ - "a", - "b" - ], - "type": "enum" - } - }, - { - "name": "explicitNamespace", - "type": { - "name": "test", - "namespace": "org.hamba.avro", - "size": 12, - "type": "fixed" - } - }, - { - "name": "fullName", - "type": { - "type": "record", - "name": "fullName_data", - "namespace": "ignored", - "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", - "fields": [{ - "name": "inheritNamespace", - "type": { - "type": "enum", - "name": "Understanding", - "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. 
The fullname is 'a.full.Understanding'.", - "symbols": ["d", "e"] - } - }, { - "name": "md5", - "type": { - "name": "md5_data", - "type": "fixed", - "size": 16, - "namespace": "ignored" - } - } - ] - } - }, - { - "name": "id", - "type": "int" - }, - { - "name": "bigId", - "type": "long" - }, - { - "name": "temperature", - "type": [ - "null", - "float" - ] - }, - { - "name": "fraction", - "type": [ - "null", - "double" - ] - }, - { - "name": "is_emergency", - "type": "boolean" - }, - { - "name": "remote_ip", - "type": [ - "null", - "bytes" - ] - }, - { - "name": "person", - "type": { - "fields": [ - { - "name": "lastname", - "type": "string" - }, - { - "name": "address", - "type": { - "fields": [ - { - "name": "streetaddress", - "type": "string" - }, - { - "name": "city", - "type": "string" - } - ], - "name": "AddressUSRecord", - "type": "record" - } - }, - { - "name": "mapfield", - "type": { - "default": { - }, - "type": "map", - "values": "long" - } - }, - { - "name": "arrayField", - "type": { - "default": [ - ], - "items": "string", - "type": "array" - } - } - ], - "name": "person_data", - "type": "record" - } - }, - { - "name": "decimalField", - "type": { - "logicalType": "decimal", - "precision": 4, - "scale": 2, - "type": "bytes" - } - }, - { - "logicalType": "uuid", - "name": "uuidField", - "type": "string" - }, - { - "name": "timemillis", - "type": { - "type": "int", - "logicalType": "time-millis" - } - }, - { - "name": "timemicros", - "type": { - "type": "long", - "logicalType": "time-micros" - } - }, - { - "name": "timestampmillis", - "type": { - "type": "long", - "logicalType": "timestamp-millis" - } - }, - { - "name": "timestampmicros", - "type": { - "type": "long", - "logicalType": "timestamp-micros" - } - }, - { - "name": "duration", - "type": { - "name": "duration", - "namespace": "whyowhy", - "logicalType": "duration", - "size": 12, - "type": "fixed" - } - }, - { - "name": "date", - "type": { - "logicalType": "date", - "type": "int" - } - } - ], - 
"name": "Example", - "type": "record" - }`, - arrowSchema: []arrow.Field{ - { - Name: "inheritNull", - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, - Metadata: arrow.MetadataFrom(map[string]string{"0": "a", "1": "b"}), - }, - { - Name: "explicitNamespace", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 12}, - }, - { - Name: "fullName", - Type: arrow.StructOf( - arrow.Field{ - Name: "inheritNamespace", - Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint8, ValueType: arrow.BinaryTypes.String, Ordered: false}, - }, - arrow.Field{ - Name: "md5", - Type: &arrow.FixedSizeBinaryType{ByteWidth: 16}, - }, - ), - }, - { - Name: "id", - Type: arrow.PrimitiveTypes.Int32, - }, - { - Name: "bigId", - Type: arrow.PrimitiveTypes.Int64, - }, - { - Name: "temperature", - Type: arrow.PrimitiveTypes.Float32, - Nullable: true, - }, - { - Name: "fraction", - Type: arrow.PrimitiveTypes.Float64, - Nullable: true, - }, - { - Name: "is_emergency", - Type: arrow.FixedWidthTypes.Boolean, - }, - { - Name: "remote_ip", - Type: arrow.BinaryTypes.Binary, - Nullable: true, - }, - { - Name: "person", - Type: arrow.StructOf( - arrow.Field{ - Name: "lastname", - Type: arrow.BinaryTypes.String, - Nullable: true, - }, - arrow.Field{ - Name: "address", - Type: arrow.StructOf( - arrow.Field{ - Name: "streetaddress", - Type: arrow.BinaryTypes.String, - }, - arrow.Field{ - Name: "city", - Type: arrow.BinaryTypes.String, - }, - ), - }, - arrow.Field{ - Name: "mapfield", - Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int64), - Nullable: true, - }, - arrow.Field{ - Name: "arrayField", - Type: arrow.ListOfNonNullable(arrow.BinaryTypes.String), - }, - ), - }, - { - Name: "decimalField", - Type: &arrow.Decimal128Type{Precision: 4, Scale: 2}, - }, - { - Name: "uuidField", - Type: arrow.BinaryTypes.String, - }, - { - Name: "timemillis", - Type: arrow.FixedWidthTypes.Time32ms, - }, - { - Name: "timemicros", 
- Type: arrow.FixedWidthTypes.Time64us, - }, - { - Name: "timestampmillis", - Type: arrow.FixedWidthTypes.Timestamp_ms, - }, - { - Name: "timestampmicros", - Type: arrow.FixedWidthTypes.Timestamp_us, - }, - { - Name: "duration", - Type: arrow.FixedWidthTypes.MonthDayNanoInterval, - }, - { - Name: "date", - Type: arrow.FixedWidthTypes.Date32, - }, - }, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - want := arrow.NewSchema(test.arrowSchema, nil) - schema, err := hamba.ParseBytes([]byte(test.avroSchema)) - if err != nil { - t.Fatalf("%v", err) - } - got, err := ArrowSchemaFromAvro(schema) - if err != nil { - t.Fatalf("%v", err) - } - if !(fmt.Sprintf("%+v", want.String()) == fmt.Sprintf("%+v", got.String())) { - t.Fatalf("got=%v,\n want=%v", got.String(), want.String()) - } else { - t.Logf("schema.String() comparison passed") - } - }) - } -} diff --git a/go/arrow/avro/testdata/arrayrecordmap.avro b/go/arrow/avro/testdata/arrayrecordmap.avro deleted file mode 100644 index 84a8b59b427b5597866fb1df8dd2e805df722386..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 582 zcmeZI%3@?b#V(duR+O(-oSc!Go7l%*t(2FTo2q1`lwXvtmywv8l&A-h&&(~zNmbHO zDyb|0Nf)Ii=NF{_`DvM{IVr_TR?%>siA6<;m2j2SFdZ=A%#zexpnAA+G>gE>;HDE{ z8{8(P%)An%S{=B-NpKllZcr*NDay=CSE@zQm77?AsShGpmY7qTN}SV?O~Dsdd8N5Y zsYP%L(Y*kU3s{&F_C;)MZ7l?gfFmtAKP5GpBd;_khao>y>(M5Q>$5qRCvShebea!S w9%Dubvlv%WVsd^SM@fEKDFdU63{v0&eQ@jlA`WH=uB_C&yh;ekz<^;80Ap0g=>Px# diff --git a/go/arrow/avro/testdata/githubsamplecommits.avro b/go/arrow/avro/testdata/githubsamplecommits.avro deleted file mode 100644 index f16d17d29e991f540ad18946375e5ed19c70f8c0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 95131 zcmeFadwg7Fx%R(iI#VXo$!ao9Lt4_X(xyj}$vVxNS+UlpCtB(!-Fm9N4=(C1gCb{+qz4}Rd4`uhvwM+aYc{M6!LUmssq zs^vm))6^5&r}{zY4E9y0c8Q;CZ~yR>)p8L0J+B*Z%0cnZpZMV=JLR%7#2Xf>K|h!H z!4+)(FCXa)R(nh3k9_EdZ}As*T^Vq^(MNIc*57{+-5PMID!zVcU>h`I9rA-CT>05hn 
zGgZ0HO4aEf=;$waT`Kkt--d#oD#V=K3Cbf+GCm;Zn9*x4T}`Tw_z zR~d9q4!r+nqy6s@f?ZQ_HhpxreQ>ltvWxz_WvXS))PLD9AKv3%MTrl;|Nn6G*M9h( zAKgBi0}*=u<`cZd=^OmJ?(c7m87FT4uVK(HgkeAsCA@ub%^yC8f8hBn`mb;M(MMG3 z^EdXBc|QEi5B49?;{P74iow>u^OfQ=2b^l}M?dp_-+kg!|K8V%&#m-2+TZ)RfA4F> z=az$k(%<{Jf7@&Sr(LzNS{^KVPEx^G8&tgNnD zx}yhPmalWFYvyx->YG{34}9OuX?fLhtZY__M$>Bs1dgg4GZ5XN+qUVbLEZ{2!_Y0) zHocHH)lAC|eO+_Ze9p?-x<9`^0OIvWf4KkQ-zKBOk?85qJ@MnazHwQcw|i@q)bQ-c zvwT0YYIxl{55D~TxB33=$noxAq0jHAc;&)C^+aV^kpQ7|bT~!7Tq^j=K%Y|$OXYqg z>~l6L?vOH24k{vmD5X$wluFg_E4Uq1r@SesMlXpRKeAGqv~pH5D)FDO(HG8%#bZrv zgVjP`r8DD5s~e%erCM`p{#!Y_!J>^G?p; z!K+SA)pX17@>)*M@vL>z4?|D$JlzcatRzJrJ3AJQ#H4|mvl}W~3t@F*{Iks)D}w_A zrE--Im8#Pxr7D?$p|`rE;bVFNS9R>bwDXSXS+?qCL(exf$21(zaP2UgwRG1p@+_^V z>4B>Gx@(%cA(kMQHv`kjhG91E`uV)=u?;NW=Mo}MaseB{XQjF-n5PmSD{Y+e{RcE@ zcy*5&gshgGRjnYfoNVA^E#Gro9ekX30?o`?fo|A=8tpNC&$4XSGBwZD?Y!-~s#sY* zD9oxk&+xbsP0I(}hc!hTLGgXT>}|tOOS~{^X-1YCGjloHaCFa8O(&N%O}2`v1x}#r zZr*b8(VmhnN&p;qJ%`Zpd4-1 z$4iBirP#99u3i~z16B3~l}^{G6ub_vui$O&@JfS4U|#34q91I_^j7=(qF2q9ZshUs zFmri7tDCN?WqngMse=seEN7~L?dR1%3$m(V>&td0r09Z)XStu2&YLFPyj5DnK8`kr z9B$&*=FW29c-==2|7TXSIMRluYn*w{;K18jkat2Ymk)9($C4#8tSl?4I|L3d&koQW z?w!N#_&RY!^?Xb9L(}zKFUa~{v?tU&PP0vjaa5Hf?-)b`%Og0LwwcpyKc{&n8=l8K zV_|Q(P^p#%dO55Mdjmgk2BRBPwt#BeY7k@{N3{*Z(DH`LJ~e#RG_s)@`l{hu?)=al z4HGwf;g3H_Mjss;J^h!jo^#2q|ICB`e|zIc{zB~W_GB@Y}IU47LYY1!pcblK%ieSXQSbZ(5Cq*gk0Rm*m$*$!Rrn0|Y#qZr>D z>nPXouYsD)iA=x$VqQ9(m9w<0!>LpGbv5URXQ(-v%Z-MfX}Wuj23yZ%U1NQBbS}@0 zbK-{q$JfwoqkDLGE{D}1db?I$&pEc5byS^4ZRiG%I3HT7nb&>d5NDGEVTrYLjGU3z zJxg=+^mU2hvsL1gk>^qJ$-bSK-e=e?X|h5xf@#O8FefHp5{M+ykU%hV^Yw?%wcTbO$ z`tdieeYU3O{(ElSzwdE=Wyxt>~*^3(pKK5bFk3qm-;(NAp@l%rFNxrd7-#D z@Q)Q>^%yxwBr+#go-{;ABMv)ZKASg9E$8Oloacu*OEpAd@&dl^I8N4d0-|`{^bG>J z?F4>~1V_NjnjAdU(Lxd=QP5xwjC^oYI{uJ!!j^}mV4!)?sujzM1B2Db-tVd6(=1+R zo1v|V9c^iv9=dvH=kuP&E(jb$_wtTwWHsWTh;D|b1>_4e@Hn5Ul?xryGBnHfRa-YK zm!BI`z9((i{XMB+)j+jSDn{-jP(R&cn^s^2ggN4ap5;)5Io3$GxkBH|s=BXo0a|Df z8a*#dm<>J8S=dVgFgdn))v!2X#CKbB)sWlvq+Q#(7hHCM6k8K?fAi-1rAZpu$9DW& 
zo|tRpeJ6BX-OA=Q@}Xwsv>b_u%OfP&E&*7}<~XmWqq|vt$2WLdsuMWA7P`LfIXOQd z%=(@wvd?!Tu~VaCj-J=NHrPfw4xGpXj~mjg9w#)pdooZczmxENi=7v!p2ty8&Ct@- zz|HZdhAZ!`oo{KXn)OW8bId@*X)T{C_P-vgIoI`U+a(6&98RGIzG6*D z5bPGsH9a%$a2WagkXKn&mgkXE&Gq)!;j{IeZCgf8z$J&JaCs$9-zO~O{hY1Yp5X^p z&R}g!(jCXo${S|R@f`lg&lx;4-DBfqEmh6yDw|U0`KmlnFAqo~4!gQ%x};y5trLPU z0+ryP5v?8dzZ~vo?Xjt+FM8_5o8D}AT9i!ol!GmW(qN@;s9o7go{JpY z5co>9H>g=BmE>Q@kz=0@INd{v*XtBF@gt`SyXFp7gA7Pb>E{QmbTwGq7xZ&p6;a={ zE6bGn&bmTXsg`O^oOHZ2D{{i3*~ErTPT418Rzz?pgpt-%IZ(Dn;x+SA6#!q&$M-eG z3e}p6Q>9|x(4@`;duU=NR;Wa7Yc48oP$jYcsz!?L9JAvPX}z=X&E7;%uDq6LpkgTp z{-7x>eCJR{zo%0gUec3u+|c7(dI7k{%7zYD+O%^HyUz~EwE>6HV8?^9!55n0vtd{a zgTxqyItS1*tbDNN(PqsT$ISNDw~lGzEgg16_ZJo%Jy-E-B)$W1`rfG zXv#D1ES&hmXS34PoMT7Jsi5D`$>ptV02B>C!T>~-qeA3xfp@+Z$cvH}5`;Y0wh5T7 zmj|d4V?$1mspYk7Hfw|0{hXUwa7gT|i2Hr1ZfhCnx}#yqk%!bR7_3%H_H`qRrTF6B z9wX4n6h!hcN3bJ40vA~|!!YGr>VN73J991pnXHn_l6E|5DA29#Smf_q=;j@RQY#2- zK93k|utNxq-KPP}&v^C?DgCi=+51KoNs&c$Jtiny{`j~7Zp9~*5jD{%ph5gf4=$>T6z(J8@LD8&D$Wde_eU|2zzbFCaJV%vE$oAYw6%?DC2WG$W# zIEu}lGeL+sQle*CAQ)HATUriuAKC;waATM?vR9s*9vo+#9Mkt4GV36JzpMz2*D|M; zbh1vo4G^01VyZb}y5XvZ<_6ujO0Ilds?YZw&Eup4v8)i}=~3vZszG^b`SM_DQK0h( zERce2WKBvN$IN8n(zYc*toETLC&Z?f64|dDm?!x?p~WE$Y%8y3+2lm*AgifF5^vvd zo+KR-7YsiKytXa*mc+SMjuj+B=fsk$wx7-UIn%dw%62k_MnNJCm8Ginf>irln;`s( zo*)UoACLbyRY5k0*T`O-yu4=rT*(AN*nw}TB>S9crZ1R0JkQbC_-tm~61at1SE=O4 z&!HM*4IKy+dc+J1K%WaNk0@jVyB#WjE`s&QG>(-{?>cGBlINwayCV~Sn*HYfg;MQ8 zme1BfueJ*i(rt(2$nG^k55QG6H33sK*cwz%?03KczmjzbohsFYWq{~i(i=BTR^x*- z-N|~!_I1)lFTW}&%L`$!aLDTmCjM$Ib>Pzi_B+*yNIsOCA!S(o!@fQvW+`8;*jTVmh|sL-#Z{3yIgun z&>LCuXuzq}wa|9S&jbq2m(Mp$@jR%Ez|qvzx)C^e&(4ROyg)YUdiV%EFOq_0bECWm zckE`=vC?_pdplCM0v)N4{%mq@cH(bFmP++YPXax#;*P27R*>g4zUx^abR)a>oi?I4 z^_?0xrhNVQs-1^l@C()}j zAJg)1L$P@j(hzhZcuWdJ7Fy$802W55vr?opl|2xKsulV?IcfwEZr!Y4YY2&sPBB6> zGQz;HSz&5Un-jbKtF^=1JtBbtwF48vlH~22kGtLQ%hz;avIm-5(_-4MoZ_3vZ2U!JZT%;&t~M<*+vjZ(F0G` zrOx=xo@W}rTHCPeJ7@0r+^&(Y)-LIhV(rJv&o`w`RE`;tzf&hEj@FL$j(XcnRSU9e*N1O=hXf3f#=Wu=Kgb{i_ekn 
zoY<4N`O+8TJI~dOWU^&1A!v>0|-J15J??ARGK#`UFMt<#Wcx zJfnM`d~C}}PGFH0Y%|CM_$>l8r&J|%=j2rrC0#a})^|qb@l%I~9FtWefK#}^<)}84 zqi1WR1Ba}i4=AYfgbwyEtC$N6-3iQm;Dx-#@*S8<+qBuZ90-l#1@v$F#l|`-u};be(m!kcf}GTKlu^x>pvd* z?JakFJ|11$6O_X)FfnCpHQhO}w*vYd=eHoqX@&eHMLu!dRkOtYa(zFbdWZjot_rCIsk~G;M^Fg$ z1LaLXDW7*l2DiCY+s@`GMlEih;tFiyLU!JuSYs(t3DKThoU)Wj#6mpi8ptwn)vJ>#yzrXnWUhR-2V2{uqFp;LRo-nXSiOXTR}TT{{^`SL@ha^&3e+oZWY z@ED*cTkwM#bcdX5hMxtIXC0es`a+0{2bd4d4I_cogba14PPg2}nY@EtuWV z&B|``lpBspMrvWMsGm zAb@GGk)X&)$`(nL%K(^YS@9u)?Qt?%>eI{Xq*dwT>h=32fQ`Z$QU^fQ$!{O#Lkj|M z2)dNsAUBFl$b5)3&yjDLel=7qXP?Mn_*C*dByhOL_drtIG_lyrfg?3dI`+Y73a^C~ z&^S(CPqrCu9tzKfJeKvQ`~5r*+lMtIPLnz~r@#SL*9-ves>f0XR7Ye(INBV{Q<(0Z zvXo@N!f?tcb%CTdn^V4I{8BjEkaK8}o3h|OP`I+=G4!ONn}~kE%geeJgSLf zn)1c9P3${A2=ekP6EDoEeBaa@!Z{I!J;2i=v;uZv5VEWg>WRMhNX6mkL4O)%=Dhj? ze@ufW=k~yTlln=vehy?{%WK-?uO_ONt%JM*u#+l?Se@f!0~z52DE%$@#zULK(1soG zLiu0iW`Mf^kz~n{X)#q=fAN@02n%Qeuugf-zYSTD1Azc^$w`15n`27cP(At0`exz- z@6QcRZ3IM-K+F(DgN>!hhfi9|nT2%Bvj?a;p~|=_svqaYbp#(OVIV|~=V21nL$E^L zkZ+p(Se=h^$3d%!Zh%xHV31n5dSWlp$$)|O2%?}cgOUnz4lX*|vSQ|{Pz5Pz)4JRUGz4(AO9FctgLlIZ&$3{nVg=Q_iHH?3l5uMVgcT)3|uN{!8+I zj$41IB=yLZaXn;R)uZ}{Akusw7$C_A_uVBqvNCxt;f5Fq;iy4L0xjh0WyPi%VNEMIb z2rAuu>7#0-1KK|iSIk4mwO5^ru30+|vND}!bE#Z>nXQ&3<2Yi6t*Jp9n&m!;(7s4BIm|2n)4lr<$Jq<*J3=twF96UlRO{s^g*UNP?2MoHzAqUMBPA)WR9_~fNf*lqx-2l9jbE#y5- zK7WErs3u}SxuSK2@|t==yMQ;E3qzsj_h|~)2`)2q+&P~Z*VKgRY_qdTJ|-0_6}x0g zZNuX;Y!kLzh37z$({*&^eaqzbmP@Fxgb1hmpbsHW0=bcnZIU3UD@0y&Vc&B4J_th8 z2B9y7d#`PcNTg4%0Mzfh9P@{XXH;ew!y(T1YXjfJbuP&zleFc$Y+Xg}cYOVZFP+1;K7aR;b&}+#-r_zd;F4#SI=u{j(l_HwUMu(cdY9nju;S8 z7AX$-IS(0`V^!o&cC-Z)8k*1n3^^g+NbvzY1WBPb0IK0U@&p;4nuRbb3321u)>3ql` zL}QN{n&oCM=HyRz#_UjDZxY^)HV05V@~W3b6(NM`ra`%B!23WF23cFapYESz>C6#h%qa$|R=4gr7(w}jS+Vj^+M1C*b#@(~-BDgUdz782 zNlHa|W@Bt5LGEcvCNr7LU@@JToq{MInmSzD-0L7aI?C3@E80LH-9(Lc`LJ)U?yynfdqV3x4bIAeBmsaQcl4T`V5953XzIzttu4}n+dE0BxQwKF46 zNXdATe^(~UiG)y323wo91n~I11=&c*cPFHR+?17X7{~uwS-I`d^z#!^iXT*8ySJ$n z5?q|V>E@(038hkyPn@%kI$7}uVZ{=k1C?JUW|C?nUzdNC;93Gg<+5VDkO~K@gX!iu 
zF{MASZWcEYl#@Qu%iD2I`lsWhAbvsXgOQVB zig)q#vB65~fYM7i8B(}0`O2C)?60Mc=^NyEiCCeSzC}wQ0i+*FG`?gkj-6NID>Sf(CSm18q$TC3XbV z29{~)c^FX$F?Km&QHZ1nv5lD{qvhu03>13qc=rX(C1hM(C>Yr-XC-OZMnpjU>?$uP-KyY!+zygue<;SuLA(jf3hb5ms zvj<8t0Bq|VXOIz6s}H)BPnn!DF^9;n$a68^P}=2HYCi}*NeX(MzSr(J`@2#)GdprG zflTf3La=Tg85}o_+?+`fAzgW9HifR4lVh335Zd5f@(31L^mq$w?7+{!aC&(p9i$p; zPpAP7&l3rl>&;1%zIRzobbrB|Wm51!6QP3m@-4c&XzmH52k5mBaX+9O_Q-;;kmN`C zXK8s!%OW@__yisys)UaKspjOimNq9J5Hu}UlODKVYJ6mN>t`-|2c0(6qmsqnXiC{g zrt)2l4ak%^-}WLn5IE*(Y#>j25u(a~GjD*cli$i+cc z&rF+{9^Z8Kyr^85#`QPlEOL_Y`ghc%LXJT z45iLev2ks;b@5!s!WM>TN0|kGlG8GG#ii8~uj#t>knDn~ho){m0xcVZQ%>lymY$xY z%l94b`!J%^_N=B4tTK3{y2EWi^~#@UKY{v;41|J&UJ#<=A%*LZG}(PO)lp65jE}!9 z`Q2xA+{j_fr_>Xb*)-7SloSDqTtACc-7m&>Jc_FPDwr+bjp+?)`s z1N0jWnh3cA2H%B(CxRh^V+}xp6#JKY#HC(zT{-o!wj7nEkL}D<<)7tK@MF33f<(g8 ztSL|L{K?9vrCD%bRR@FKQl}{AvlAa9xWsx?dAZgETEn(w%ZsKgMP!GCfh@>EbKqu! zW|Ch&Ov=fBs%?Ngg*e)C*Z>r=fudmu4HiNh3sep+pOO?dQ5w26{YP!({gSj|>gHL} z>K3bg;zQ_$v7S(VcD#H_+cwT3mIYW5R3)?+!jlaJgW>?VK~9i=lSrYLWaWnw4e<3~ zDEY^YODVW5*JEwq$SI-Nb3x_@9Xt2Sdc#dg`KJ@5W70QGi=A94H?!r)9r(JiFgPBi zJ;WnYG0!hg1y7Zi!_fof7^L09^XP_%IaE$m&mr8}r#qHr$uRJuzQ>Nv302H@GaDj1 z--z!{|KSsZd+HW%8wk8_C(Bb&PS|k(8h91RO4E=lb58Q0-zh6y!6>;Dq=+>HL3|;| z32pf-4h`0o(*3hg_h4dT4d+xxZy}Fic38cp=|_KT{`_CNPaEF*eB|MCBEv5tkl_77 z)B;cWFx#s1+3`sFd-am5NTEFbtNJNP3Z|YPX`x3m>hZH3GkW%Nv=4=G5px)9A?u z9cjYG_JJi$@6gQ-08rrc!&UIw4*u#@JA%IWBbZ40{TFfvdD9$XH>HY~w*(DD1iRgufzj?!(kCrZc>GJ2FNk-p@Mo%BU;E}83Q=a#kiaQ7rcZRyJtZv0js!*6$%cVYrnstzcec#d>ePHa!QBv(u zw?i=W!c^ea$8mGe3&Tnmd83V6HxCZ5aj>oM9z~pRO80%S?iG={+uBFPfd0~!;Fw=Z z$?ku-@B{h+9=;$fmGBPAps>;@g)02P5OqSo()D90fuAQ*g{O%3#QbE9Dpez2a%Smg zNGZWo)SMP6Dr*+5P`tj9w^=h%T$mR;((EUzC1tP@#Ah~9cj9OD2Qa-l#h=jl)36%tD<%DlF{g79c?ap>Ti0a2cC(>q~y+p zJC3Y_$@K+v$}1J4^k}Kxdzsscx$Z=iV{EY`m1Z z4&E}6GlU-mdXfAA4g(_KSI^_5!beJEp@c&Rwp9~K8n%?0oRSj90f7~NpAVi00xzEz z_Otu(ScEq}B&7z*UVr=%G64lM037IsRfjlZaV9t?pkZ<>#j~I)*^;=CLr^AIgNuUT z66yfnXzB$xeUI7})s8rYc~YQ*+?2=amO5pxcX~;|k6Z0zBX#Z?Cv_~oprMlCi^63y 
z#0L`*A;|*Ll`sc!17rhE0WL$T1lT{~w!q}5Q1JVRjX0HnURdv7->{=Wt(X{k+5C_v zm;o`SO}Sa>+HkW}bM2{ziw__q8$b&pfMdf8=K{)Y(l@WeeS#*-DMDe)iw^*L;8dk9 z*SIT68r;VMUVybZx2EqnRPIm(tFMsi`r#`&y6=hUou69=DKq7Y)rU*lgg*rR3Dbm$ zPaUf(1v=$Az)A>HVfXF#qw(;ppeVTx0C|YiBvuJ@gm!~eFA_TRA>o}T;@ZBn@0-I; zGo&s}NPeZ0AUC*;phT5qk+OiX_+q)ttT5JLZ?!_K`P?T^9TTQ-sk0pTbOP24D-3Q= z7rPN82VU45AOU2FHzxF2e{dt|7po^eA$TG{U7!&hF~3kwXaP@<5CcjiKM|7-qz|ck z`Sshro_sCc^z9oj*!i2^$3~8pQjb6N&F{RKRzCOGSDt--Bo^K9@T1MER&@6mT}scs zDSOxM-9L+?DOwVQ-7PYyP4UYVu;u7uWA4$VC-y0vL5JW`4)zR71A8w_R7!oln^9TeYy)A;S0NNXExG%-KfvwFq8DSNUFoK1+s+AodF9eeQ4g1KX* zvDa7kY?JaAkKO*s=7E00+_o*_A-!JR0|AKa4A%oV)Gb(KA1OaaA!pI)fbxV1Tv!FX z9hpnqye+^3&|3u`Q#67N90xls_7Obc_++poSyJPqagocUuGN=GsdB%uEq?j?8!(IO zSrC3m%Npn{FNzSrD@G`45ePfn?k1`Oxdy`xdUYP24aL>4@gj2-qBb#D0%QVqFb*Z{ z(^tppuaJ7TTp``qixPpKig!b33lTJ2U=J{w<3L@K5ApTk7ZS2Ue$U(+ks|STpgWL$ zFtO-R>?|wy4K+w@${k^qRz*Wdbjj7R;T;f@a8q1?Ag6>xifafn2)9hz0=*4zeSD%$ zR6xk2ga-*x@Cx^z3lK8FhCu-3iq{Eq8+*?ZHXsqA05%lifFdlR)G^{bgs>+WnJz_Fo;_b#>zW=fC{9tCc@pdEu9yV5b!?F4pN3 zq+N|hQt29HYsv+G$7D$5ZNzB5u;p|A6sr!&+mf;9C70fKSC=#kYEg7Jkvj$&uTLs$ zF!r8XsHQ99ghaM65f1jHm&<7*(X8MDb(BSG<@OY;Qn>52rTekR5wmzi`7Bi|UKI8W-VQJ zLZ^%~kxV(?R7B`Y9Az*+z$LtLtYodeRg%A+;v=?95s?kI9{vL1QA1;6jS#+F=ocJn z#2!pGl(L5K-4Jeo-4q8Dp76f>wqSEzwg?P3kOn3OCQ|6vyDyQX-r^-v9otINca9xi zMQo=^1|oal4Qv>ggov>$I%*!s#pcaj%xt*A33F^twg`SsVmeGZE;qzghds!?*PtE5 zwz@2KF6Eg(IHVb1u&%*}!hhrb5LTp7^QD0X7jF2x_or z@zaDTwwY*}fXG2+70xoYC>|fG9}A=x3V$LF8=u$)(~teZl1AQ4q|Q3~ktcRt)^yPY zzx>?I=fnwJ(u-4<*QI`O&8y#%k(!rZdYO7e7xHNrG$6vLB99wqZWLw@WwaGEdQoim zY)PI~gR3^!;WWV$7K6Z-PpyHmbo`y?U=-LS2+MNg?z&kAO~HlYmXfD0s(~~tdh*-p zSbK7^H$Hw=WNjYEBdS2q%i7+-N_*{B<%h@DPOB57YDL~7w|RUjnjXT=$Rn|Xe-=Cv zlcMRn6Y_TxlDcwoELHF`-pP~2o}gn5?+9CsMH1ahtTBZ&A~iM;N)ncux`YFVG@HkZ z%Pt|*U_BDt4u=wwT(rkP{6hu6eR0?uADgh?uGB25Nq&s5i4gizIU#QN*u=YYA^=?+ z+#FphVyGEXIopl;36~qef}KLGgUCmb1T{`+Md8O%05BlWNVymZ@J7)ZfH@O~T!5L7 zUjyi1G@)c9pJ}cgh&rcbA)P8i${MuP!4OAavUvu8BS$i=O`KFMjm)b6<ZR^-_emJSyC5Uu<0 
zhZlqD)>3)1(knkY@wG>4)2T}I$CKZhClp)C>&X)u(shaRt5Q!UNtaFmdA>Jrwg`86 zsi^d1)3KSA$d&PX>er`0qYGOmHKZ?^kU%R5Mq)~5{N5CMx3BgW@qArIIj&rgk8Frl zOSMxIoyDN$uvD_@Y;OGGgkFlT{;p(F;XmD(1(}7JMauH-1&fy_OXbW&Db~5N_Rv^z z>&~m{+o8^%s_j@ZOVnnC*?aD;(lbtj#CqB z`knO08WP-bX-7`kl(=$aC?!+KU=8%3_L)*1kH7?8$!FpIFE1-Jy7!)Q{z%xdIVT2n$}lE_KF_ zY#wk5i%_2Q)1Wsk@*1(=9yPP^CWManUX$B>bckFLa!A)KV$3I94e|^1RtGm8oY=ns}9z zT-+7gQke6fBd_!x`DaPU_ zq_VtuQvH4*XjG)qin-k@8XS0nzP?>2)$vJ6HTW84wl^9tPCJjJmi4SzHT!y`;saGRvWi&C-Xo5#FEIacA8`+}OKHPr;w-GT-we%ol%L*!`v%}6Xc zaP*4C6^X?Y(K(W%i{ zT6gI2NNN>}*FC@Wa7ocWCe^=^5Y0vBu6Ru<&;CwbI~TK6{*OfZ7fm(0WBpG2=ab7# zDA&6WiZ1U<_k+cc6qVQWV?nha?Y5cYRGNEAN!*}%aRv9ygr z0s`0(g~6GD$6dG(HW)X6^$=|^{4^}kRfcUCuN{dhOMQTNlFzN*J#Ic_I2CIs1ZE?L zCoX8+f&qZkK>)9)1s6-o{`l2d#-P$jQ~i+iBRP6`e;1dXIehTzF6rJJm!Up^0)1 z{~O z^dK#yyQxwt@}>xMcsn62>3V1T*ffaHLG0rTfNa1UOqLfbF5Wbc|a#AU`@G{fC;^swLk)&|V5R z3fJ4l(!jvR_=OF#W|JHHN%^rUkw5T(g+BR$L`0K1O;3qUpYzsMl${T@x%-L^_B!l|HPRwW$ryQqYLgBYfXzn z4QY2xiyn2?Sn0F_d!$F+i*{W#Hgw8P zmNr;9n1sm*nFMy_-gBh+?sZZk*wP=r7B#512MY;RDD@dELOzT52^%5QAtx`tI>2UNzt^sgI zvvljc)r%)Cr903B-csjK?21-QqQy&-=5tK(a0B%5=)+z(q5NXw@M_VrW7D8Qp-G=B zQG{MoT2!E3AY&kFHC;z;qTu0a(!hqXRJT#H5eB$TEL|ZKK6sVrM8Kh`6D+!aU;O%g zb?3kP=p!}hGg=z%yL$hZ|M192cI@74-910O^2wUL*`+(LzVsVUJsyjm7ZD^#m(oAD z?fad3vawRRadmynD_N&cPYv`HsoDgnu2MpMvjRU zH_6x4M%Kj4_C32($^ake-c+&#U?jo%TMCoeg? zsk9Yj=a=NCQj(IsqJHvkCN`0jN<}}pBqzbvZw>|rGRcnDzVPsO6lcJ33*64o-t8$M zq9U&xUsoLLcZ2jNYA4=@+hVZXT9`s64=P4e)ntoVmeK3L9JaTpV(nH0!I^&PTI_FM& zF*Pq)2xr&D2FugG6>-6m{~?^R>6W_I_69&5mBI82(giu?6gO8Q_q4Aq6^CfsDf*kr z!M?*2s;bG67C2D~RPdnArZnZ)q;<>^5Y5$8u&{rAFrV1b4}C1ZH!C(zk-ydu!H`+% z>nm+-JvmnBmk*uUxO$G%)$xt`z0yW`W+GywFrHK+9g&Yse6sP~L=z4>G2=sieL|Dc zM_t*MoP1VIYgXPcUVfxbZkW}O>`>?&k1tG>w`H}CSK>3I>Wl|w0-?7m-95`9nb@Mz z^oCfa+IpsSsucO+sOf5YclwNaaP@L5H7%c3cG*{q(%W1@nrI2PXxa#Y zGmPxl1-o8o4APB`I)5 zu84MD5lgO37bXztFPz8PPW#;YI`~P@tN&#qBtPt4{WVC#o&nek;C&p37ub5mKOSOE-G^&2Tj|>f93|b zupmz%vnYPyunDk?A%IH7ex|weAtewLO4Z7cJoX5wGN~uO1{$DL=~dQHnH2grw;re? 
zH2QgYXG_d6qstu%Ni?#EW@+#jCHsN+CGT}bV@Y}BmgN3hl9O*q&O1|z zof{ch6760Ri}pS_DISZakJ%oJZri^-D!}IASW`5%(2JfGNk5kU&Fy^8f8@1je7iuW zuSZWLk8j&QeN6nq*x8Z&Gsg7K5U*J`CjKeu)9h-S3Bp^ zGt=D(sbgQK)G?>G6PoIt3@Hq!y=P){IXXRnl1Y~X$W6HQ*(^Fl4aP|gC$!~9=A7U= zx(e@r(*Q0O{6S9uvvv^k`&!#LM0g8P*1|at*Gl@zd&E(fv$IY&Va>u3ZB&i^7z zSolS09ZVSJUixv7@R%(jdUV)`q5P@CQ>0=?mS*;VAh8z*06CljtUcG9)#YzbZGd%U zf`t5yiHf1pUk!I)$Zc&>F7ric^;c#k2te`^ZSoBh*B=?FJ-mT#c$1X%TFrbhWrntQ z$P=8T*Z{-U>zkqPF&4SD{L2Y9BDA1*h^`$n9=VjE33N|D#sUX{ihx`+9E3~YMfa(Z zSh!u149Md6;eLi}O8V>+c(c_+Z-`+}XpcfbYy|5|$t? zo*r1xiRhnUH-LAU?2x4?5P$)Oz$?aJ0-idh2yRYnj=*|^9G4zIcx=Wq1fT_ZZT;0? z4SHU5Xiy3$(dy2m50!ZYbQe=9;7Wj+$!1IX?(qrOtn|Mdz<_5u(ps}rXuOCQkNhcl)Sr^Fmwe;M zGI4=?vsri==MoZadVHv|kc*i{LA+o+&D=-rCuKB znB10bfEMvZg?I?DQ#l(x1sMw2<1oW69c6I`RGZp zz#rKXJ8DZT)eZmCsEi-Eb&PZC&WB(4!??+j`dD)8yoYN??u$KnUreggnnoUu9e6l) z{gD(HM@I*&KOn#JCuV5BPXI5Aq!^ z9F4$4c(o6dXQMEYN(?bz)&%}ZN&+$1jl!NG0w|h93;I)$PchcxlF3n=i)LXOO_?l0 z)lYf^wX>7(Cn|VYU3al-!OEox5oyof0z46@$fa=NxwbFoGmFJ9i=Uo(e_tq$t6dmA1FQhnw4I-#D%NGdD95V9(*Jz|;%k)IA7 z6f6u3{)e`e=l0+5Zp|o3apv&n&VS`+|0qczDE|j`pL?K2uoO3a`HV-NBvcoL+w{|gZSuZYpXet;f5cyS*!rTpbGD0BbDWtjKZ zCYQhDi6Axp2A+r|fc%V3{tMqs^z?{zM6}`HmEC+qG&V2#z=_eeKmQV(I={0Z$gGbF zLS=n4F%RIWOLfqtkDXl;GZzVm$*6focucx4m_KTw`QX6_o5@9O!byX7Qm9jmh{0$A zi70Xt5kbR~js1?zi6fNQhb%@*Aio*Ve~m9#0(dIIgsug7Z;WImiq;LU}`=hkfKd9bpLs(ZSaMjIc$Ms8LPD97&)Y zuY^Y7D#1Xb)1@$ojf$opHGnWD1jWpPoS{1op$_SvOB7>YH0&Q(gOL8g1Rx?TF0db% zUXaDGGx~^SD1i*uqD6_{gdju>#z@1RkGiYzlQ5J6#)VEhG(iSL5EMnF#WXGQD-E=q z@TqGKw!w693DUlB#wzepqR#`v@2T`79; zyX#lRq-=R*thPlstwzOkHUO&lXiEfL7Z^Xp^e+wsLzPI}aLuGs+A7)m!ubeshlGyX zozoziw=p84i<0Tl*)@l^AKt+kp#dGfj$O|_q8F0d4gpv2_@cQ9(k;N!0KtZ3TufV_ zs$f&laS269JCx`ErT3fy4k(DlMYxur=kSxzkVncAj0$BH&1o2i=|o{}1O+945CY=h zR_uBz`pALEqPITWP(Od=jL{9Xe`RdfO6ld#uWbAFN16r6-4qO{3CwO1 zEp@_vO-7_%6#d848l+h4Ab9!!)D)h%|KV_RhUo~#Q$$}G9%z%t#VjQOW*N}~ET;0q zBV(~p6mf_z8d?Mm^Mp3YVx}ok)aHO;r~;6zEUFONp=0oCi*Z_T`{Y_`1X4Y|J1ADs zia{7)_Lk@IMC-b-Gr&R|8{I%HNOUsrr&5rz07MMZIAS;k4&nC8a)&Nod~l}UKJSO` 
z98BD=zkSO~_x@`vy8p=7t|L<~ef^oUc3rLv|LL*cKZ@kA<?cV-m_@Y zYW;&OxTLjKG8?~C3k7<|IMi^O*%z&Jqyz+f&3SbdKt=zYMwu~aG)KKx8#9wru5aiM zaE%Ss-l~$St5spy5P7nuz4n&jbpm5^u!5}^frL@>AE>2*D$+g0)V!n^IaAy7)8(bj zqt9uOA1HG%H02loiUR>yO7j!plX(O*f)GLRFk(cYV~G)X^Bbp+Ul6HD@RZ{#kp_9j zw`HSG&r2RZO7;kc5DSEy_CJz6??3G>WwpGwW)U~*Y!U|Rs$yiUj+yo+m={uPxZ z__TXAO9+t1-*ZRpEW#i$Y1E0(`k#$I8ZTf42p(O3*~~gI39qu?HfiKUnNtyMJW)$T zDH`=?w=RmtlcR}fdhO9C$D*elIC+#wx&niC*WKf#>g!jGe*I*0-f5BuN;{(~c8Z|% zt?0b7rICB;p1h|nmXOkqj+0h1%4bo~H&F1MjSPLm4n${^S8RNG+zm%a%c&Na!h#VL z3kD-y@Q%zpg@m4OlE_46LZRlIHLO# z#mm&ETqn)jfOb;x2jbTp>@yX8Zh%|K1R__sFV!Cm+n8f$AqKxQgMwHLNyKO# ztYS3TF+4-eT*X$d<6otJhq;N29}=CdFpWf6y5+fVCRPgXAs`%yP__5~1+gQ~GtOC& zrM+A(O}rFZ!bye|k5rGRmsriH$yd_DH$%-(fzca*tU_!WZKK7Ph6|T4z|b$+N%>UD z4hT;oK1heDaLAEjlo*5#Bq-PlZUPC>;*{9Ln6}jgJUJlh~{~IkGq|Ah4{ME z7r%VQzQ&%KpI-L#UH3kDAtPr7-9i`QVg)P70HbHbIO5SEjPS4ipciaJ?h7^!1n#Wa z(X$eR*K~+=UuKj&~1ojI@iLA;iJ&PBN!gB`=u+*1+sHXjx1m%KiW=uTrk>iS`KiYAN8n& z^nu4C{jHi*u~h7+eXm_PZaGDXA3$qY8_$)^8$ zG=I(s6P-l{#jeHLQ>X%Tp~-lTE@p61<%jdP@YFM#=`RoZGNtk+Wtw7QgW1+W#1zB0 ziI^Ox4lx0Mzsmb-l%pQJi(mU>nSrI-<_aS=Qwy1g3K7XRQ^klOjAo34g6DM>U~B|0 zTk``%Akoi}VaD+0%7PneVyg@DE{&#?xrTnnrO~!|$+4q6^cj);|0%`)lc9g{MX~nP zs~1EP(a%Ok7Di?-q*!}ldt~0uLj-yC=}7CRA6OR4tePnzW?=^I6 z?aO#k+CWo1blqLwJ-iIJD)Js8UpBNDPbs{FAYHV5U@%i&fc&5xTH+zN==sMlfWMcv zY=}loYjhqlc3P!gz=*c~-nc^5fGmkZFaPS$1TT}{m^8ecN;wBqf|25^F+-I%;ceN> z*wl#zytlA7A_y?uYgDkLRHjW(Own?Mvz^V(P(hA4)-qS#mT1G=EeK0G@a4WH1!y5$ z@qT{f-uTz=P2GR_uYY?}puF^ly%#+y=Ed{f>+el`clSu`I_3PYKYQ(ye6#ENd;Nox zW89$uF;Ajn9$JzZNy-*~6b976-!kNMq7gT ztfqtFSv7Nq6jKV%d`5Ei{A+`Nvn54-t(HgPiwUPCw@Utste3ZpkGalGm(+X%E4QQg z8=)gN#v2l|W+(S_r?!NZjoFD0_tz{%5@b@%PmcddWBX>TlU30IJ?K@x`WF4<%CQCd zW#-CXoV?tI$nl{X>Ae!|1VlF)Hm4o76q<>_9dP(i`SVW=iiItibY=3Mf3M37Zl0IC z_|`qIjI5Dr*67zY$GVgeKeogF>dUe7A|uyG@oT^?Zy6E!M{;bS^jd4o9U8ev;!n** zT^r<)=i@IvAD3e3jkWT&DN<(M^YNz9iTxQboFu%N%*7$LaV!~8&(K_|J>jvTxq~W9 zaH-ImSi(go0us4sqNPa0cnxL4m?;`0odBOL>LTtJP&>2gyg6+tseRssQp09vfQ}zL 
zLVOHr1ZSTHKFpCm`3W|dY)OdE!94+~0h9oIn;i<qoV)(`vBMi^q~&YOi99q9_14&f8WF>RM( zlsec4c*Ds<6QR!o3@VH-Vt749cfQc%3u{iInFF$kLV_VVxYTikiasX{`Xk4WedhT5 zix+(7sZpcJFW%nu$E(gASwHrf^{HpSeBCqgsLb4E zxxMugS~{>-LzXg=lvRbnMjWK_0on+!G8{H?g&xSCOSMA{V0vKwo@%-4TgD9ci+)^6 zaAE?37s;h!RFRlqC8pO=&J$CyhA?-7I715%_)@34jHZ@6;hlCu_7b*Lia4e&GqaM+ zNf7{#&WI)m0f9}4-VPkFC>ZE~eXX^P&O!qN1ftwX%a~JIQg`k+^SZ{Nnx>m?e&t^t z{P)<%P|f6_n$&e~Jal7l_$Al8dHxm8|CafC{d<67vT!PIqMJh56H*Xz@Nw6wnCL@>LDhGKpvA+ zl{L$bT(kJdV{uoPl*wVT4f7|(16I=@Qbck2Q4JFsXgS0X8x)i2Pu4W|<8Y`lm|Lzn zbZ%*|%+&TxTL@=G1~QlBSyPpg7{*29+Sqt`nDP|+@jLDJhD)>;4eeTM{+H2IOHA(YbAyQN@Tx^J`{tYxbdSxSqbj8fAi zrIl_bDT_J|ML}=@Lz~}QXXmMdFXX8C(6X7! z0x27{ocN8c_j>Xv7NLvPR_ol@eI=@iF_K0DP0O8J4%h@}4|9VVjr&MkI9L`678RWbSfd^O6I<5b=iQkGTXd-K2Tr znzW+$YaN~8+^K3qs+Pa15!$Cbvc?s(e{0+T8av3p-zGf~D|?L^+;&Ijq|T+sZHu#2_r@Nf@ed|YN~vu%fjaPiYJB(Nq#QsW66 z!1zYuw^-A#gNt1o)fnFhM6(uc82DO)W96#fzq+Tv^Xb|zuRO6L`m!zcW!v~UTH?91 z!Ij2%^@69o(m3V##Yo8wtm+0AG_@^g3|-tv^W7MpuMQ84CN5Hvz-I_egiqlfX*;MGe*uq%(8+jiZBSSP|7nS2)Yu7PE5u}&;$w4n6xR;ML|0_;ObG@xpw=MZZTxzWW~>vi;p&DuvM!QF{8A1 zCVlF5ban+h+~+2urj8j&I*=wI+&_`#>W}rZC!A7hXJ-u4Lh(0h5%qOqCVI!P>~lda z9X)F8|G4}p8&~%HcJ~!p$(vVP?6dOSrRF*%cefe_4s9*<0|<-_K@@M&A34qFXtcuWZqz4o+ToQm5npnIQ0P6vD&IsD(3fY^&(_c)#KDWRbp{PPOaixGQ;{_%C z+fN1pA*nRhs5piuPldq3RKn~l1`U9A0I*UL@%q}u+JtEcjR2hn3JuwF0Vc+y!9Tbd z$+Ltm#}<#h`uk%$?517BzHR^RiXWf)do(iO;)vtNhd+J+z3tIQ z-Z}pEGK=X#-}KA?mJm?sZrsmmtKH#akAT`&UK_Qp3*p^Rc3K@SN4SLY>|7TjCSZj*ZMS=T@oXUaiqN`mn{7W2YleYICUQkdzy( zIYt;0ne%S7Eji^C7tMH4mZjV|t>zug5L*CNj$lX9<<1iERdBAd88dxt+0G%>5S}Hc zQ-$L_MmCRfj{3sk5E-t1mljT+6i;59x+s2Rp8MLNI6abH7@7exC0v!g6=nP^vIWC_ zjv~GX8KzO;k`tYim#;Glha>77l^DwuC>*JVTN5Ll1XtSI-M<@Ev)HUnKJRkapS_ox zqPHf`zBSowMn5~mjDB_tG*DpgAx+Sw4DUx7UfiBjdZ%mSAD*6Ctdg#9X7Bk$mW{)Qh6Lffgzl-^r%eLcf&IQ5DD7lJ|nrp?%==z+QQ#}E&~HMU$joMcDFa`o2IPOh9%MH zld?|N$m!#mfXyLIKsGr+R7sXJMTL!HXllzi@?2wRa(I}KiZl2SIZP-Hkh|ENF%9x$ z7D+&gZ|>58eFqqNsoIPvtVi=ppe2tlY5}tRB>@Eo7-&4A9X>67E+}J!ag4J#zgaGW 
zEJ=7?WP#+40gl55@N|QSQ*FWHh%4s7ku@^(_lyzDv={_Lt_89Lm>__@F^&CDz8CIq zGa7ttz89YEFtzY3GrEX+2(*j@5^iI{FOc$(Llxk1*^!!_%_U_>+C-EsJiUHtaM-Sx^;;KK0PW~k!$0-6&J zd|O_N__kDYlGU^?am7f@dscy!3wT^yH9V|8(j5xsA^=6d+7VEJgt8Zsd(XDgyrpi4 zAGXkA=M&%`=}t&W$wqK$_^u^fY>6sJ(G2~z4AJ>=x6<#buZW=BzS9w|v}fz1^mO%p zva7L!89gPzL6kz98kABrL`!CLZwwMhQ#M-Dz4WVm^^QjIBC5iaVZSzqgK9|J+!Rf( zm$twv{P;i8K)ea!46%~s;@x6px>rCPB^e>qV7q1?K-yiC30F1lG)GuR^%#@rP? z>Kk>YHFsB1W1<#z8yS|F5&PtEvnMr7L2{WeEHP+Nloi(W*4Qh8dzTC1p z*RAC_FH-M#5<9|Ov2@14spZq~L`0TZBn#2)m@yb)X0gmTHpY~ z##wE_*2RQi;>uC=!yOck5|tH~L5#aZx5AF8cZavo_L7M&xGo9U)#AG++oGd&ab6&3 zTTe-z--%bStwTMTy8~5{tTG_jncf@MWh^QHKQkCU=3IJ7QlIca7gh7hFkq^W?Y0`n z#g_i142d1kaq_599G_0rgpp~|8G(O%P}-hO3OZQ+Sm^{iL5q^iR< z^J+c%-hlS^G}C(%2PKL+cBww)M@7YH*+0o_`AzhQ{lg=6%Vp-ZdUT2T`Xy#l+k20g zEmxTVFCR58f7EPh-uDH6PRNgA=If40c{`|X4c#%2WwN@O>vb)sYz$XrQVEFyyceDwez7<*h>PPNYXjk z*~Rd!w8!{6mZy_9Nx~XhWfnCOUYz$~k+&gv`q4N>nDRRQ3psK^8ju>`SLeGj3Cjj8 zAnF!*^p4THpg1M#hMumFG$#fl`e3>^*+&JYKr;ks2!}%%1Cpj6EUTYBc<$)WkN!{X zQqr}*y?>?tt)avGQVdZTlEsPG7V3v*8yJ)khtG{vO1NgkQRRHiW+p|*2$Vz&htx<^ z(D+C=@0iFXpO91@$+x8Vpf=+XjRkr#z(T8myT(?30(SVQnGija+b+dNok9>zAyIDp zZwR8OLjDCoY>ss0$^EccFmxb9)4kUm|1_pFI==LVs~>$AUBmwsw&Sr?S0A_ObKcmt z`u>#|;s+ch6zX2OnBTZ0dI|%s3Fd$begE4cNMDVdf2ekm{eY7mHbjMFA z7vT#MAIoC1DKCDJr=O*md7~DCQjd~%1Jg7HHji}uBA7eLvGq#=Bk61m7zJSVDEVTp z9yE%iZliRj9Rmgj;fA>Q~OA>@7pV1J(0N`Fq5E zl>pySap3%M{b31=E;3)Y$o1BB9H$;5j02r7`HK zyKZtKEfAee{>)UlKuGCf5gyDoo_FX&unVx6QYb$k-5`@2iE%6^0;mw-BxGtV!vqiz z`8@;(_6u0InUpQ?QO*XF zJCYORO`yWSpFkF9jGML((6n{G*34xkw_K@NvdZgNiNS6o3?m3d$fN(kI*Ugd(WC5w zlBE9vSq4jQ-}C+SAIyCRh|GeGWs43PB|2qZaipSTFRGzTgOzzn@S$gQ*#2pmCWA)tdYR%Xuf zv3%J$TWC9UKTYEi6#9@!5>toYq>xdUFgdXy0OLY)b73KPW(gFdDTj8ak^e{Gxl`#TTZ`s6yalT(qPcgA#aaGl2-^a5Hyy^TEgVOP(wjI zzpkUx-X4S-?r06Si!4C^}%{IigPhs4fRM0eBhrl>zc7tVUV$ z?iXDqQ8||Fj8kU}?ps1XC_9@ZWeo5VsEBMXx^p5b>5-vV`CAZ8NRP00L#2?1M;sq6 z2t-amg>WiJO0a1Y;zVOevq1Qtbb{#AQ4|ne%>6}I9*ey!kc*ur7wuvI^>Vaz;G3-z z*X{cHlmhw9W3Q~g3b}CqqxIt3&wp>5YkYnG4No59H!n+j;m`{k`<;+HET9_w!7m){ 
zn97i+qLI9-MtK4oZKn%tw=oX2*`9CC#iB@t1u6HPg0VqBa0J0`Ui*u!t>^}Assu+6 zi2xNapIMP0I(v6`H=*le+2Ji_J}?wcE)0X7oye#u!3=xOHV1>d|7`X}^f$$@409o7 zP0lUsN&TmqBgY6Xw3JwGL`~DNAh#;qOs%XkE>q@6WbGzZ+Qj9a&vf z7sP$v29=H+c0YVcC5*;96aVY<(Jn&wqO@(L!R{&1LE%to8 z*;lCUbT8#?+yDjCllo?${*$e_Fr*l$sfiWv2eROY zK(zo0A!uj8G8k=&$5058r?=9fbs$5Xh6f+(J);~YgvJK`1e(-eqA!~~hH}6}h$kDu zhh$bRl%yWJ%k=(p2amoS?|OCpbvJ&YvfNtc^v`U2AGetw_+x6G)3M^G!_TNZCooyh zqSxBIS36t=ZqU)P{J9YK~jx%WASxZ|@e z&y|oFBgy~*75Mx)D#K~+3hl1Th}gqj1YI!R?P)Ye!n+@Ghz&Z;;_OQ{cqAf7WNY4V zl>L=VjIlfas*f+XO#fDo8qR-wEHs>&y(+^Zh~Q1JyX++$^JDaP1NnIEchP&C|Knr- zJx*NVi1PsWnT2G(A+{sgfkz{Q02Y5D%1QW90vm9`fkji(kvl?nA^0Q`kYEk=&fv=+ zsEc>Hh(VWPt*JkIYWUPW3{^gOE3)L+2n;lam=iPs#uh#vCI-#{Ng47gv8%zOfij3E zgH2roLJ%z>UKHv+jV8Ah%mkJR4wWPs(UY)CkpsllD_h~wowBfq{VF*6K<$7UxWG)w zb-~_uFFWTVHUxEtTTS!C2Fb!+res-GB9dBW!4g%jn9rcPxVDKG2ac@v&X1pL+V{h| zkM;+FUib5^Bhep`aIP5i#fqt`Z-4Fydn_{P#)%4ONd#OC11#}jdlS3hHNRZ@TR@0vc*@k~zBS}Zz3a54YjJhw~unwC& zE>Z~^vv(g?$|Y$mHJb*S>wzU0v2pkl8uq_BvAxeER9>7AJ!x_EOFHj*qanBeSsI&>Lf|4yy9}<-6|$&`NH!j3 zmi^I2lB~dC!1ReQ10}<>d$g+Qw+)P5r|-=g5Pc$n5A{TX<|_O;CY=06Py0pygltpQ zeS@Oe*8SONAkVo#0zxl{=_S{gvR}p0dfggFKCh+Q>M(dE8#_AvF)Go)=CCF;)=BmZ z2r9er)LBFOs$f=Rot7wk5m5knVAn&#&nW>-g{oq#A=?C90M`~r8ucOS19XgV#F&i= z=I(d)9pKDL#UwX^=s0{40LGA52*mi4gc=qR?~hNp5RwoEk#QIxSk?e=J~^#iez-gN zR17kNjiYkHjLnILG-dKeUL~M{nD4-zK%~&30IzYgGU*|96ca(>3v-tm{`i9D+Z`|r}=!>x5Q0Ju7{Q~Jq9tiU^ z;ccd6%vjp^#qPY9H1adF!rvw8sAo-8Jp;sJr#*kGu4$vHhicw4RGE{O2rL`}8!^?Q z+``%jGQvbaSs($Bl?ISbWq!5Nwp4%%CI@II<|2~Gai+?8HHI8{S%D_hB_mwiJq5*c zKQkpFu88KN9BlwtA=+a0FCZ}Eje@_zf(6YX%O@F9S&q&oZC+@4q0daNuU~lJKHH@A z$-DpX_;pwBpbx6oYU(xX!Y<#qcP^|nTA*|T{>Jt;4y%#N@@rxks3GdUs><@TD=(ql1#^8~l(T9sGwNp<72qh-L<1gt6!KB&Z0}HsS(UeP}ttDd&zF z(83&utOkmbeQc6y$%~3+mifM@Xir8WvQ_5RF>0b`spzkeoKoo7)+$@jVcdDpxX`%~ zcuR{_ZCbUD6VHgpUxErE!>CjpNHFuQEX|&X><}LhYAt#LuB1|*cH$k=q<0+8Z{Pmy z)~6G{*t_Y&V>erPHK){T=ILGSqqI4_f3xC3asOy|pmMvr!dxP*ZswqfYRT3<>ULj} zEpqq?dbO4QQvEzki`ZrX*Uw8xCJtx; 
zc%y7T1)hT-Llump5cCajg)sofHz6&oP6GxEjM-i65xy%>;jYV=^2Ol3d2p?SH}E#{ zjsYCVCLSIFA{7{-$s9zMLi{b(6rf6kAH1Qm6BFYV05(q%y&ftFUS>)z(oKNOa*jM) zG4XsTTv&0lCF6Xa8ITl4Y_j<(zD)ya%YS*Cd`;#Q+rn9w5cy zzT6iFt3q$m7hVU5-vV{Phzw46OE>- zmW{`>fGU`h;WJSTp@dqi zBPqjnIVbZ4*@Pn*r(vZs^dnkMk|#rJQD+lJ2S{ctl68R+xTc>-8}6c+>ugJM#V>8b zLe7^7N4lGsw`ct`h?`_>Lbuz4`l4z^;h4)1-q+6Dv9Q(lw$^b*Crc_&S3PIf7dvlL z;S7r_F}Gdz;^eNn+?iz#*|YACt}yq1Oj%*}9>`LEP18o@tuSk6dygct|22m&GC-JS zFyKJ2X9BiC)@OjXfZ0kQdVzW}!wRmew&c2i^3lY+YF`Y4LEddMOIG-U;BNHqqF$$r z01B5DdY{U&tkg2|R%#B>o{gJ5*Q;%*W0OD`U49eCigJyXha$#dkw{zu12~?XC9+1j zG1PB8cX3_&DWP0@l9R<*6v!H=3Y3dTRoZN5#h5{-=3S3Nma?ifd1Ddjp(_BS!&OY! zZiVjXLs*LPS2oUqm&1wj9dN;rOolN}gzKmunFv6w_&d0hAx3fG2C3DwUDN>XYOo=C zIid@&*K!|$(33&Jibxtar;@a2O5bJf*32j`uB~wDLyffpH#Y@`S(dbhbBv}3MfMcy%SI;qzMB0%0b5@pv z(!24dgC9-M3bwPCn}jPk%L$t#q|KaQ2{2KxKyH)&R6aVa-zcIVCH z&Tx{j>xvt0dod^i%KuKt}{K+}h+!#o@C5vfr{CO5+V-M$e7Q4GpbgLz z7%VE(r-@eFP;+h07IKX_Xrd&7_hX)eF~^O8AdBhr5F;f57fjtHh|LHJxy^Y#8wR-M zWWfVESyq>$@M9VvcjLYU>L7{S)EU$QwDE;>Z1_FYXRcY8SH#&3J%YfGb!$N&lBR+a zhs@URwF^_jHZ>qGsnD9! 
z@1K$=|ITjrMWd3o+miFz0(lLlgAUXtmt1Lc$m#~=$VKPz^DuXhQ$26h@>E>8Iy%Zd z-L5TCZKvZ^h}cJsvzdK*t@kPKYI~iLnb(Cb#K`OH3h>WBTO^CbxX;pDK!N0kuCpmenl%zx z;yz*P+2sK>=?>fVDPoCOYxW1d@piN${K?k&mdjnhBcVw8=egDy9l`AXvNyOtZm&ac+V2(- z`3jn0nQeUfrpaI^XjhkTerxey0qEx#Nk#VbEA6b^GA=@#HO{RW;}Y~wofppPPv#<} zwQWhB0gFr#z}P{G4M}+4r1$t$?uY@^!5rJb|`0O_bdtb<(6u)P`mh#IMS}(EoFT9&vYrSf6_9{^%f#dpNiAMZ#wThDQWqkn zxz>@eaJM7pE~Uq2Ji^>mG)9~KRGdM%!*B+rst&PPjGJ6maU*U86hcv&w#`Ln>G!M1 zP>)bOO-*_S_yRC0UP?#{7I_6!S)PmMdW=79XE{$l_Xl-`@&yTcbnLlwI)yA}M0QAJ zVmN|c66^PgqY2)l364(=KJ$e4sW{I=yC2-P?dKDR2EKgAamU^*Pul7;uR8L`-G><7 z=aQixH`EaV(D(joun4R}-V+H{#Ln(5JPuvpcOWnGO>Yc*q(HjP`XO`TCtAUbrua4Y zM3?tSm4m|@^8Q?`*}bcjxjVGybrdGP&F8vAzwB2_$}{t6#k%L`2N(N;?rHH_uG(GV zkqqrnt6sQuRHB{dv%4*jezt2YEe<$+%=cu>XVbCz#>GOtC3~)Ph-qu{#k;rcex`(F zcagB2^)In4r~255vg(8HTXt#m8$TCJSyS?(%X0pIazUx_Bdjt^U*<(6{4Xx3Ra@0f z)yX)ZirHN=x+dk!=-=(n{@vc6kVQFpSvxU7b53~XV>+9y=1V?zW#BHt%z&90`F`Rq zS>^~FNlFa~Xi!&R*JNt+n=Alk7z0r%vxpoU4+cPV9cfR2PeWDnt{0(BEGc`YP%ELI z$$B<9Uu4VSSV-#m^tc3kvSUq5*bqRp%lwjK^;F!2xa~0hGTXzJQAs#&NQ=QIOM6~P z{X{aFYoy#ZQkE63l>E6scJKX2%CPXP^6T#Qp{^wkEUC~uh^wlMj9ha z%kZ)QiUlAXct}~SnC#@5 zyFdFQzK2tvkns3nGe2*G0%9etkq&YRq(FB3KOBZJS=p(z0fM-4Q_MKfXRIL)S+JF(^ANcXYGqME}e#>+3u!I+6zj8Z6d2T(PH z7g;vfg#MXhKvyGFmu%GNpUdS}6h8rBC9ob#o_&&Rko332r%dEqg9(dC-xwlzE)Cm} zz6r*u-T)#)>@>zDvW-YgUNU8f_<(|fj2Nzg!by}t1VsX2C1UKNV8=E-g>7^6&mBU% z!CvG}su_3A)LC{sWPDePg3hoOvZPHmI)(l-MB^5USslYhTEsHsA&EPbI*YZj7DGjM zJA*Flgj7|whjeyP|M`K0E#!Mki*q)>Awf)Xb;K{9YRGHzFL@)CM&2Eg##;Ks9ltE< znMt20c#cqDS7;ti3d@8oW)GAs!6LPLOa8@~(Vg`klsHn-NF9}oZ5VW9vMfx7TL9c9 z2?L-p9AmNcJuf@d{9wE&J!Sa^ry3ktmP=ARK%5E5D3){u+E3KzZe|CpN}_wiSMApf=&3}9@pggem|79h-ZijmB+EG)p@No&>ieKUH`o~w>@`$a&B{%xQ6VuOc&n@ zfucLa)d)GZstr$I?u}_z>SyGQ)=OxDjc)djG+rF)=m->!O^glSLA!pbxwXl*T8u*- zX_IwZH3NPBT5bUeA}C8gxN2mX7S_LVNJklc!QB55dcj69 zX_`P!3WA)->6^`c^`bh!$c{6Oz-3Uu{PD?u+~2>T&(DSjdQ5+P@c#R3%eNT^ANoT>T%jHsyxh{g11}tS;t=2B&{9WD*gzO?ZXz?9B2o~2C z(HAD8eC0@WEeS5O;>+z#H~Ld}K}Ul%y<>e3+9o8Thv)Qn`yM19|7F0UU;N#+%N!Em 
zZ^@4z%EIFtEahw-LD3LeqK{$dVRYyqLlZHu?bihyvvJ@=q1d8iOGcY4Jb z!$I8d0sb4x2w$7+)x14c{T3@F!Tp%6>Je+DTkgn$XWb@bU=7;$k6Ep#zbY*@5h-rg zoSQY%zT>9Km0I*V+p+6x4p9lHN=NEiTl;{1fdJZcNj@s1G}AQWIKzYk79v_Ia+Qe@ zLD`Cs9HA+Fi{usoct|D@B0_WoWmwmN0tuLfNaNNL>o$YK-1p3VFLQkOZyX!Bx0-yw zk7)GJ@uOQM^Z+sg(<3byN0IOdFeDlRBu6CN(8sXFPjp43M*u4W*Z`ArgEKy0P61f} zOeedVkQ|An7HmfHWeLfIwS|=+b%LXqKBcpz!{`^Fj$<=HYln$BANB)f5GWARWLfpa z6p!K-=M-`x1_;1YSQB40OE5$?8!>HqX2S-%T4r+{`Rs+IkDXX%OIl`gMEAeC!Mn_s z@#GiB?thI-cD;W6!;U(RyXvwUjg8&@_O3u{k?#k*hhTav(H8$T?>TQ|Y{hht`g{1H zM1x%#V2elJ_5{LtrW@^uK~kpojw~0RrKy#th}>jgcJ-LDqLFc57pM;u{g&d3c!K0cby8q0q=t!qtm8wUl419hHiU#-FgVi4f7abVvJ?ePvoaDv@;U#o4esQC)-;di8aTz8L z;<@;$657u_hNuBqA`w137{tkxFnpHMSwJ`v?oN6T!!y~53VJdcaMu5?9I&x(aFNk6q%^SQMts>NEGt~2NeceQC zY?!SuM}fhksRgy%y=^R>DYU@A@hkynS3{|4ap`NcqN+7o_qSjIvMQ6O8@fQYIx~$k zr-R3WWARPH6%zPFa1&@eVz_y;2lagfG|KW|@^N4k=uE_?1z!p5#f-q7Ue*QkU5e=w zXjn=CyIJ{jGA9IyGNI})hQMdZLQZ(fF+p$uj4vg;U^uR+Yb-}3EnifFLVwZ@AxG!? zrwGb8@PFq)e&mrK*2X%c!`I&W+!Lt1|2by7m*<%H;|;J25;=JEc=V943mvNtdUG8h z{oCUbV}8BvuA`;ykVo_Pek+0J-CagMa~V1q zpIeczq1pJS)q4ES!RF59Jf9YSfo30efu4}V(>AMxBxr<=n$srNX{OYExL_3lpvtsTu|pTwdVxZI?j?W~SOkGk z(FX?2dtgw}Geb@&FX61>AnS2N&lz;=oI#aWYth{UpWi)j)J888_-e~*yMv!_rLdx; znq+5IQ6gxEY^@;X2Je@nuL>mxL^cNtV*?Ac1ej(PWg5l+#WagP6z)-Es)abGap5z( zvTp#D2F|l0pSC-_=4x*rtqVITnPZvgP>}NZdEQ6_qF(|-hWj8Of}uSoL4{YK6(VL= zvVRzGks%TNgD#%0FB3nKC`8Nx0U%<9XQ38>7(n|M7WE!?vGgs_w0mFQye7~IjMcCj zY$!e3*B$9zfPtig#>52ikLrm?K*)cpGS4ZoM))AhQ9FpZ1j2^YWyDVXaC>a$)z0-V z5~%s{0MpXtPhNZD@k1xpc)e@9=DjyO>Q6rEeNt=L`lKTtxZV9pd`=!?aUJif zF=NKGq~8qF!aYuNpmX^fBz%lCo3?F#?1?ckcGEj_`0pvlu-b?S0X-OZ#~?Q(`V|=g zVrqflLu|I#U7AX0`*4t8nglaq5V> z?`6E#(1Ec~JmfTDC?VzpCjcxRhYduFcwxlUVh%>>iGh+b4AhLYo8>s<+A`j9@=Fj* zQ92lAgfuDgAz$LZ01_k8C>1#jPXE&)o4o_Zv<<=)oF2`F~ zn(Vmp;f?#<7sMHFy>Z)vZ?ZvqZ0i+TWzxgLU5u!ptQm&8B+hEuHjof-k5Iy`ZR)nu zw9xX)ten%lp{DvdsUWYl$~9DL?fJ;PVW>;+6T7=8nTqj*o#hyY`y;Oh6dw7rdu$x6 zKWi(IE9ou=?{i2kw|jW#ecj>Zn@W~219vhFgN&l}mGC8OVO*okYk2;K7RC*xOx#&%czL6XJnq 
zmvd3USriQu9yS3+@wL()0K=m6V7&l=Y5WQd74e?BA$}uvb=Uz(=wmEERRUOuP>Ep` z?lWS*Iy(cVHV$tELK;L=Agr<)m@A(n&Egz!t%&qYj!aoe zwPcD3oED57nTIy5WfQk-9DM(VZ(n|iP2ABfX2++GzWKKI*F%kEf4}mh+po5m>aSmE z9og0F+vkdjE7f<_{)LgZDepl8s3B}xmoyQH_lg`r4bP#rkL*$(3{MV$D@NFTtCB`) zA@#suDzmpaqh(Pme*#kPbifQ00|@U?eRfCC+SsrAi-)!t_EvplQwgJRQm9%xe9*aO zRH0l`{)oChdB|4G0%1R!wj-VDS;Kv3q;_9)kw z2Z0d4IRG4>r07~PZg3xgmx6{=oQgPeKbZ!h7@BYQQ-r+bL(_{<(2+U}Vh7@lHxa-B z5{e5SgqmASrbW&&Dosp@#iiAg*%TY>s6&pm8Fl|nB%ut5Zbd?gSmK&o;&8q z)&Z(=sMhsSmwv2L^O*g9(_MP>0ln%0=7P(<|5C5IO^<%-I`*xr>ST&J+^u?tYn@Y0 zrVLA3T1UAS>kkH|B1Rho#bRT`QHisL1iuRZ?sKi=D7cr7-pExf`-1VoQI$mk#nxwp zq7wLm2A@ZbBVAIV&Aw+~-&%Mx955*Bc-Z)NXbD~<6b1}8Wc#AEgM?zx;K>1M;c+ED zSi%r!oD0$Quuni_G|Wd7M+D47oG~tAsY9I%4aQ#vW&+NH{$f@@n<&>cGAVXf(1}Qw z{cm;%^+P7@64`;jgj}kQ#Pk2xjV72}_S`e^iN8kgw8xr^Z+?2<_1C@ve4L}Gg=!T6)^*QA&v5?*cL3A?Y=~cQP8mWy)y2{!+H$HA`n579pemk~t-1NkN{bMBf zjDWi~$tg^_248=EXcUhy~XVb#WI-HP^CNxnqPg8 zko37>?O^enH38f#WJ^r!k6Hhub>*=Mq7k;7VLL~k^T zJuqbOhd2Z2cbSAKnrL$Aae!H&wxk1s3M53+x1nzaxu#@?aREwvKQUq;X3XRq0Q@w3 zCAKpmU59Vv5%ks@4XgpBOYmVY;3fg%XC5y@y;Ij&TU) z(%$ft8vNQ&U!8{hZJULBnpmu|`@MMI?mDxk1-PRAgt!7-+{&cPC zrgz7!_6^bUgEmJlB6WAzdU;w5^N>XdyeZtW+?r^KQ1F68vUURHIO1@wUUmkdEmM2q z)PZ#J;v<^grL_`Jc^8NYw<>L_^sdOt@G?Oj15c7;LsP?Z&OI;bl}+iE9GD$00w5}) zx0I1RmF?lORRuXZcMFd`Nr&}DK-ujoHSH0p{cKr=sfB8u? z-NLt}E%QEV9T%5TG#VR#Y)HsW`a_{s=^mJF?J&C4!)1tQaGkg}SN1LQitYO8<`UFxS(e-R{fq>xMJoPXW_HWtmf66XVYP3l`j0s`)E6L{8u=?4p`ECu{Cm&X~%88h@WOy3(7LrMB2oZgKD)=B1`()eqNPbu2?ER+q)~)nX-(08SxgAS78; zM0_f`8WenBdq{ei-?%67=1a005f0=Vinm45F?>i7IR5% zZi(iXy$Sm97=2mDzQO<0Z99?7)+X zfiFCsyH4!A^Kbj3oKQouPSGjv*yieN&33fGHXclo~NyWU_=EA zE1Ov0=2P2#Wkx=llEp<4VK?NmB)ttS-04Kho!6mFqXS5nQe*D8)-aP}PzT7h)7O-BV`FyCOT75-Nj2~Cn zI?#227?z@igHsC!kB=L!Ej%{e6jh3k50yfodZR+1+NbZ`W6@lNZ${%?`{P~Nn~Um? 
zThlE?R0_^HN&=n-qUi9GA>5!Fl!PTlSBNy$a*=Dykz^S(s$$k<$sSWlR)T&b(~eXY z0vv@OkTuav*C?nA*Qtd)c+;sYR(5j{C~7qbWHg$#ihHgPZ@!wQsf&Cq{?ZsM^+3J6J#9|O%d%cbz{ zp^z$}$)Yzxmd)drepYD7d<zFT6nwt_~_ohK@I%-FeIL*e;J1U-xB{#Pk+OwwanSI|Lxa5 zmQOnGpX*5$6tygn))7Q^G2s2&r~M`)>dQ93}>Ox zneTwH^XsjS^xWJkn|*3H{olni<7WvV)bzwG$p>lBJpI$%8Fi$D@VM0dk`l6&ln|z) zZJHCP2xGZcvqC#@NOR(sp0{(b{tu1KALe8JLXa z!Y0^J`dzMVfE`8>#F|Yw@EECRWdPTaqGDtN3xX#?oPfhpykvCmbmu-=BxGS@{*%ZC zp%tP%;14+yN0b!%m~^MMe&&S9wRMft#xYLEe;#(w}jtCZ) z-Ex?@0jH5;(J08e1YjpZ0wt%CmV;KDJFB9aHx#jopiZaug`EBVY|P$^$gIHbJ-z`7 zT0FpH*fRFvEiT6MO=>6$DVgh8%0^ZVW=ZCVqAv$gRqRzCngz1INzb~8bgGsCG=6t8~;9`RC<7%0HX;&F7zP=QWO|CH$8BV&2Z1O^1$Y z*}bMc$8tn**x8ZKD;^8WrQCp=qGOQUCFTqgPhnjP$@XMV2H}L*;6Vont{Hy9FufOW5!+B$B2h@IIrsg6-?r)u$Fhf&x+PIx0u2+4imx4vU-}u@ z)d46c>8tdZ5+YpAl`q@rM8sA+hUbTsW3at6qu7CX#VB}$45I=OBP56kNUv*u4tixIj#2e$t0lfUwid8c%SgUxhm{#N$fQLyk+(oz!M%PT;nWEuAm>^v|6 zPy&~b#1^s%x|uyPtgG7fOy9%(=+-Ci9NvCCUhBn3C7Q#PBC5oO7CHq}^}8zj*Hw|T z!ZQMo#)W}stPz9Y#ofV1x$;f>2L9u(?>kd`XE3=)hGZM~p>@gOj+m;udCzAKp_BG( z>bq0asY^r>!c`(7C$Tid;`GrA<=D$gj5Cbl#TY}#6+&@&&eW;KX!#9+6Zki(BOooJ z(hfe2s5D}vF_I+YMv!hIeUc1X0`BI^JJ$DcuP8#1_*geZI0M`cARK%VGHCL7Ks}n!(j$B%>lxaS zC%Fk$7cdjr5=NuZ0m0+yLP+O&%H|LD{f_QLq@%aRSTov;y?fO!+db z6XxwlG6nrcd69+mJbrSxlWKS~m>Q;W!*uvefKHNwhgJ%~H9WSTA4SkS%t!(Yf=3b7 zhw?=H)Vz(c^SJh@gAc~^3@hLK`OtOJw8YN$vcu*lppdRzA}-nt-90^Tf>hEbr8&LCWcLnI5%ZMQj)bS_bg z<8nB|SYLvOmV%+4!!COVbS$)kBo5$cdbCv;$_1Z*ab`peP$wjBo8geEv+E}W_ z$LUg+Y|m*O`Wp^1j5;kB^`3>C^IY;z&?h$`R<=v1iU?CV#KFx07rDz#LH$ThOywHm zJ$zyP8dpq6t&h}4*nY_2DX@$F3LDW9eZ+;nHk1qmF|vJ!zse6Osrg+&Uqr7{5l5D) z>2O$iN5|c;syA_bC-;>xZPJ}Q1-z|0?=(dxTP~k$F;o8@)XE;z-nwEybgqSu$LDG} zkI#h!!^}ku<9+yLD+DV`@XZQkA`z{?O*E`%ad1F$me9VDox(6&!kLFC3$V=yn=hBB zEQ&*5N1_GlP?{sgTv%Y)Vo)%*#j=sSO+qgj695-5F9O4H9Bc=M- z)lvqPL2Ll2^$fCfa%^ElIZB1AXjV3R-!;GvY+x=&$0J4wT*8bT5P~dXMEHO>2_6q| z1>OPd003J6;AbEDqeY}JgEhrpq>zQC;7 zrcZEYF)Dv)GcO9p&$RLx*<9t~eAdpe+-7bI>K~VL%JYoLor}bskYg;`orEc@Ez-*N zBfDPYXbd+s+Ad*r@ot0|ThAaj$kJ(f$ssBO=Wf1f;IiK5En78vuh}wApQ692HI58! 
zf83d8x!MZL+4xOFY@Z zJ>ShVthF=j<4SXHo}>%akat`^<9p3_xHD~${->m^aUL-U2JBfqBO6`@?+YEhA9IZU zoY~(w`wU%=YOO`fhiVgl^BQGm-)jS+>jze@ADA5%&qk>!eD`Mq7BUDC@dvjIY9B<} z434jsJKFrXE&Omb()B|*vN)mqm!6`ktX7*kp#|FrKWvLZBu3jJJh)>kR85kD1pZRp zvT_W`S4dJ~wjxv??uVy>lH0^Y01J5(%edpG$jG9d!nN4Q&ihmrYjt`rdPQ`2v z=Pp8>Nrs_+L7snlf+!Zr4aJm$pLGo8IwCy+=){?eNxLDI0&0r{9m-R43>l+jF^gl` zDd2`EYNqbM_|wk{1P4V$Z-lonzHx-XBp?R4D(L}uFEKMWvqXS1 zA|V{Imw?8ZzzVQZv4;@efy)Gzn-`u%UWg~~mGjl;Zjn(QtQx}S5@q4J^#gJqZi64Ccd08qf4IvkA-$c`Tn1iYVy!^gA>7zzPYJ?fmn zeKX#8`@Pp}rq^+mZ&~^13)}a}R5=3+j#2L;J2-d=DZ+Fb^I#$5c%#vxD`He2?VlDM z92+A$C@l0QS`J@v$~hFsvAZW)RjtR`+7{4nKR0HkqR*DC ziZr3Wgy1>v)ACb}CFg@c2J<7WdRGhLfC6-FMP-$Cga0*SbZP51F;!Zw(ehQbwS*M! zyp~D=g>WzVTio~B)eVCYrODncc2=slJQc3)>JTVplX1j|Au zE~~vE)A)7P@g4H-J3CBM7J5X!{D5Y8;H`!Ml_LiWmwiNY9(jJ*fb10>ZSEuMP0!}b zLkOY)W5oX@mzDRjguDV~HYh~mcz9FERO1v%?2k|tD8yJ0Lz`PzPW6{{;3$73m5+m6 zE}L1H=;n4=rox-j0MI#uCXyA1pO-x!D~${k3ET_Fu*y|e2Gcd22)Un}Pe99}-3MJ# z$O}~S5audueI$MojU{>=(IlV-E+@`KXKzI**dQCPh}`Fr1hB85xuMaZ@m0@z`f5`r zT_SuVDzLMFpIUvUg&g^E044xa5=SHlnV+!$(GYv7XoCU)VznM_L`zWwItkz+g|OeY=2#) zkN%(){h)pSgsI4Jnzs1F7uwzRy`4W(sKbpij&9 zUC38LQF|9#Imxt;G$N>O!WFovFwSD_WQu}mhl`>qMdVpd5xNWQI6)eimWVncLWAX5 z?9=2X0bhUy0gXVoRZeiPNnfX%o7Zbv3tu8q2j_|A7C2%IdA-OLsiI^9A|Vr^zPEiB zyb{J;l;!lZq-F65(Km2iVQ57K!J;6@e1d-E2oQWtgw@>VELxMgbVy$fv_C^V+Eh@e z5|$gWiUc+enLx~e0;MCKSx!HYFacp)`9OzcX>ob;X4AFG6o-*dB@cJ!$30X?0pOHk zT@mjqv={?s5kC`Kg7^(^edbco1exu|b*Qi0IcNC-txJ4lWkD*b>xV4mf~6=>iUB%F z*CFo-b2&*ygwG-&qM{@vjc}RveZ6nL_s)^7*EZkrzW;SwL%~h^ zU;1|4E3^h^S1LC@mWbi0JKR*Mj=S9n_M|)P;_~*Z+_+386l2ol3AOCRCKLhzW3DkS z5Sp#2BOaHCJ4)h5!^fz{-Gmg(@Aj@6Vp(dfn|z-8Z*JEtaD#R=Y{>YD*+qt~4rV+O z*3DJ+QRyT5Cj9?QaEl>!HDj{@0D85(H73m!|8^{N?Hx^jM?%-)^z0P7Hg@`ca|S(Q^ZxHqJD zk7bd)p=OUzafy}%?i;dI*PXQ-P~Y>Rpt(3XB?a{vuZ@ct^%+_uEKH_`0(Df4Nr z7Nq&Xq~9rrpjvKAqB=HI-DOX)sWpc zHs;FZfyYLUR<;bBo}fBnWh=OQ%2~w7xC^sWj0_k(8%}*FcJexjlrQC;QS+_3Ie8Sl zaYu7Fpj)`;#yG4%gQOf$E@25+S_qL&>42<*Y{n4Gm`gR{bIM|L@;ez|dGmR$>1-Kq 
zIdDjcf+dB>Imj1dNG(OU#q~k@P+{}j6mvDVU6I6Cp`oS~g~d|^qpQWa)P(GZgBihNuOs|X()3n5^yuGi`kB{y{FA1=5wqj!zwUq1HfPrM z=yNasg;68BP&H6b6UYe)Xl@y z`%bM}y_n{HEt!FK`QSuwo$<93Yn>O05X2x^zR`J^*66+MbXQnno_n&AHY)w>l&z|E z@bW~<6ws5fIN5?>eVRI&te#7Eh{jLPz*lCE3Gm%5=^nQ8;BQg+3GTawYvJX^`nDVl zmR&*7(&Mt5zDC<@a^HJ8n_PnW>srX2HbV2Ihtsj8SZ?ZBsurcY09y%Udy#|7s207- zJo_qh(UrrsX}$N>j+)#WWUU2&9y{EE7+!f-D^y15QXocJCOjPow($re(!`fk&B9&; zx-oz1l&Xa22j=%aFqh3!vg+ZWrW4%VvOhu3Hf&m6`WXohUe|ox;%UVF8`~#JDGd;i zg47kg9*Z?)n-IoI%)bz1#8D6?0>+NEtWe!I?0kgIr4&J$Gh&X={IK~B0d25MWvC3s z7VvdSnt$$hW)}{tVxKzQSxwSFfbSP-6BR$6@-mfX>#Ii+fkzE(HItV_vv80JMu#~i zgqcui=q(U?MB4Bn@T$xj2$m>`9B0nEqy zxl?V4VTo&?=}sy(8@`lPTtZD3(c~Fwt4g_3J)W+<9PS-zt7iMS>;#e>Y%D}J=G0sA zxh=hM>8|>T)5c-$NHiNQ%cDKd7zV*U5ll%T%OE!3vfQ>U-20%Fu;yN$GkaJF)Mn5y zhbWQs*Bt>iNhiIZZk;eQueZ-RQ9qwY#=9(0EzHbmZEEyi40I}%Ds`1D`Bd=wMQVm! zy_IMtQt6=*tWkQ{yvx=Z&xwAj#sf_zaaSPiZLrPM)BM`Y6pj&R8|wIaKdT zE3a5mIv69v`Eh_pk==(9BZR_Em>IT}x%ZFI%hEb>nC2oueqxEYR=>(f>jaG*x6)k} zr?0aFUEQpa4yAp~>ojTP+m@P?#o9vmtR#J*I+A3S2)LWoyMxt^IQ?gCkJ^E1iA$g4 z5O1{_I9ku(>%KiC!?%!kLVP=wNt==^IkvCVpR+hS%ag3&W8Tv;%oAo-1x=HPK^%9H z9>77P){IOH6S#-8N&meqNi8djACmSRKy}1kJ3#+D?bzWJ>Q9x)0Jv=na$7qs<*t|q zu4<#n_8AJ)HC|rchS7-H+Q&`x{l?MM9hmQ~7^UT?@Ng!+MV+Aqt48X#WdgMkZ2_pg zs71Xq(%ccTEz#`bURAS(>YHQ9JbIRXx0cs)n%UQp_O6u?z>=5Tz|?G<6>3e}sLi)b zwz9`oT|bl?tF265slSZZM^j~aG|JK4>GHup;-Jw`k6 zQj1h-c}@_w2uWI4a9Jk)wgg8m2qs!PY1@9Ctbgs{JLDw_=8uJ(X@#5NMizZbq%+7u zszl&-JMUTafOYl(YjmEIkJNciO}RgHsLvf*$+V}-Nznl0ZzXsP8*A zCt*DN#fLvimQo7~=TpfNJ;RbRqD!R z0OfL#Y|fHwYZ^IH>$ToKSH5^()hn5VNf|^QAZ3T_P`ifYuzN#t+em&YEK_;IDXi^Ty6%iUFfz11cOMb1p=VL0 z;0qELPVftg6$m4KB~k!D>k9k;vWNr(2q8ZTa@l19fmAH29z<;{No95dk0P{sjH-5P z<#~_j+Vm+2y3yU%mEQ_A(PSIwYV@<9j&-snV!W?_QvH8eI~TaN>vRAAerZCJkbDUt z&_IDE2o(_05FiAk2$W)}yE>MXa7#z|qj(Z=l=RrAWDg)(}TP%=Vb zQBopshGpk`qf(9`o~2smcEg{;%u}_~ZLF(T)fJaYhD4CttqI;8*Ai-N!NO2{An3&i z0VS_>eB3JI35djxrx(F_j!$U0pE$1ik18g0^GEu{Khr<(&Vj`sh6Oh{+%$(Etr@Ky z03NL+#U6o8D8OkVaDakfGRzWXAaMaG6T!*QGX;6Elw>g~QTPt%5R7ONYQTsPiAzHJ 
z1?)#;F!P%U9*|ikECVD;lI0D@L(5q9pFm~WMiEm9s4Rc{_o%Yrot={p&X#>|T>t#P z-uJ2CfBy>)`_|=qpO~e+KfdZ$ZyiLraI9>c-DZgPk(H4M6Nb(HM4*q1DvO2348L*A zmY4s{&oNqBfP9(he2jXYEepaKMHCM>%cl#|3=2>@`)tQ5hfD&hoRG6wepVqVc?F<> zvo8V!IXufmdJ>UQss_Lw9ue(J6l>L}2Pz~(&C7#lYLOa9KN#*Lyw0&`SKb~OgM$zw zl9vQ2$<8mEBgP_#a+saj*GJ}&^|YZ(zNS2wjP>Dl$h(4#$IB%bCkB7XuOA~KkL5DS zLy-SRl$2b%*((A2o_RI1mw#O@TAOtGjJ&;1OA1B3dtlO zBrlk5Y&Ia_U2hG5>- zB#7DIKt%gSyc#*J#CA)j2x|!ZpTW*ik_^DWC2oXop>v*PcjAebUS{K-o`^<0YsCqe zM!-a0#mPhGiJz7PS=5!3!O#i(vJ6_2{57UI$J+bFWQH0^BuF<*FIkQd%hNFt9p+Ua zK|q3x{T#O=6*wKGY;1T^NH#$zNAp%6r_T$u&qf^i;+sE{AWbhJ*J{kOp8L5Yon{#@ zc@UE=ayoH(OAaEa704QM3()Pb3BpdW`w+aHAnSVYn3}0@)%21z!<1uVo}~i=De*?O zCQ&z|b75Qotp-RNZruPGr!cnUz7bZAP!CacAo2J2Wn-S$PsxA+dwx}~MS^zrV`hX& z1!qySqjRztdKh+h&XcVM8B%lJd#xGK(vZPyaCEF35@i!`wjuuX*ZCLcx>HZf{L*cP zp4;S?UNiW1XhvfD4rhMLI>)71XLG&IZ~F|*Cly4-Q!h$#(;|Kv92rYT21U2tsGK$tUHtdbnWP+5L_V}BYl9wQUK9sYW-TE`)W0(hC11O+j;Ws41OeE+)QNs8vF4xL} z*J|dz3_W;5FU_d5htxj-gj{rdfGE5_)VPF)P(*N7p%>G4I?(?oL%Qxl%7u@``(iIv zOQgf1?B$(MF5!_2MoM-THymT^D!y(qy?Z%XSCoy@8!*yNO5RDa8Lv6~r940IV)c2hjq_gb;j9gsbo-nbrcT$08+3h!7MIU5c0h z-MB<~!p}`6FO;EoTtb9?Tq^R`8N`b=19XsH8u~aGo!OaPv@_fB;E{)?EY6+Tp1c0= zo5^4Qgd>=5sGZr}r)!tCO=PrUU@c=F{u*WiAW*+_QM2cXVW;!FGO^jLHMOZTEbhK> zr1`_iHOn(qxt0Qw}IX7m{Hsh&e!7}SP*w^K3jL`6qfD7-RF-2qNG`-^I9U~eW z$jlzIdP~`S$0BkDZRYImP*YaC2?SfLpU~BFRwm|(JdQ(T*c!Y^H}!~Jeo+6dOQsx+ zr}ftxIFd3kiNjIpgVfKgl^p|&49lq365h_5%#1M9sWP@#f7B+*){2GAo%*KImDy*0 zHZ&W@v!txVB1}V-I7G_|G(WSE5E4q~ZP(voIH&jG5C^}EnBXE1oa7HUk7QTk(U-`` zF?4a>LKiceI5_R67K+gUx^hdnsVV<;BoyU|PM@GHd z@f$7T{YLq$1ZuQze9&7ijz0a?$`momwD&R~qN2L)XT@a9r?zmf=2yBz;NpwT+;6+= ze%o|w$3S9&gN2X-jurM^Qi$RR^MpR5jz&5Pd(9XVb|TiHc%JD9+tYos>fPX9c?4vh ze-{N|^ZuWSf(m|=ZO$?j&HN~vukIhsdIIHCIbFxAvBGwAgI={k^c7d3T2SO)tA{Mb zK`>!f;G!cFwptiyUD?7Z7`k%HXo+mSa0b%P(!v44(%8t5U?v4}N|S=xOh*Y&i*X4~ zit1j%4w&4+X+zwrOsHT_M@q;JNr+y?yTv|VeXCgut+_$734pI|GmA~6yBGcnhZPob zNLbDTfW{z46PbVrA?QnSZV*inlbIRx(T!v=v%L|-CRdh|2H6TmK&Bz5@`@wy`5asE 
zk{h*F`;DNU(9?{SKX*{Aw?6egY#tm6Z;a@1Bp_hju?TesQePYm&=*J>5W$cnTTijX zsll#C8^yb)I6}f+hbIg)5V-{=D`;enON@rJt|%W$Zq~}|H*ddYeMiTlyb`JqY4`-mMnNIj%hYQ4CXx2nGg%$8 zv+r!HR$j96%70r!aN-&?9bl+9@QC{13eo?=9Ux)g$Oz(FOGZflAuq0p6PPoQ)0bY1 zh%mP4q?j^ke{mWxF$&NVeFih+BZVGnf8C0+DMEw zZvtJ82v2y$ToMjHL@9 zeWrZ{be2L_qWpvfp-KWa=02k&kQiiQ8vw0^#zas+k{^PRAR&AdqI}-WpRr^+E!piq z+IwKqYWP8x;lf|N{p*LmyWKQ%PIkdL*@heYf)GTY%PC$PhDy;8-8oVzA$sxwd=4;TdBTLP_QBI z&3}U8i#_PSMe*B$wdSMVyaI3DOs$!(^|fXJ^J{c(jez+vy{*}NG_F_0pT5{teW_Dh z7x=!T=Wkkg<==`{JBs_m%1S8=+TxXUXk!T52MQ=E@HCH{PITC}&{J(WVX7&L! zJ}8UiWgtCft{LdPdS(8bl3v9+6KatXktzz38a)NoAVe66Kz`<7NyP=V6(NC{C}&~X z8H85yptk<12Q{}$1={M1Jyffm!{9tBE!GuForFfB+laVmU@qTN@3J`&4?{FApE{Kq zmIu%vN@47+lYMYt)V;JxD50Y8bL#D48}(JJh<;vM`}jdqwBkYvF%Ql#WbXO!BiJAChJt6I--^CDkmWWzw-2nUs zV#bFVAXlEH&!r+iAfkRk3|muDjThc422=zMstrw67ONU?8{r z7y{6$ROdk@FrAA65MD9Der1@W$hpMH=S!Gr44&$$D1ve@C~b?9;r%f)b~tLBR1^ zix#y!YPM3gIYME!4?v#`Sc3Zk!>{JC!)2&Mo}0>R2${9|ZElrSOBb$!v+!=bflBPZ z%Al7KfG9|0mQ-}IqXu|0c`T(S;hZTpOUz`jbbYYYCNM_P+LHE_3EaSd5W~a32i!6B z`-3%jj7V#s*Q{fp$l7cV_buwB#<~V)&%EMbuzh^Y$7ZmyWWi z=~eBnF3b=lyrMy3tWy%16w_KSE*vAafEtBf)Jg+C$s4z;~Sz8MVT2>Ix~Bb|5b` zI47NE0=8HEa6W@)2wC~7Lkk4jE&G3(Ki}BfthQHN%FG26ISgM0yrs)y^I*qA;t~=M zDn|!Vcj{3KRZ^pPt;wIq394!>y$Co_ij(L>Z;n5k^~gKf_?@{oZ!0=T&n0due1i!b z8zh>r+&OTPaO{X#vA477us5)eatI^EsZ+7y5q^bg4VMz~3f2;$Hb7}K8EhBv-_(Bma8QBNlEYdbBtz3QIx$srDo4agu%BoQH}WfMSU)>TVl1wb={>6LCh1G;8T=^NNRM zTI<$G>g_E38hV*DQSbP23VuKD$mXf9nVBSOI?8gc|^WB-?IuiHJs$kI_xyd-1axR%K(sbve2-pjb zw-{4lb=YlrC-)NHTCM0$4%J%7grHHicRs0i1F62D!9;0xal1@x$#{(DO_Mk}(w0Ze zG_X@o23+o$hbxvpT=Cx2ykN`RqxZ}!xM!Zxnlm0F6VENG;xsM~?_>Ao_Upj_cjkdsO#R1shM9*Ts%c z`6D$Z0%gdaxZfFfw4yo-Vfe#FCbNpsZ)JIJ9G^G3OXF86B~b+(1}IEOH*8?dAU$MK z2<|Z^27&1jIm`{UpzP-Hc_q8F>W5C5w~wm6k>D0Yz`P(yT;ae)Ce(mL4NaO1dL=KR zzAj+6b$s5&U(THn>o;N&IC{V()&(~qwg!@ZB)+gNqJEz{y#eaKS&A!&g?bh;QcT2k zs5BX$2eH7>ONoqj97P=)M*&kMwU#BT?;pDqs`WnhI7s7nbQYm7oHtF&ZRBr0F?7I+0aTFi>OfamzqsCJ(&* 
zuohKmuZ_8oap#aK`@C{0hjX3BjF0Vzvej7A!-WsoZ#Nx}Y_Z8{H0M8aR&t(SB%)ac>ec3_t+@t3g6r5i$W1h>`dj5RT_6ZYZ&$7?-*Sy+aQ@zgq z-1Ak_XQTI#EJ4ZZL8MLm7k3K$3#zODkSqhDNT)!I4gUtp25Lh+Mu|<0i`t_BG*AN>1ELaj1ve8-M-=9UQYw&C(Rp2%ZBWlrQ6uA~ z2LqK;U-5_w2EZ9`3%e-X)f@I=K4^NNXbk4=0DpxhgdT&*<8|Y`L>EXW)W{^i7@{Py zGD$QNmsn|wVb0U16xs||Kl> zslP0Ae%pn4W535i6x>MMxwry`+vEItS^n3IvCZmHTkx=!d%AvNFelJ*zkc+|I_Hyh zjtn*B`+DPD;Q-i5a+K?fB5{YSNXr0$a*Hs>LSXmx{H@(z&j}Ye7yKk*6Fs2XQXwdp;?jUP6n3lZ#THFbTX~fR1>} zxc9+m#RZ1_R=8fWWhpR4xdTQ_n~1`Quv!_+31%Cae5RtvTdK9^Ue^7+Ix_*?>WziF z$~3K4eMx2ykYLB{PB}`_D0E3QBJ~kO6|Hp~ za+&Z40(M}4*>qXS!YN}Wll(h+Qek1}L}_RmnnGX?oynWEj)b{C6VdHrJ0 z(t`?%mCkg}NdB9wb5IFv&vdx}-8x~>3~JhhgXP3g6w>i&Wk(FxOXF%q+x9fw#jY8%cb8hn~4 zjHukhFaz`)A}ruSXZWZ1;$@vr>*iEo7;Gx+?N&diZ0pxsJX##~0Q_5=FleLk%5bxT z6Tx0JIyYNhL^8~XMz|kN5HM^R01B*viJG#f(yku-7OyiGJx>W9ftlUc*fb)s+04K-xjJ=S{hiV`1Hcg|1SpgHN)<8M^ z44KN1XieL@&SclY%fh|D$4~nTC;=1&JKvGBcPfZYuDyIxI2j^s=it48f7;uQ>D=A^=k+z8-2#PDbz^v}{1|KA%&o z&i`tSsO^Yc0H(@V;vKya5>-@hBbzeMhysD>k_bfTB{{|G)T7P~yhRuzZUnx`KEi55 z^viU=H*;MuAgs&|t%fub^59s}Fl1+)qtyhi&|IsQtbu#cuRPmLZW2+%b?WL$2RsTI zj=#Fw$(kn25!EK)7YNt_G4+KqV^eJZzDjCXBFDJ0Km^@2>p*bm;?ai^8!s~vXx@S2 zd`0?CudOIDUF86(3Pya_W_8a;`5_)oWc9SO#73){vSnZZWJnTJk&NSfu8**URH)>9 zPC8V{5(fsOLFG|TU(Ibz_%6Bbd`c#t8AK~^troQEE(VFBEG;q zqK?{dgtAQVQgB;w7DZIo2@VDug_0eIMSqWGl_?=K;Z6RI*FDX!N5tOOH>hVzic#^S zJ!uXszIWSbN2?of57T*E&GRjNO(0KzujT+%9N{2DuTpj~e22mbB8VM0E;59Hb6(8+AZrbQwSX~+yASMM*F5GOXY zAl`2@U1LO)IMdk`L)_Yei1OFt?3V;+ZAmA8c;yRyZ z{EoY-(w&MmO-xKI(#?+Mi3wjnh-w4q9C#w?e^j||DL-m9^ZB^RAs`3Tc9 zNCO1+Nz%2@1X1Migp9EV!Xdzb4~E)Jlq(Uil)7f$d0ARlV9Ol+x{{k+;o&smACj{M z@ILU85Jh3=5da!T7%Pag5LiR)E`A0;6tIE-I`#t=E-DmY7$`rkFjb_ekVRF)2g7S6 zOB)SnaPiAp_vjs(S+X3!$DvKPse})Y!F;7z)?6jre1>@aiux308sg;L`%v* zFY*+nj;78+`NBeGv>l`Ih?nNbK^g_H6>2CskX)r3Cnk2S(hJMoTrO6wU#%$B>h`}e zy@Z#D{g)}uVE#EWiEU77Vlpuy7D&QmhFC*F`?w}q zrRzI(tkO#=&hHgJ*=uBV_xfF3x2%|Gu-1FFR~awYWxOgXz&lmbvC|e{}!lvu9?9yOQCF#C>6S zX{htgpKhb5M@kMO$jQv61)EB0CkI*`-WY~8aZy3l#SV&7in|Vp94T(xJ>q{R1b6Xt 
z%WsXvq>@HxyN2|816$0um$hW50uX3~N!&h&iRh6yuE-4 z%o79W4?aD|%zJVcc2PMo*&^O60%dLzi1zlOsMcWzT)yt?I}?x1x~g^&Efd7iK<<-N%HxO3s;)%~qz zn0U3npm5d_c&l5n5pReh{`N4t~9fi!`qhJZCtL(*GRwl#*L%s`D z*`p^dZFNP5CIZQroqHiQ?0fX2v%P?`RC0`EB8G)^kny3xql6d-eSqaaYzoVPH^Bq4 zK&5_xTjJXC0$G`0#LyI^#i{p;B32i43uxft9|phNhA6e$zC2H}QP|Y0m&^yKio}${ zC{1)MSoV>^LA=w?il~5zBWyI}Vl~2c$JKskF}NZ(cPycH+T8F5ezNP-8}&9i*O>1% zOIgrfF{hZL6RFbhsL4|LWgB^7U8vSrSG3?r86c!6W7ONsM^C_jGj>HL`7xS`9avdQ z_5oFLtnn6+$st1Fwj2_BI7*bu28?CSTGNGgyyP~FM&l{m?)H$ZxTZ&Ugr>}jS- zV%9Mn9lITcFrO#pIBeKdEDs^UXLjYnN;T=TC5KWmlj+pw&Rc*KlR1uz=z>b%3bPGE z(?fL;&reB5#ST~@hh0cn>WT^a!i5=ZREHNp5o0GMNsbXRKo)QjoV@UNytroH**U?- zOj_R)<;fzsP&6SRcO-o!w*RRk_%k?;b9jCYY` z0S&-pS7<&8Cl;AHsWaMUF!zt7yC2f+`OA92KsbT13JP4$2kLKDe_Av^e~%rMOjWGz ztY_W_q#$AsLK#VAb=AOJu{FyOJ(}W^kFP}40Sy+6P1 z%~;%207#S?eb<_TS7^4>P{6lfpab=FPyaG)`W84($xvi3|auY>)};5sHZb zpR!uEv{;;}%2Z_h7h6Kx`$44IFNfAO*?`BFAAyAwU8-zl}e* zL0uGNK5MhjZwP*1l{t4vvKH~bMA#Na|ER9WrL53;_g~|}=TGIWmM=bs2q%)@=wmUy zh#7@4oKyk&70CCgUzc;lP#dC~ZBjW+*i6QXi$T5cOTwDJlxt&IHJ^1j_`5l_fsrJ) zt?3&9pL2Uf=K>BDssw(N8-*JY@KIc=c$>A z+^lZQHZmC2mIuBM!i*+FQqoJl8hk~!4JH^?^L|d}D|$)c`MpGfbB+pX4X{InAhAM> zU!<0$?QW=3or`TUU0!)?>V*9LclWXzf?`t@Ad2AQ;S^zSSC=2>kT~dM@)bRItI1*u z1Er|EVy(aB6@6m`WhDbuVZ+$FB_2_kU0vK+jESMPB8(MvnIw3ftHssRu3~aX$#JLC zcv=C&MXf8?Bv`nNZs6_|KR0(m)OSGB)(76n3S6XlI~-v?qj13uJsihSess;8xBiZE zjEQDdW`RYNTmmdZs2!W@)XoB9BIf;~VC@9PBORw8Kng%cvkE1`X`?MoOEbQNa zs=mADmX#JIE^!Z~N9qH<7=VnQ+(-6GP<{F6&;%U?O7{f8Osh#?z9qYBs z7-I(^ADVESI(1%NCE5x};K5$dpz`K9$ns#E_?dPai!!OUEON6~(+`I~7c-?Rj6x%L}TM-56oq2F)WcU3u;jx=%*0_r?|yaA6{PCMW@LQ*hzabqSx zN7O>MR;LW}fM6&-*zO6tARs{f#lDP_9&IA$644n<)TT(Q^Znc${EO8%GU$BT)mh^z zbXWFLxAQ6Th|Ov)g$YVJ>iLN3Uk-?l^Ni0L{D?3NbwR$1#*pekJ*K*t=0s8kNkG)P zz_}D^&MX3_Db9Q|rr@Euj*)ad{mb?|s)+$0a&BBw4d8#MDDdhctzff3y%XAhxENFn zQk;``x)4i8Uulf{)rgCt2-yTNd$hgepClssIv4`hBlQAJDu)?#0l-P)l#IuLL`ZPA z6G4iRjuBI+RU%ZviNFtS;8^Cq=)maBn#H4bok00cs=at9FvLcJs;|lju=P1i2wVq% z3wAT=7|wwgO0*{D;<_66eYDK*ft1cFUS}hfJfPlNqYB+x+}ElHpUm+UgETSe`&X-W 
zWC~Wm2V|Z|9l^oaUUfi3;5n`mC2pNcuUbKQ!2fBE zP$Pgjavl(T>HE1U*``Yzx2Yb)&&5d#H>j?jug+^h5W@o^Xr*xc% zM);9{>R;?&v*+a%bh*)MlS>#^mlU>(;DXHzA0rE#9wb1rG%kYHqIr9`?dAw}^vFOyI4GnbM@SU7E zfD4?|U=rXCNpq)IiG#V5`u%9$^(Lg}G-8F#E2We203l8nX2hH@oI!+xfmh zYuMduYY0QoKftHF4_LU7hMq;q=L^Aw@f-*mC%R-)(^}y$rAef$SGQ$jfTk9vCqsaS zTZ@^4=&`s%qRA$8Y3Zcb%)m6?7DMuj+Sw1z(He3m?cVZUDhArpXdOW}MJ~QYV>y+RAfC_BZK%3{e4W_DvW%KQt76 zas7jHKtV(N5|a}kU_b+c(ELx97)dhzQkg>pQq5|Q%~mH>_9M0pY%KKL)G8qvD9A2E z)5TM5UyZ6vLV37U+~CbazO4KUHfA2YwHE{p?1?I$Jr%W+x}|CbSQDQtY&3FcEKRfq z^o?hhR&wakf3sUqhTLjfN2>~YL&1f@g?kHS9;ilBpuMQ`V?+K0>)msL3$)^1+_gYE z_3ArkprqzDfS{A@4v;1jv!Oo8Q*WAUxF<033KAG^R6le(NQVXp z@3m{O$>Uw@v|JlCny((H_Mm*hNrhK7uqq#J{|=jesf&FsI^NeAaDxcgdb!(`-F_y9 zKBgTo6Ppqq%9Oh#MpNFa6g(C7v@S161SnNjPD=7zLY5rYR*X_wK^yZ;?d zAR1~J;;yS8Y}UKX1AVlkD0tCwDm*!_d2*nowS>W$K6 zIP;NCCGgnG&^)lqM!))k-Ao6lwwLxIr(@3rTy28ZCG-l7LQ}Ka)#>5G@B^#+qrOLs z%O^B*8Zh=0#Z#DjBYx#A-2j@)SO{Fz43?0zdeN{mdr*1uT;$}l>#Or>Te;U+V*C(P zNsL*2DqU*kfz>PX$}!ks_(ofq417?qe7QkOU$*pgH?9@dQB3`zz=k@qUUf%mXv(?4 zY1Tjz)Gxd$>o_erX)XsH;tpeCpVk=-6>JE(bLBGd;<%O(024uqq@dCEoyIpeH` z^)B_h?krcl&;SJECZOmJsrm)Z z$Ukp@vk@{K@BC$6+Y6e#g7Xbw zl4R(yAxtg7>4-9PC}O`4Q##Wkf*LYAGnj{K&j1^Odb&KsuBc5H2jG7}9r$)z+Akj>yeImTF% zUVQ`Vcbs5sM4aF<5kTgNu+MPwu&dH6LP^pUX$#*eE&IrYuRpp-<$Khp=eW}G_^7|x zcf)aON_k`O-s*;xU9kC-YmoJj*r5KrMfIyjn~i7-+rSdXIKj!wxdZ?OP=lW$7*Zc4j16_p z(n1%U4E%V+P5!0&`1pxNaBRjW!2NLlQC&f?!mkmd1+>Af9B?c&E=;w(5$61*jF3V| zLUVXQu&=Nd5#0eMQeUUqe8pTLZg*bPdkfT8R+|xDsC8W{5WoOu0aEdyurbjvG{@EZ z$D8T9yqlhQ@2!<9`FMF}GTn?4AB3vhn5rhJzNyB|XiNOFT z_74%^1IxnZ;Pg;{3*2NxwtYB=YA z>TfmN_~;)VnRSSN=Yid?J@(iCC5QM`4^BRAHRz}R{<|0MxkGeiXJ>rB6i7oY*m2Xi zfyrk{izGKQ)Y>E85d$-A@T^?v@oZ*Yi1V#wwdNOZa>m~!F|+v_ACde>OmEv5Q9z0Y zQ&BNeiO7kGs3H^|!op3Ais+J1ImbPPw-w{0${4lv)$uU`Tkv&>jksea+Q}^#n+Whg z!D#o64P~@Zwe=YrGOjYhg~mq5F=L^z93HZqYZINLWwq(%Vi`dr_H?gizpyyX^Nnpy zBut(a8M`>-d8omIof)ifz-P52T8T%W&G8k-^zhcS>604bjga)tYP9lKH%O3{bx4MK zNX(^oBKJD|dE}#`%Wu-nyt&R>bj_n*aXzg8BVon47bPBPbH<349EkLJj_+L2vvyrr 
z|5|Z9KYlz{)6&_CTpQEeU}PdZFpf(+KwVfn;iKp8F3Q z{?S6+JKg@Dbyp&O{%>?w|2Lhv&Dye0>nxkY!?WVP#b4U2n6m=wnK6P(_|KZ<&(EPN zE?8Wh8Rd3DMtK#{+p={DwT$YuQ85-@t2>B37NG#%fvN*^Hkq~#05g;^Rclj?$b98pphEke0<0sACTO2%QU^QK7 zGHKz+Nsgi9u&;khiT1%-y_XFJrbR*>AcruK*HiaFeE|szoQV>XgNZc`qlh^Ra<>7x z0GW~Sq-=t0Q#7r}P*BLw&@!+((SVEfpV?TM56OGrA&Do>#4U16)78SLQ+d;xAk!g+ zHU^nUESN}JI2$>3;MOT-C8YwP48jJ0X9=PM(Pm2m7DGmai2^!A2t>GN@DQpEj$}9= zx{_$Uz{_kj#Vc`g_H)afo*IY^PmoOy7)`f?+LZ@EQ^KP|W=k+6l?8Pz)jn89GSn_N z^K3T#t_N;=U^X=37eBf3n%|K=|4*|h9@}Ve8=krQUUsD^N9FgY|GeX7Hl@+z_+)@+ z!wznKnak@LO!rM*Bbf63MVYVz39GMpYEQ9bHAzzD{+QYDd+S@R5_GsGF`k}`Ymvf{ z=~glwQY@Nnp1}m>Nr^-FEO!9wU4Npse|QW_!8(bB-$ur0oJbu{dXOb6=?VHRU1%Q3 zAClM!_9QZtVu{HQ4=`8dSR4c~6Pa1ywVQ{?N;L1YRnI6nL8;wt)UJ&SZR|HnqLSCLsVVn4#&DLKuej3e@d4w3Xiyh7-`$VC%dI}J9XvQRQcboH@;D?}V5 z;!hpDdM%&K=Ex9Y_%#UOcqn0kX-W`7b9RF-5& z!A7P)tY=kbM_Fy?SuOv%!_Zx0n8`K2oNLw$W&F{;d$2sc=(E&-J1=6e{m9t%=h;9A z7((hoVIhtox?`9C6xFCq2xVgrp?_k%z@^dXksSeFkMW-Aj_8AFZCP!=iSQEQ0tYb% zc|c=?IV2X?xO?vO5&W+*{RAceC>&sd;!|`8Kum}{cs{NRNQF2@8GIt*Q9d(bQW3ZU z#AOC-Q-VXUaeq@E0;51zLD>S0QO1c568sLTl;mbmAyE;^ibp`o?$gN1Em$J+3A)Bd z##3nn9|bfeBl)QA!T7M4^3I7TMa+F=zGU}e%JlsnOlCykM7iGr4cOzq>%bNL~$<3$sY37Sb-er zEJSP*9HWyzu+)?@42Az2PWOeUzT~K^GQ4B96dqkFWx%?r$gG)J-Ksj)o0<7Ab)p-k z5{r>xp7yKL!vjp5Cd(x=v}k2yd^B}lYS^=4crZ#3b;^nddo*D`z?qyDdo}jLG!E`} zJ7l(dtiR|yd(kz@#?ti^>XwTjJ$4B$ZSmw*>_tUdZD+@A(OcixkXhynor$e*0xmNYyW*bjxq7{NXpYnWGP|@ zvEIU-g9!7%!DwRvqSAvi05=9!01j*?$r|8dcyQx=_S5#T8OR6t*dD`-L2WcBt-yD_ z-MB+eQPZD|hR?mcNN*c5nRn<*X8c&4U;gu)7H7`PT=nz0>d_Ciw);L*Pg=A^sU!Be z4$-MdR0cVlFup89BAi9H7Evk3bejw(L1jTXpCma;SbMmC4h&!~nP!ZHv=N6ZcNyxn zm@xK@+|KZka59wAiH#qargy=vL9oMtlY0a!fJaUcG}JVx$-ok5*oX+DeWR_U@03Ny zdf>o8@4&$&GAZFm1;P_=F}<2NUqL`w2XJ$=rVwBN26W!gOYAH7Bw-L_qElTx_kB^O zZt3zB^yR*qe*P$ZJ2_IrE|Eo)5b&yZvv^W&Ee_KC}0ZtB6@$=lyZb zEY`_zMMUof2W%ToU(>U0si*U_?rvh!_$b))bcaj3}hhVaW%;%Br$m4OqFD$HQR4c$w$4 zj>%H8!`&-RqdaZvp3E($EKZP9gyjB(pFVGKcgiEB?;! 
z+SXB<7d@V*e|KiBrmq$8e6O{r*J`wyyYDj|U9H!yezkSZ!uzw0SLrkLdMz-==U#r2 zec9RgnoesVNu6l+Kcs?xun2Rgs~ASPd+c`7MvEc zK|B;9hfqsgk0b?71YBVWz*&>Ch1L`8I*M>1e}(77xhXdS7%jEZs_*4ZKMwp1N5@8r zqz>+bgAf@QLLQKEfoEV~5qnT-!JL4_1<_?O(G+R zy#Nd?((eqmAVUX}SQL)A@_eer*7e{U(uEx)D)X*OzxjLV!e%D(oRfLBUGz}}lX>Qw zZn)C3@K_ru7!?Jl;i-RQ7h^A zWjwQ~-H~yGSibHcad~G%sfg#?+BVY~Ev(9lNcmkpUyF1er{mB}noepy-rH!CSruxljl_MfCL8VP^h3kr zQ-L#6WLHHzl9{n=&FWP>8<%>PbSzo6)auP*Law$|J>x3)k;_HH$2cwZLmQY!>8Pt< z#9j=sg^n%l(btmJC^;u@UvC_2!7q!J3qL2u5o?jwAa~y<o4GKHug(jzw6frZS?2ic91iAzN=OVcd;$j1x#1N-IU?8q2%MDEbdRV~ zq39tK30e0;&4H9tHKCf}u0*DU;f$LM%%6Y8oga#Ce8@I^n#>z!&2Vc6WoHz$Q^s9l z1b{vj@c|k+Y#=}&!bjkBgPuWc&J$sMAWZ<<9CtdqDZ3~)ET$#Oosjbb=l1t%kd4Bg z^TbpTQ*)=+AUl=-1Q;?37?wequM;T2m&88b&6czpK{g&l1O_j(H`=1o<|3n*6Io`HwG-G0@WN$IUAOIHP*0u|coma`& z%+S+UF*dV$k4;<7Epd1))%+L-vaqIz;)sA2$sxoNq4glHqJx2gqXQ)^m;Un$y`oN>M}7KCrHQZ3N|V+Ss5C`RGMU^N z&t1kgQOrq9-$>Z_EAxQu+2PUkh}8&_4iXJoQZ+Z{2BfZFw#Xc52Pr6->PLUa=n~}Y zp!MWL5(SBNLk-R|vpsEWH%)(mR|{_mMS-1yZ9(Egkj5|}kqizFcgPod4X6|`7jYW^ zQc1#P1P2ZY15#(>?8YbVIHE$@B>OPYW|r zY2(zMa=WuQoWkX8czKRJXQNj$TCM6{;#NKCPv!RN;_&Qzrm(uZ6p8MT`c|&GBwNo> zzscS2bfhIcnwh|+YW(IEB+dS~LO>rzlh8A%Us@~4#v|5_D4l+fekEC~pX7G`s`TjX zmgY2b9Sv>cbF{18OUy6X#L&*_t@*ZcVq|L1(iT1SYfe5QL=K=Y!a{)JmxdY^3r#zJ zVby{)6MBlxkIO=TBqDe9iwYaSeZ5+iGr*GP2UxV|C$YM4^@$$ATZG9mcy`~M>Fo%= zQH)`11u>)-pqQkrr?UXpq?v&uqBDRCF%bVLY&kAfVa%E--N+J0h9ZdsXtd$+P(YGw z08Ekb)IykGCgL^_VHaQseF#S&c1OxC38_PICd(bSUdvyKUhbRYIPk4Up0_xA&8`Oz zKfV3-XJ`864EFIk8AJW&_GJoxF63s9+Bo0Hp!JiB%!bDuL+B~2)@9vmJ*m>`^RYF{ zycpAxvc95qsiE1FDRyYau?&-EMF>+$H6Q1m9Y~l|-?hW#K5uuW&_xefEk1(jogZRd zv|J7A@P#V=_?6lDm>sRi_so&Ii$T436^ViTTpLe#Lh29`4r12C@}8RkexjYW@kP z8f_TgtX1zarsBCT+hkyuy0WtI%rfNX!JFq*|8*jsj*t-4i6=K_Y@QIzLuF*WN{b#i$i~B{Tq74D;0O0iN%+V z4~UXulj9KwJIP)Q_bc!^Oh0j9@+5!Kw8{lSGKli$2Njy9?Bo+pJvn%sD1n?)^UUKT zHO?A`Ba`WM=X-^Far{=aX(?xawoQ^F)gm`6WnW3bfQS5N>*?N`yd&ap$5v0tvh<7U z+-&cf#@iwR(@N(l3tzh6)C*a%cVsVpO~0Vy>e^e)>Hdh*S8FDATi<8KVXvOA;3ImM zl&o6k9~}Abc+Uw=Ic9Fmn*BN(+`(Knw@j;i9{!m2}}CA 
z-;EZGxwA>-UFb#yB-6R8_f~7Zk{x?caWfn%S*T67_3}8vu>tGhZ$^<#wSZ)i4GC8a z#yhsAI!uXCY65l+8a|LiigP@h@Oy}0epf(<7ib<;H>4}jPjj^7YG2T-rS|(;g63RT zM(l9Bd3pt^9Y9RvNHlaD5b!+&1!2`kf{Ab+s~_+rSShY2x<6bR=rz#plYI%c2@=VC zF18)=0HM5_wDl|JwL`GOe!$;}>pfj>JIwS$>(yy2Gw`IXZu>3qY{ zxuA@bSgr7yLC@5_c@A+u1=OF-xX|iw#NrT=xzKE>T$6Zv^&1YyCo}IpZB^|hj@z&N z_6K`rt}7|MuEc!v2)KjT<8q>qhl3WR4DJyx%R#xS+-ac$OP#zu#ghyjrBs_3|ZEAxS zdGeCp1Yya@!MniSyKQ*1TmxOBJN&>Cf}`+A*!h6mpTv z2lX-ek(SDx;Xqw8PqQl5JR`{xFImiqxX*clbKyPJU_wm$v$Tp)X^v*yO=sAM_y};kG!-Q(L0DHhE&>V;<)|r*>A!Jgqj+ zfQj=g&*>d&R`skpxy3W(^K~pjup5R*8cZsq+#=7tme6A*`s__NXP*;IAV zd{$+ZFQ!#CgwFD;WG^Km@5mBox3t`=S|=)Nt2~2oAgA~si3y_^eQaSSF_wN(?(xKM zRCvZy!%JVXfeS_TnPnb~H+AQF`XZwv?4l!{!O;;5V>Si`Vg%(BSMq6~G0tdla-9u* zFlBw7)Vd|B!&~EH7EAdSazutl!;G%fYt#kVaOGDOIXs)U42IRf0upkp?VKC2F+|^~ zWOyVtz^$U^8j*C8>HdME9Yo&4hzq};j*0BU{;;XmIFzz4H%Aibd|u(`R!<*jOh{kl zKp&C_7wxs$K)pBT+D1lrt+CN{Lc2Bn)AD~f4_m9N-NBDLu92*%2**}+?yga|% zBlB52k@2zq@aRaaFP+wNZDB%eREO2GnPoFR#6NrQPED(wImU^k_tlyx@0c`Cj)_Rv zc@l$QV=7qWkMK*RUee!jFMFH#Tzm9vi5CH8ULCZM1#x86Nr)S)i)eg_c!VzH6tm|%=qYFc=KSouGKs?HmK)y%ro=criO|# z8!nWH>7XM;K+dRcb&n?cKqkZ9`=~f8?Z)_^>D!@AnIX30Lj&M@iFnJ>1KL@>pBc_+ z^q=KnoMwxs-qYLD)#4F>mS+i|Q_mY(^5SO`$J;0!#?&qT5qOojr`}iR_xWo<&U}f1 zCf`IT7-r`4admB*G{-Uus;z3XRn5&l(BfIv>**aGUXcQw_jIiwdeL)oQw{T3Ri%6KO4=mFhfwMdwe~Yda=AH>Cg4Mp!LtlWu2O$NrLd-ILQ?9NT1FlG@ ze2yBraIAQ0@-w8lWER?d4tVNloDcGm6dwdB%m#?jd5 zZKl8;O>630trgRh`+1s^a!=)ych(lVc#38}V_&}WZlFhN+n{4HA=x&`&umVZCl|S5 zNgh5C__1b0h;pH(g-lIaz;dCzSqnQKwmTAG?i=-x)jLmX(03b|L9kqFyG5sOBNisc zqmX1CR)r2*7&glLx~nP1!$pIOway!KW2o`xmvD`bT;|?H2~Buv#~0ZbXwaentC*u; zEE^I>uIYTeu=7eQv*Mn+(&~G?@aXN;&fBYv*0nPS)xv{ny8GCG(*LJCEKGZxw*NN} zYjg0wBXGgk+oXQhJU}HvwS^ZEr3O3)!dA@Eu$!XAQbh~y#ZO70fjkY- z1!f`{{6I|;k)dvAH$fm!KWCiDBIaBZm{+y1;;H|r@^HMjNB`p$IwS9CpFriY<)TPI z93lgAK)gXcD3fS>N$vpWriQDhE#?PEv!H^4rwLKvfaaixL)Hb?P|Nk>$=GGDs1^H=t#ak}(My-zfR2 zC>bSy1JjWhe<+O_{Qt<{T=wD3hh`a^d;WCKr62us2Iqf1T;Sjx`*>KHDYft2CtjC_ zjf*t`M?4eX6ETDmkqjdvIbov$5+bL_tWQw%(NTuyuqpZszt8&nFWuTGeS7x_+H^Mz 
z?qHN*n8eS(8hJ85;`fJBDY%w!GA2jku}75LU`8Q|-a`PyG0^mAV%xCX@BIl&XM=B+ z%w++{$Wc?MQWBDny^O*q+v#!sy%eJPn=+~1A9(VcpgnE9+M8?Pk|JA}4xH}a78wBt zq7i_mlN>VeV}?_}{mX|d(y3l56Q6=A5Xyx=Koi9|#z0RvN&r8AA9jxbJR;2p$U4q7AOutr$nX)s z0-6F0z)K>)7Z*Hx^M4^qAgBssAcZz1J)vy0G2DN|l;CngpUHDGArwLeiU&Wu$US*> zF}D(ky8Yj#zs!O~F;2n^LQHCD2@MBqB+!LDRvfs1B{(9b?E!tFnF3d%z$crbp8B5) zQm}4#5H5UKV(7d91Kc#mV-yd_=?KH3b3pBdC?AC_%6yI-w0-qhRO^o+jTvh5{`=ql z7pvh~#^X+1@zuMgnW1bb>(mT~exx5X?7RKCW0>QMKiK)}Ltu^#BZC}(b&R$h9^!^R zHbf%R`5WGwn=BEj0cEeJC(q;Iod_Ui7;2!cfX_Ul}0Wx87b{#On&8Fw`DIT(vp2@d1bB0dTG za{@Uc1RZis97dwXMq$j6jaQ|)odKrlsgr%DYTEKi^WoFss}KxfctmkPjuDFii=<@m zM!^LAIMm2HfCNXgO0(DAN4YO5keNNE^?OW&Fn!S|cxR_x!R3D0I!ANVO_&W==rh~& zs%=I?^w2#zQETUacn{1{{OPy!Holde_4DkkXXj4lXzi(wwDhytyDRK-UE%(%h!9}# zM|!c_lHA@*&Ktcq-U7t0sLs)GO4`NAZWO=p9CegK`%qV6@4_ofBgkop{9FQUL8H+K zVq+5fu0)b(n(5}zTQ!#01-EKU_*v77QXToR#OW~O2(L^CeMJNL&OoJ*Mz|MI`4Of| zhzwb?NH4XH7p22|yqu>LiiCi{+a|01?WO zA|4gAkZ7=hnSmP+@DL+Mz%f8w18QlO6*wvx3xIlTecCc+DDq@BhB#YumQE&A~S=pkNWirzwFpaedc--U-w+EnJ@n1_CIN%!1bnx z#l)P5Xqwpy#3^y}z;ZV<5ov=WopH5H)qtFZVBy4vcWXr03ZcUa;b0pnBmc(c*w3Kk=nZpi)tb4Feeis1SW23gZSOI0H*4aT=HxDLB|lBaA=* z`6qY~I+MEtA^_{(hs&p>?Wxq+_-oUaAXh=kkQ!zfk}Qh&qUD=-jBkbKm`(okitj~7x7A{ zjF}Sg)w5sLjp>1@1TI&e0dqToT1YT}GsqNy#puALEF!KVW6mO(1(l3}%o!$6LH@F3 zg~()=oQdYnk;LqT^US~>2%Zrc(I70bXNlp1tluuyY@(R6ge~JP0p1y_ zc&@!%1}%&xnCFoAlild^s5fln`of6B`djWI{ju{dO<(GoRWVqeA0acC0R}QHX;zLC z-G=UgBdd!+MDgw$?b;IcsJ-e*dJeP$Ep2m;-f45*X=@ASby^HFZ#flTPi3F>@Gn2s ze)(^PmiDr??Kea*bNnk=E|^xv2sBTqNKvL#DP;a#vuk zB_0YMo5G*%m?alUs^!i%(a58;7WWd!rvxM6-(#FNGg8Ds35S-+a+E_4$G2ojkMY-4woP@8ro!#ZdXbTbOVieuQ%Dx=Z-+O0INgn z1`PqqZ3LgRE23Q?oF$^}vt!?o*d)?M_;g|=w*~W6@HWldAB}Y8t5s#P91x&^jN;=! 
zX)Zx4ByCBY8d}m!=nR8l$m`@+xdMUajSpr` zA7ZhiNR?1E;kx`#ZXY$alt26TR)^uNsN0ifRPY!pl z=r}oiYPe%vcwI-&3WN-iKEn-;^r%+~xYJQmU^#K{b3LnHxFp1g4%(HG6krQd)?t&U z@*i{;lV@ty$(yL~78uigMH8-e=8TL-$IyoYypeSZ!%|J^0Fxp65|jPL)k8k4fhf>S zXKP!XFBR6vkWwBvBPvqY71HknU>T&|1^!pe+#-{qRSGUX%cdQ3fA*bXTq`gV!% zoYiyN7w>;jb>|PP?O4}gUA5%YDxYWVI5H(szzy}apxyA)=Kih9RuYM~$e_z`Vj>Z% zCc!c+qS?jC$Ur(#WeM>XEGH8Enp?_9it&V{*z2k|y`@BZ#JZN+)B~0Y2h%s~=^T!n z=K)+A9ZvVlH91BriBi7Lv!%h~eGb4=v=wn{XZPEBIqO~q)QqO$1-InuTXuh?oR^)9 zfL{Bo&bNySLS|!pxG1ln95Vp(L`YK}bG~Z!p3(Syp?{V*Tg}E5!&P20Bi(YpqW@(1 zfI+`hi#b0(zwWzU6N>nGk~LdU`}&$*LuMAW#aFkqBG!GK+1NE4*f&QunSFCwZZz9g zqR1;+Wp4jZ(*e})N*tN#0ID@x=C>2b@y}-bwye<(A5)e6`ykT)UJzfs**$aAyymEx z()abbf`go@*Pbfcsoi?;z08n-%XIH$GGt&<4_=n({toH2hordpNN>AUpSi&D^9w8s zPb<*McArvoT7mNc2PsgQqJjh*_$NG})Z)#=`7(@lSI!JCmWED?{nHgG}3<%=r*bpdq5uX#B2$zQ4h)JC|VWT47dZxG> z`2tk}62!PFv>;*O=x^kdg{(p$Pbehd2F4{wa2i%}mDt!xF(EyT30D*&B4*&&r*uNm zf=MR96N~JC#v?#uNh(xR1i33SNZrVyMSZXGES0375U5xmLn=6Phq>$yv+d>Euls(% zYP0LU2VUQCbjQruIYnpZbUpaOu>lo#U3lcDGN9rDbyI2CH_TJFEDNt$x_0Th@T%3} zrE8aTtmr@^_|y@o%*nF~%L=~4475W9pL7bx-93)1T0NMvjF!!_JTf$nJd3eGb@f8R zoS4(j%3{Z0X_js3?O3;D*;%W0YyH;QAm!PCNfg>5Hs=7A1bg+c*)hyNMbtwD{xX@W zw49xWL9i<(|7b>k{_X`0m0Rko7?kG`X?bEu4?0gMF*DKI`A75A#g5FFDD|MZAkQI# zeOCSrZP(CnIDrF4ecoa$v#Jjl*u)T{mjyzazR5h6_7&7R^jkp)^lVxY)xE>C+ZSFGqtSDoll*3yAEXiJ)JjWXdWW85JQoY6?0F-9z@+)@|C zJEB^BT3D-c>p4I}Z~-H<#y(Fsy)`-_nWofYoqEH#sM@kuJ!f|D_UR(^$BSvP(-ieJ z1V@{m(Q}CM|DAVkahmU9C5jdFnr+o^>!#z2-m=&#SDaR5dC%w}%&o-8-0;trx>CqS zhKEc}TP{r`ki=1hH-ps>)mwWA806Xf>7^vkX<*k!$1i)^wL^xjpKh zTt{a1LD8jFGjG^_mCfg=UZgJ+34mvDaHXL#+_iLl&yuAlvhnIWjh@9-?-W;Z{)Ay{ zoL{gy!pS(M+9nMH(L#Qw>cqO0+UchIUBek$kot=itN?dU?OxSqDLvVW#JFHVcER5) zHA1x{P#~`!8rY_9xAB@Ibt{%On@*_&^+L-IZ|3Ro4%N-~ZVPcA@b!1qsqX1!cD-71!I%t9+f_YgG>nS++e<_a>3eyQ*}f=lq~W_l1nVU8)CuUXCz- zqjmj8YtU$XrWFAs7GLC}xa$BnvAiN-CfSS3H$lKCuMq{KA#6vpfn|c5U0L#5&b0n7 z3}c+xWS#x`(I%@AMBsA0>T;r+U;Vxw3|pl`drGf*O0@qU>9apqw8_e-?w{tNh13GU zO;(w~9(E??fhMC!CA$e-3z<+5!n}5NFydY5gb?;KR)t14vaufpvjo9W0a3A$st*3k 
z`9NA}FrdZp5b$HbTlIsWvzu zu$Qvq5s-rV2Jc2Dqz(690%aV1JtlQL79vdr(*|Av|B~=2#8Uz@lw7-fBwMzA>Gr?? z2rwc>CQbzl1Wb{IR*TaW%mXhm6Ki3~h4n@oCd4oO4tqVUh3Uubw*`7-IeGMW&Sa**I$?Pl@#BYYnxt!f|o=lL;_rUGEz~}Avi^9 zfJ_PVg-osIsNuZg>>@xh<7B2QjuGRG#ElDwR1w^r#*6SaglF*Z+WK4@QbQ_V?ogs~ zkS7wBWj{w|FjEbqMIa3ckW;1<@v#4=v-5$Cvb^K?`@X!_Ud#3IIL}J|xdX4YuyW9A zdu>lCrR^FDrMa~f(BSZTe{7|+*j{f)aH9rLx|9?&Ou-Q#&_Tl%QBfquS%pc)bS5TK zNsN?$&VNwdmTe5XeSUW^9l9)=Vq>bbT<>|G=lOkq-|y#FUZ!l*RhFv`H3v`~*$AW^ z8MWmtQ|mps_$u)xl0$tnZwP}HKQ+P@6pa{p0qQYzf;q6T+L>Xk%1-_kiq+e;S?opv~EvA5qCIkgYRdM6LQm(&nx#AJNP?`mBWR-b08@|&$Z zfK2;xVG{1Tw-2KZWA(K8`a!GlX3Fm?wX@u==y~=?WU@%w3t_gizTwcQN+##+UM za3&KzfJ8}(QABh089v*qZn3E!I;~8TQ&+68|NmB)`JpxQ?i^wnQPq@VZER_{>F4#& zJN$d}HyHuPT0nYcjsj#xq$(_u+z3ih97xOINx{&81i}=NuU@}n{`8r_Y2yw5Hur?# zdOaLx*UdeV&;l!19xMFe-!}I|q~5Bk$!QaJ<}}}#V=~diq8D3ai7vOm0nmKBn&8HFL_c? zIjIBl4r&W~0=gFJUn`S7ci*Os*9IgXjx$$ARhDx7V}s&hqj3QMkJunjH`r=|U~~z% z^U+uXUru72SA{$&C^(cBb>Hkl2;Vs6sg^5!d55*S_+brC$b@yPTRR(IH-ZL4IgWzN z#=y)nx+7wvM|5Q`1DHl98Q(J-2f0Kns(^Xukd><8JbQ@9dbwI(mW$~LAr!u4z^Qqk z9_FVsFszL)Z|CLn$0wRa|BUM;yBUN;iB%zMkvb!3!tD@wDu`+d9SH&)mr717Ab51= zgak+1kUxiTFfn_fdCwnd~J3%I)C`AB{T=Vl&nK>`P@K!qs9PF z0oCs;&>B$_c;DgN5TfTilG(0|bicJ4FDO1<{(>quTX-7Y1aZ>=e(LyP{^;#3Qt~Nw>B& zS@gWBeC<>KmK(){&@T}uAV#u&8Gyhtz^8>A0IUXS58wz+VuU`iwyF-BQ+i3zGC!ST zqkw=;WVOQ^X2konLappM7IBlM{-Cypx1PQ)6TILihwH#+O7ERXgngG;)svZNC2u+ z!6e@b-i>1IzQ0<$8?S!-YEUt7ITJo|q5T1ySe(-+1Y!K;#S*1tWE^{N7-+`^>3?AQ zf)|E2Q^V5e3;Rep_gyF$yu+koh^zte2oyUhB++`+E~}FQn~lmllA0WP5I7UJeV&V;JB7 zRi3uiwaG+2ltQS`GUAgRW-Mf9Go2IkgeQ&sgk2C$=i>qPVY*6mIEDL#w}v(@T~(5I z>`%bVWz!}EtSmRiSkPEWeY8|T1eb6CF+Y-Lp8ti1r#Lv|q}RZ?@`LU9cI*dmhJIYN zo?b8;G}0w*2N_BuSX}Wv@;3rKE30Y14|fO z9UCm&J;X|W2R&;J5OjZbOyDrSYRS{lsK<{ieiLK(tuK$K8-Fm24WFFYdih1B9S!AY z4&|FW58QIB(t7mWe#f!Ox+Q%weYsJRu3T?gnM#qlH#_^rTQgtBvlk}U*KWL&PR=j8 z#Ema}Fv}iswbr*}Ka=IceTgvv|DDp(rr)2R3vwdCn$YKDtfQoLwC9>s)2Z{)9u^OJ zt`nq%$2A{YsU15a4sGS8n0_|LF1M+mdU%SxD7|Hw>!UWch_Pzq(Dmi&g&cs)w#L@m 
zdeuEeOqC0(qdA#Va>EQ77R?QPO*|EAf^8jgMMEiTQ0@6DHP3OnNN;p}m_H-iO^cYe ztK`))_A#(b4KUj)>{5plHvAb5^+bWgR$y6M$hW=*xo9qPx7H$=ovAfy%Cf$=oq3r^YeDMmbcJ6|bcO${SKYvDPWQzkx1-3-ERKVzR^CwERQXfynhL&*s0(*9 zE=}ns)Y$$h68=N#M?#`#;>J(Jm7{pH1oAGZg1w*BakZzgkwg>%6Ybpc3bm^;7o7-m za9Nw@N2!{(DULWF3fG*@$~csk19GM)!%1)&n}T|=6tJ?3s~9Z-LS z*H$w2fQpvI*r2300oRTz8$lsD)>Ivg)}-7l4mRJF`c~hQw_k5YN;&U7cI%&9y6h2c zGMbkQ8sC&&prGiaca&^efH9U-Cs}YrIGJRLR8Q20HZExn9!hu-A-*jhC;~)q{%A*X z2vEK7aM7%jQj-=nJv(|Iv<$ia@no{w3T2zLrD3S~ovx)FJ>3TJE@>7rQ~=rqx%n`i zVE}BULe!CV201mkewBo(qp>xf1baZ9OHbb_JLw@h>aeWHz;oQx9Vv z7RZd`x{45!m@b}}mX5tBt8f8%qK=r@i)oXsqEs^Wr%W_yc}+v<>Et6YMq|o|N-N-+ zbLsX3cH-)UbAA7Ur1OHwe^e+KOHp<@EG6xHaj|R&k)peMwOEwfX_*0_>51vi@di=; zae=@H2iyy7fzzKO7p64@4Y^(E4uN(cw851U%0RN4#;dyf8@8ocpA%6F@BUo3uZwhb z#JzviP0%Z?+ML8pEn*z)7-c%AUBVSo>D_rruSkZh617pf36SM33`qm}#8aB(Dbt95 zd{L`lVEI-S3bzi51nyjX zN19!f@*Fm_s5tP*t#Zs#aqyJz(8;+45{5Jr!AZ%z$pw)+Z!!L^AYLVQG@d~pPYWbf zp;6wC(Uw(N-#@z$uP!`<|J_NH*5Ey2YW^=KTN+$M@T^t6rb&;aS+8m4 zkoY27{6zB-exhyqjBVm4I+^qnolKk@m-VXsGT80rlv#g&WwH6*4W-rBph5{(-Ik26 z8;h)glE|o&>4N{&FGQ(~IE8rtCj@ntOpzig%UELS3|cNSAqmVaOSIx!R`%$u@eA-3 zq*%dnjW?Ix1s7+A5@X#%D!>>ZBA^r`T)kxInamC2x-G+&*eq#6kw}6t$TCNEBq0x2 zf;tY?8-O3NLSWBA1!S%zk0b>Y9uMis;+rE1Q!2~^cS|g?hW@8(v4)_k_FA7ZCB=*ChOP=guA{*8a9>Y9f9@3Le3BKCNC=2mldV%eyD|>pV>O28 zXq4k5!cvFH)ukfo_P&U5J{3KS?k#MNh__Lz5cQkZB>(ir=y8sErt*nCkRSNrbA`4* zx1sywNGu)TN0Qky*dyI}WBI;7K(TAsK+)1rIDzCX0V_&W4QaMr65e&IVh# zzEL+dPt7Bmb7g3CI2li?4kU)e_+T^2_y^b@1lia>k!Z*jR4~95x~LZ&Ti^|nH-=mb mNCKlABU%WlN28+`sNV4W0S%!j" -> "ro[lr] , 1" for yasm compatibility -PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' - -C2GOASM=c2goasm -CC=clang-11 -C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \ - -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -ASM_FLAGS_AVX2=-mavx2 -mfma -ASM_FLAGS_SSE4=-msse4 -ASM_FLAGS_BMI2=-mbmi2 -ASM_FLAGS_POPCNT=-mpopcnt - -C_FLAGS_NEON=-O3 
-fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ - -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') - -.PHONEY: assembly - -INTEL_SOURCES := \ - bitmap_ops_avx2_amd64.s bitmap_ops_sse4_amd64.s - -# -# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. -# min_max_neon_arm64.s was generated by asm2plan9s. -# And manually formatted it as the Arm64 Plan9. -# - -assembly: $(INTEL_SOURCES) - -_lib/bitmap_ops_avx2_amd64.s: _lib/bitmap_ops.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/bitmap_ops_sse4_amd64.s: _lib/bitmap_ops.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -bitmap_ops_avx2_amd64.s: _lib/bitmap_ops_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -bitmap_ops_sse4_amd64.s: _lib/bitmap_ops_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -clean: - rm -f $(INTEL_SOURCES) - rm -f $(addprefix _lib/,$(INTEL_SOURCES)) diff --git a/go/arrow/bitutil/_lib/bitmap_ops.c b/go/arrow/bitutil/_lib/bitmap_ops.c deleted file mode 100644 index f48b4d4d821cb..0000000000000 --- a/go/arrow/bitutil/_lib/bitmap_ops.c +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "../../../internal/utils/_lib/arch.h" -#include - -// like elsewhere in this repo, this .c file gets compiled into optimized -// assembly and then converted to go plan9 assembly via c2goasm so we can -// call these functions. see the Makefile in the parent directory. - -void FULL_NAME(bitmap_aligned_and)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] & right[i]; - } -} - -void FULL_NAME(bitmap_aligned_or)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] | right[i]; - } -} - -void FULL_NAME(bitmap_aligned_and_not)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] & ~right[i]; - } -} - -void FULL_NAME(bitmap_aligned_xor)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) { - for (int64_t i = 0; i < nbytes; ++i) { - out[i] = left[i] ^ right[i]; - } -} diff --git a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s deleted file mode 100644 index a4010dab55b25..0000000000000 --- a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s +++ /dev/null @@ -1,410 +0,0 @@ - .text - .intel_syntax noprefix - .file "bitmap_ops.c" - .globl bitmap_aligned_and_avx2 # -- Begin function bitmap_aligned_and_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_and_avx2,@function -bitmap_aligned_and_avx2: # @bitmap_aligned_and_avx2 -# 
%bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB0_12 -# %bb.1: - cmp rcx, 127 - ja .LBB0_7 -# %bb.2: - xor r10d, r10d - jmp .LBB0_3 -.LBB0_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r10d, r10d - test r11b, bl - jne .LBB0_3 -# %bb.8: - and r8b, r9b - jne .LBB0_3 -# %bb.9: - mov r10, rcx - and r10, -128 - xor r8d, r8d - .p2align 4, 0x90 -.LBB0_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + r8] - vmovups ymm1, ymmword ptr [rsi + r8 + 32] - vmovups ymm2, ymmword ptr [rsi + r8 + 64] - vmovups ymm3, ymmword ptr [rsi + r8 + 96] - vandps ymm0, ymm0, ymmword ptr [rdi + r8] - vandps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] - vandps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] - vandps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] - vmovups ymmword ptr [rdx + r8], ymm0 - vmovups ymmword ptr [rdx + r8 + 32], ymm1 - vmovups ymmword ptr [rdx + r8 + 64], ymm2 - vmovups ymmword ptr [rdx + r8 + 96], ymm3 - sub r8, -128 - cmp r10, r8 - jne .LBB0_10 -# %bb.11: - cmp r10, rcx - je .LBB0_12 -.LBB0_3: - mov r8, r10 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB0_5 - .p2align 4, 0x90 -.LBB0_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - and al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - add r10, 1 - add r9, -1 - jne .LBB0_4 -.LBB0_5: - cmp r8, 3 - jb .LBB0_12 - .p2align 4, 0x90 -.LBB0_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - and al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - movzx eax, byte ptr [rsi + r10 + 1] - and al, byte ptr [rdi + r10 + 1] - mov byte ptr [rdx + r10 + 1], al - movzx eax, byte ptr [rsi + r10 + 2] - and al, byte ptr [rdi + r10 + 2] - mov byte ptr [rdx + r10 + 2], al - movzx eax, byte ptr [rsi + r10 + 3] - and al, byte ptr [rdi + r10 + 3] - mov byte ptr [rdx + r10 + 3], al - add r10, 4 - cmp 
rcx, r10 - jne .LBB0_6 -.LBB0_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end0: - .size bitmap_aligned_and_avx2, .Lfunc_end0-bitmap_aligned_and_avx2 - # -- End function - .globl bitmap_aligned_or_avx2 # -- Begin function bitmap_aligned_or_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_or_avx2,@function -bitmap_aligned_or_avx2: # @bitmap_aligned_or_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB1_12 -# %bb.1: - cmp rcx, 127 - ja .LBB1_7 -# %bb.2: - xor r10d, r10d - jmp .LBB1_3 -.LBB1_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r10d, r10d - test r11b, bl - jne .LBB1_3 -# %bb.8: - and r8b, r9b - jne .LBB1_3 -# %bb.9: - mov r10, rcx - and r10, -128 - xor r8d, r8d - .p2align 4, 0x90 -.LBB1_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + r8] - vmovups ymm1, ymmword ptr [rsi + r8 + 32] - vmovups ymm2, ymmword ptr [rsi + r8 + 64] - vmovups ymm3, ymmword ptr [rsi + r8 + 96] - vorps ymm0, ymm0, ymmword ptr [rdi + r8] - vorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] - vorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] - vorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] - vmovups ymmword ptr [rdx + r8], ymm0 - vmovups ymmword ptr [rdx + r8 + 32], ymm1 - vmovups ymmword ptr [rdx + r8 + 64], ymm2 - vmovups ymmword ptr [rdx + r8 + 96], ymm3 - sub r8, -128 - cmp r10, r8 - jne .LBB1_10 -# %bb.11: - cmp r10, rcx - je .LBB1_12 -.LBB1_3: - mov r8, r10 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB1_5 - .p2align 4, 0x90 -.LBB1_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - or al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - add r10, 1 - add r9, -1 - jne .LBB1_4 -.LBB1_5: - cmp r8, 3 - jb .LBB1_12 - .p2align 4, 0x90 -.LBB1_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - or al, byte ptr [rdi 
+ r10] - mov byte ptr [rdx + r10], al - movzx eax, byte ptr [rsi + r10 + 1] - or al, byte ptr [rdi + r10 + 1] - mov byte ptr [rdx + r10 + 1], al - movzx eax, byte ptr [rsi + r10 + 2] - or al, byte ptr [rdi + r10 + 2] - mov byte ptr [rdx + r10 + 2], al - movzx eax, byte ptr [rsi + r10 + 3] - or al, byte ptr [rdi + r10 + 3] - mov byte ptr [rdx + r10 + 3], al - add r10, 4 - cmp rcx, r10 - jne .LBB1_6 -.LBB1_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end1: - .size bitmap_aligned_or_avx2, .Lfunc_end1-bitmap_aligned_or_avx2 - # -- End function - .globl bitmap_aligned_and_not_avx2 # -- Begin function bitmap_aligned_and_not_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_and_not_avx2,@function -bitmap_aligned_and_not_avx2: # @bitmap_aligned_and_not_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB2_12 -# %bb.1: - cmp rcx, 127 - ja .LBB2_7 -# %bb.2: - xor r8d, r8d - jmp .LBB2_3 -.LBB2_7: - lea r8, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r8, rdi - seta bl - cmp rax, rdx - seta r10b - cmp r8, rsi - seta r9b - xor r8d, r8d - test r11b, bl - jne .LBB2_3 -# %bb.8: - and r10b, r9b - jne .LBB2_3 -# %bb.9: - mov r8, rcx - and r8, -128 - xor eax, eax - .p2align 4, 0x90 -.LBB2_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + rax] - vmovups ymm1, ymmword ptr [rsi + rax + 32] - vmovups ymm2, ymmword ptr [rsi + rax + 64] - vmovups ymm3, ymmword ptr [rsi + rax + 96] - vandnps ymm0, ymm0, ymmword ptr [rdi + rax] - vandnps ymm1, ymm1, ymmword ptr [rdi + rax + 32] - vandnps ymm2, ymm2, ymmword ptr [rdi + rax + 64] - vandnps ymm3, ymm3, ymmword ptr [rdi + rax + 96] - vmovups ymmword ptr [rdx + rax], ymm0 - vmovups ymmword ptr [rdx + rax + 32], ymm1 - vmovups ymmword ptr [rdx + rax + 64], ymm2 - vmovups ymmword ptr [rdx + rax + 96], ymm3 - sub rax, -128 - cmp r8, rax - jne .LBB2_10 -# %bb.11: - cmp r8, rcx - je .LBB2_12 -.LBB2_3: - mov r9, r8 - 
not r9 - test cl, 1 - je .LBB2_5 -# %bb.4: - mov al, byte ptr [rsi + r8] - not al - and al, byte ptr [rdi + r8] - mov byte ptr [rdx + r8], al - or r8, 1 -.LBB2_5: - add r9, rcx - je .LBB2_12 - .p2align 4, 0x90 -.LBB2_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r8] - not al - and al, byte ptr [rdi + r8] - mov byte ptr [rdx + r8], al - movzx eax, byte ptr [rsi + r8 + 1] - not al - and al, byte ptr [rdi + r8 + 1] - mov byte ptr [rdx + r8 + 1], al - add r8, 2 - cmp rcx, r8 - jne .LBB2_6 -.LBB2_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end2: - .size bitmap_aligned_and_not_avx2, .Lfunc_end2-bitmap_aligned_and_not_avx2 - # -- End function - .globl bitmap_aligned_xor_avx2 # -- Begin function bitmap_aligned_xor_avx2 - .p2align 4, 0x90 - .type bitmap_aligned_xor_avx2,@function -bitmap_aligned_xor_avx2: # @bitmap_aligned_xor_avx2 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB3_12 -# %bb.1: - cmp rcx, 127 - ja .LBB3_7 -# %bb.2: - xor r10d, r10d - jmp .LBB3_3 -.LBB3_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r11b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r10d, r10d - test r11b, bl - jne .LBB3_3 -# %bb.8: - and r8b, r9b - jne .LBB3_3 -# %bb.9: - mov r10, rcx - and r10, -128 - xor r8d, r8d - .p2align 4, 0x90 -.LBB3_10: # =>This Inner Loop Header: Depth=1 - vmovups ymm0, ymmword ptr [rsi + r8] - vmovups ymm1, ymmword ptr [rsi + r8 + 32] - vmovups ymm2, ymmword ptr [rsi + r8 + 64] - vmovups ymm3, ymmword ptr [rsi + r8 + 96] - vxorps ymm0, ymm0, ymmword ptr [rdi + r8] - vxorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32] - vxorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64] - vxorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96] - vmovups ymmword ptr [rdx + r8], ymm0 - vmovups ymmword ptr [rdx + r8 + 32], ymm1 - vmovups ymmword ptr [rdx + r8 + 64], ymm2 - vmovups ymmword ptr [rdx + r8 + 96], ymm3 - sub r8, -128 - cmp 
r10, r8 - jne .LBB3_10 -# %bb.11: - cmp r10, rcx - je .LBB3_12 -.LBB3_3: - mov r8, r10 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB3_5 - .p2align 4, 0x90 -.LBB3_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - xor al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - add r10, 1 - add r9, -1 - jne .LBB3_4 -.LBB3_5: - cmp r8, 3 - jb .LBB3_12 - .p2align 4, 0x90 -.LBB3_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r10] - xor al, byte ptr [rdi + r10] - mov byte ptr [rdx + r10], al - movzx eax, byte ptr [rsi + r10 + 1] - xor al, byte ptr [rdi + r10 + 1] - mov byte ptr [rdx + r10 + 1], al - movzx eax, byte ptr [rsi + r10 + 2] - xor al, byte ptr [rdi + r10 + 2] - mov byte ptr [rdx + r10 + 2], al - movzx eax, byte ptr [rsi + r10 + 3] - xor al, byte ptr [rdi + r10 + 3] - mov byte ptr [rdx + r10 + 3], al - add r10, 4 - cmp rcx, r10 - jne .LBB3_6 -.LBB3_12: - lea rsp, [rbp - 8] - pop rbx - pop rbp - vzeroupper - ret -.Lfunc_end3: - .size bitmap_aligned_xor_avx2, .Lfunc_end3-bitmap_aligned_xor_avx2 - # -- End function - .ident "Ubuntu clang version 11.1.0-6" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s deleted file mode 100644 index 840c1a623bb1b..0000000000000 --- a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s +++ /dev/null @@ -1,530 +0,0 @@ - .text - .intel_syntax noprefix - .file "bitmap_ops.c" - .globl bitmap_aligned_and_sse4 # -- Begin function bitmap_aligned_and_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_and_sse4,@function -bitmap_aligned_and_sse4: # @bitmap_aligned_and_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB0_16 -# %bb.1: - cmp rcx, 31 - ja .LBB0_7 -# %bb.2: - xor r11d, r11d -.LBB0_3: - mov r8, r11 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB0_5 - .p2align 4, 0x90 -.LBB0_4: # =>This Inner Loop Header: Depth=1 - movzx eax, 
byte ptr [rsi + r11] - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - add r11, 1 - add r9, -1 - jne .LBB0_4 -.LBB0_5: - cmp r8, 3 - jb .LBB0_16 - .p2align 4, 0x90 -.LBB0_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - and al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - movzx eax, byte ptr [rsi + r11 + 2] - and al, byte ptr [rdi + r11 + 2] - mov byte ptr [rdx + r11 + 2], al - movzx eax, byte ptr [rsi + r11 + 3] - and al, byte ptr [rdi + r11 + 3] - mov byte ptr [rdx + r11 + 3], al - add r11, 4 - cmp rcx, r11 - jne .LBB0_6 - jmp .LBB0_16 -.LBB0_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB0_3 -# %bb.8: - and r8b, r9b - jne .LBB0_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 5 - add r9, 1 - test rax, rax - je .LBB0_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB0_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - andps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - andps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB0_12 -# %bb.13: - test r9b, 1 - je .LBB0_15 -.LBB0_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword 
ptr [rsi + r8] - andps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB0_15: - cmp r11, rcx - jne .LBB0_3 -.LBB0_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB0_10: - xor r8d, r8d - test r9b, 1 - jne .LBB0_14 - jmp .LBB0_15 -.Lfunc_end0: - .size bitmap_aligned_and_sse4, .Lfunc_end0-bitmap_aligned_and_sse4 - # -- End function - .globl bitmap_aligned_or_sse4 # -- Begin function bitmap_aligned_or_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_or_sse4,@function -bitmap_aligned_or_sse4: # @bitmap_aligned_or_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB1_16 -# %bb.1: - cmp rcx, 31 - ja .LBB1_7 -# %bb.2: - xor r11d, r11d -.LBB1_3: - mov r8, r11 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB1_5 - .p2align 4, 0x90 -.LBB1_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - or al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - add r11, 1 - add r9, -1 - jne .LBB1_4 -.LBB1_5: - cmp r8, 3 - jb .LBB1_16 - .p2align 4, 0x90 -.LBB1_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - or al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - or al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - movzx eax, byte ptr [rsi + r11 + 2] - or al, byte ptr [rdi + r11 + 2] - mov byte ptr [rdx + r11 + 2], al - movzx eax, byte ptr [rsi + r11 + 3] - or al, byte ptr [rdi + r11 + 3] - mov byte ptr [rdx + r11 + 3], al - add r11, 4 - cmp rcx, r11 - jne .LBB1_6 - jmp .LBB1_16 -.LBB1_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB1_3 -# %bb.8: - and r8b, r9b - jne .LBB1_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr 
r9, 5 - add r9, 1 - test rax, rax - je .LBB1_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB1_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - orps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - orps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - orps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - orps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB1_12 -# %bb.13: - test r9b, 1 - je .LBB1_15 -.LBB1_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - orps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - orps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB1_15: - cmp r11, rcx - jne .LBB1_3 -.LBB1_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB1_10: - xor r8d, r8d - test r9b, 1 - jne .LBB1_14 - jmp .LBB1_15 -.Lfunc_end1: - .size bitmap_aligned_or_sse4, .Lfunc_end1-bitmap_aligned_or_sse4 - # -- End function - .globl bitmap_aligned_and_not_sse4 # -- Begin function bitmap_aligned_and_not_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_and_not_sse4,@function -bitmap_aligned_and_not_sse4: # @bitmap_aligned_and_not_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB2_16 -# %bb.1: - cmp rcx, 31 - ja .LBB2_7 -# %bb.2: - xor r11d, r11d -.LBB2_3: - mov r8, r11 - not r8 - test cl, 1 - je .LBB2_5 -# %bb.4: - mov al, byte ptr [rsi + r11] - not al - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - or r11, 1 -.LBB2_5: - add r8, rcx - je .LBB2_16 - .p2align 4, 0x90 
-.LBB2_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - not al - and al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - not al - and al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - add r11, 2 - cmp rcx, r11 - jne .LBB2_6 - jmp .LBB2_16 -.LBB2_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB2_3 -# %bb.8: - and r8b, r9b - jne .LBB2_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 5 - add r9, 1 - test rax, rax - je .LBB2_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB2_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andnps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andnps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - andnps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - andnps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB2_12 -# %bb.13: - test r9b, 1 - je .LBB2_15 -.LBB2_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - andnps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - andnps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB2_15: - cmp r11, rcx - jne .LBB2_3 -.LBB2_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB2_10: - xor r8d, r8d - test r9b, 1 - jne .LBB2_14 - jmp .LBB2_15 -.Lfunc_end2: - .size 
bitmap_aligned_and_not_sse4, .Lfunc_end2-bitmap_aligned_and_not_sse4 - # -- End function - .globl bitmap_aligned_xor_sse4 # -- Begin function bitmap_aligned_xor_sse4 - .p2align 4, 0x90 - .type bitmap_aligned_xor_sse4,@function -bitmap_aligned_xor_sse4: # @bitmap_aligned_xor_sse4 -# %bb.0: - push rbp - mov rbp, rsp - push rbx - and rsp, -8 - test rcx, rcx - jle .LBB3_16 -# %bb.1: - cmp rcx, 31 - ja .LBB3_7 -# %bb.2: - xor r11d, r11d -.LBB3_3: - mov r8, r11 - not r8 - add r8, rcx - mov r9, rcx - and r9, 3 - je .LBB3_5 - .p2align 4, 0x90 -.LBB3_4: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - xor al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - add r11, 1 - add r9, -1 - jne .LBB3_4 -.LBB3_5: - cmp r8, 3 - jb .LBB3_16 - .p2align 4, 0x90 -.LBB3_6: # =>This Inner Loop Header: Depth=1 - movzx eax, byte ptr [rsi + r11] - xor al, byte ptr [rdi + r11] - mov byte ptr [rdx + r11], al - movzx eax, byte ptr [rsi + r11 + 1] - xor al, byte ptr [rdi + r11 + 1] - mov byte ptr [rdx + r11 + 1], al - movzx eax, byte ptr [rsi + r11 + 2] - xor al, byte ptr [rdi + r11 + 2] - mov byte ptr [rdx + r11 + 2], al - movzx eax, byte ptr [rsi + r11 + 3] - xor al, byte ptr [rdi + r11 + 3] - mov byte ptr [rdx + r11 + 3], al - add r11, 4 - cmp rcx, r11 - jne .LBB3_6 - jmp .LBB3_16 -.LBB3_7: - lea r9, [rdx + rcx] - lea rax, [rdi + rcx] - cmp rax, rdx - seta r10b - lea rax, [rsi + rcx] - cmp r9, rdi - seta bl - cmp rax, rdx - seta r8b - cmp r9, rsi - seta r9b - xor r11d, r11d - test r10b, bl - jne .LBB3_3 -# %bb.8: - and r8b, r9b - jne .LBB3_3 -# %bb.9: - mov r11, rcx - and r11, -32 - lea rax, [r11 - 32] - mov r9, rax - shr r9, 5 - add r9, 1 - test rax, rax - je .LBB3_10 -# %bb.11: - mov r10, r9 - and r10, -2 - neg r10 - xor r8d, r8d - .p2align 4, 0x90 -.LBB3_12: # =>This Inner Loop Header: Depth=1 - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - xorps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + 
r8 + 16] - xorps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 - movups xmm0, xmmword ptr [rdi + r8 + 32] - movups xmm1, xmmword ptr [rdi + r8 + 48] - movups xmm2, xmmword ptr [rsi + r8 + 32] - xorps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 48] - xorps xmm0, xmm1 - movups xmmword ptr [rdx + r8 + 32], xmm2 - movups xmmword ptr [rdx + r8 + 48], xmm0 - add r8, 64 - add r10, 2 - jne .LBB3_12 -# %bb.13: - test r9b, 1 - je .LBB3_15 -.LBB3_14: - movups xmm0, xmmword ptr [rdi + r8] - movups xmm1, xmmword ptr [rdi + r8 + 16] - movups xmm2, xmmword ptr [rsi + r8] - xorps xmm2, xmm0 - movups xmm0, xmmword ptr [rsi + r8 + 16] - xorps xmm0, xmm1 - movups xmmword ptr [rdx + r8], xmm2 - movups xmmword ptr [rdx + r8 + 16], xmm0 -.LBB3_15: - cmp r11, rcx - jne .LBB3_3 -.LBB3_16: - lea rsp, [rbp - 8] - pop rbx - pop rbp - ret -.LBB3_10: - xor r8d, r8d - test r9b, 1 - jne .LBB3_14 - jmp .LBB3_15 -.Lfunc_end3: - .size bitmap_aligned_xor_sse4, .Lfunc_end3-bitmap_aligned_xor_sse4 - # -- End function - .ident "Ubuntu clang version 11.1.0-6" - .section ".note.GNU-stack","",@progbits - .addrsig diff --git a/go/arrow/bitutil/bitmap_ops.go b/go/arrow/bitutil/bitmap_ops.go deleted file mode 100644 index 7db750a6dd937..0000000000000 --- a/go/arrow/bitutil/bitmap_ops.go +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -func alignedBitAndGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] & rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] & right[i] - } -} - -func alignedBitAndNotGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] &^ rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] &^ right[i] - } -} - -func alignedBitOrGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] | rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than 
a word - for ; i < nbytes; i++ { - out[i] = left[i] | right[i] - } -} - -func alignedBitXorGo(left, right, out []byte) { - var ( - nbytes = len(out) - i = 0 - ) - if nbytes > uint64SizeBytes { - // case where we have enough bytes to operate on words - leftWords := bytesToUint64(left[i:]) - rightWords := bytesToUint64(right[i:]) - outWords := bytesToUint64(out[i:]) - - for w := range outWords { - outWords[w] = leftWords[w] ^ rightWords[w] - } - - i += len(outWords) * uint64SizeBytes - } - // grab any remaining bytes that were fewer than a word - for ; i < nbytes; i++ { - out[i] = left[i] ^ right[i] - } -} diff --git a/go/arrow/bitutil/bitmap_ops_amd64.go b/go/arrow/bitutil/bitmap_ops_amd64.go deleted file mode 100644 index ad0fd674ab9b7..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_amd64.go +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -import "golang.org/x/sys/cpu" - -func init() { - if cpu.X86.HasAVX2 { - bitAndOp.opAligned = bitmapAlignedAndAVX2 - bitOrOp.opAligned = bitmapAlignedOrAVX2 - bitAndNotOp.opAligned = bitmapAlignedAndNotAVX2 - bitXorOp.opAligned = bitmapAlignedXorAVX2 - } else if cpu.X86.HasSSE42 { - bitAndOp.opAligned = bitmapAlignedAndSSE4 - bitOrOp.opAligned = bitmapAlignedOrSSE4 - bitAndNotOp.opAligned = bitmapAlignedAndNotSSE4 - bitXorOp.opAligned = bitmapAlignedXorSSE4 - } else { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo - } -} diff --git a/go/arrow/bitutil/bitmap_ops_arm64.go b/go/arrow/bitutil/bitmap_ops_arm64.go deleted file mode 100644 index 28d95d84ade2d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_arm64.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go b/go/arrow/bitutil/bitmap_ops_avx2_amd64.go deleted file mode 100644 index 1c01bd0f38015..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -import ( - "unsafe" -) - -//go:noescape -func _bitmap_aligned_and_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndAVX2(left, right, out []byte) { - _bitmap_aligned_and_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_or_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedOrAVX2(left, right, out []byte) { - _bitmap_aligned_or_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_and_not_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndNotAVX2(left, right, out []byte) { - _bitmap_aligned_and_not_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_xor_avx2(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedXorAVX2(left, right, out []byte) { - _bitmap_aligned_xor_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/bitmap_ops_avx2_amd64.s deleted file mode 100644 index 00172e865926d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s +++ /dev/null @@ -1,373 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -TEXT ·_bitmap_aligned_and_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB0_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB0_7 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB0_3 - -LBB0_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 
// lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB0_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB0_3 - WORD $0x8949; BYTE $0xca // mov r10, rcx - LONG $0x80e28349 // and r10, -128 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB0_10: - LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] - LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] - LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] - LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] - LONG $0x547ca1c4; WORD $0x0704 // vandps ymm0, ymm0, yword [rdi + r8] - LONG $0x5474a1c4; WORD $0x074c; BYTE $0x20 // vandps ymm1, ymm1, yword [rdi + r8 + 32] - LONG $0x546ca1c4; WORD $0x0754; BYTE $0x40 // vandps ymm2, ymm2, yword [rdi + r8 + 64] - LONG $0x5464a1c4; WORD $0x075c; BYTE $0x60 // vandps ymm3, ymm3, yword [rdi + r8 + 96] - LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 - LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 - LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 - LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 - LONG $0x80e88349 // sub r8, -128 - WORD $0x394d; BYTE $0xc2 // cmp r10, r8 - JNE LBB0_10 - WORD $0x3949; BYTE $0xca // cmp r10, rcx - JE LBB0_12 - -LBB0_3: - WORD $0x894d; BYTE $0xd0 // mov r8, r10 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB0_5 - -LBB0_4: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17042242 // and al, byte [rdi + 
r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x01c28349 // add r10, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB0_4 - -LBB0_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB0_12 - -LBB0_6: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17042242 // and al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] - LONG $0x17442242; BYTE $0x01 // and al, byte [rdi + r10 + 1] - LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al - LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] - LONG $0x17442242; BYTE $0x02 // and al, byte [rdi + r10 + 2] - LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al - LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] - LONG $0x17442242; BYTE $0x03 // and al, byte [rdi + r10 + 3] - LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al - LONG $0x04c28349 // add r10, 4 - WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 - JNE LBB0_6 - -LBB0_12: - VZEROUPPER - RET - -TEXT ·_bitmap_aligned_or_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB1_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB1_7 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB1_3 - -LBB1_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB1_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB1_3 - WORD $0x8949; BYTE $0xca // mov r10, 
rcx - LONG $0x80e28349 // and r10, -128 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB1_10: - LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] - LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] - LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] - LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] - LONG $0x567ca1c4; WORD $0x0704 // vorps ymm0, ymm0, yword [rdi + r8] - LONG $0x5674a1c4; WORD $0x074c; BYTE $0x20 // vorps ymm1, ymm1, yword [rdi + r8 + 32] - LONG $0x566ca1c4; WORD $0x0754; BYTE $0x40 // vorps ymm2, ymm2, yword [rdi + r8 + 64] - LONG $0x5664a1c4; WORD $0x075c; BYTE $0x60 // vorps ymm3, ymm3, yword [rdi + r8 + 96] - LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 - LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 - LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 - LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 - LONG $0x80e88349 // sub r8, -128 - WORD $0x394d; BYTE $0xc2 // cmp r10, r8 - JNE LBB1_10 - WORD $0x3949; BYTE $0xca // cmp r10, rcx - JE LBB1_12 - -LBB1_3: - WORD $0x894d; BYTE $0xd0 // mov r8, r10 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB1_5 - -LBB1_4: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17040a42 // or al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x01c28349 // add r10, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB1_4 - -LBB1_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB1_12 - -LBB1_6: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17040a42 // or al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] - LONG $0x17440a42; BYTE $0x01 
// or al, byte [rdi + r10 + 1] - LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al - LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] - LONG $0x17440a42; BYTE $0x02 // or al, byte [rdi + r10 + 2] - LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al - LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] - LONG $0x17440a42; BYTE $0x03 // or al, byte [rdi + r10 + 3] - LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al - LONG $0x04c28349 // add r10, 4 - WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 - JNE LBB1_6 - -LBB1_12: - VZEROUPPER - RET - -TEXT ·_bitmap_aligned_and_not_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB2_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB2_7 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - JMP LBB2_3 - -LBB2_7: - LONG $0x0a048d4c // lea r8, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf8 // cmp r8, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - WORD $0x3949; BYTE $0xf0 // cmp r8, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB2_3 - WORD $0x2045; BYTE $0xca // and r10b, r9b - JNE LBB2_3 - WORD $0x8949; BYTE $0xc8 // mov r8, rcx - LONG $0x80e08349 // and r8, -128 - WORD $0xc031 // xor eax, eax - -LBB2_10: - LONG $0x0410fcc5; BYTE $0x06 // vmovups ymm0, yword [rsi + rax] - LONG $0x4c10fcc5; WORD $0x2006 // vmovups ymm1, yword [rsi + rax + 32] - LONG $0x5410fcc5; WORD $0x4006 // vmovups ymm2, yword [rsi + rax + 64] - LONG $0x5c10fcc5; WORD $0x6006 // vmovups ymm3, yword [rsi + rax + 96] - LONG $0x0455fcc5; BYTE $0x07 // vandnps ymm0, ymm0, yword [rdi + rax] - LONG 
$0x4c55f4c5; WORD $0x2007 // vandnps ymm1, ymm1, yword [rdi + rax + 32] - LONG $0x5455ecc5; WORD $0x4007 // vandnps ymm2, ymm2, yword [rdi + rax + 64] - LONG $0x5c55e4c5; WORD $0x6007 // vandnps ymm3, ymm3, yword [rdi + rax + 96] - LONG $0x0411fcc5; BYTE $0x02 // vmovups yword [rdx + rax], ymm0 - LONG $0x4c11fcc5; WORD $0x2002 // vmovups yword [rdx + rax + 32], ymm1 - LONG $0x5411fcc5; WORD $0x4002 // vmovups yword [rdx + rax + 64], ymm2 - LONG $0x5c11fcc5; WORD $0x6002 // vmovups yword [rdx + rax + 96], ymm3 - LONG $0x80e88348 // sub rax, -128 - WORD $0x3949; BYTE $0xc0 // cmp r8, rax - JNE LBB2_10 - WORD $0x3949; BYTE $0xc8 // cmp r8, rcx - JE LBB2_12 - -LBB2_3: - WORD $0x894d; BYTE $0xc1 // mov r9, r8 - WORD $0xf749; BYTE $0xd1 // not r9 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB2_5 - LONG $0x06048a42 // mov al, byte [rsi + r8] - WORD $0xd0f6 // not al - LONG $0x07042242 // and al, byte [rdi + r8] - LONG $0x02048842 // mov byte [rdx + r8], al - LONG $0x01c88349 // or r8, 1 - -LBB2_5: - WORD $0x0149; BYTE $0xc9 // add r9, rcx - JE LBB2_12 - -LBB2_6: - LONG $0x04b60f42; BYTE $0x06 // movzx eax, byte [rsi + r8] - WORD $0xd0f6 // not al - LONG $0x07042242 // and al, byte [rdi + r8] - LONG $0x02048842 // mov byte [rdx + r8], al - LONG $0x44b60f42; WORD $0x0106 // movzx eax, byte [rsi + r8 + 1] - WORD $0xd0f6 // not al - LONG $0x07442242; BYTE $0x01 // and al, byte [rdi + r8 + 1] - LONG $0x02448842; BYTE $0x01 // mov byte [rdx + r8 + 1], al - LONG $0x02c08349 // add r8, 2 - WORD $0x394c; BYTE $0xc1 // cmp rcx, r8 - JNE LBB2_6 - -LBB2_12: - VZEROUPPER - RET - -TEXT ·_bitmap_aligned_xor_avx2(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB3_12 - LONG $0x7ff98348 // cmp rcx, 127 - JA LBB3_7 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB3_3 - -LBB3_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD 
$0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd3970f41 // seta r11b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0x8441; BYTE $0xdb // test r11b, bl - JNE LBB3_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB3_3 - WORD $0x8949; BYTE $0xca // mov r10, rcx - LONG $0x80e28349 // and r10, -128 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB3_10: - LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8] - LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32] - LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64] - LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96] - LONG $0x577ca1c4; WORD $0x0704 // vxorps ymm0, ymm0, yword [rdi + r8] - LONG $0x5774a1c4; WORD $0x074c; BYTE $0x20 // vxorps ymm1, ymm1, yword [rdi + r8 + 32] - LONG $0x576ca1c4; WORD $0x0754; BYTE $0x40 // vxorps ymm2, ymm2, yword [rdi + r8 + 64] - LONG $0x5764a1c4; WORD $0x075c; BYTE $0x60 // vxorps ymm3, ymm3, yword [rdi + r8 + 96] - LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0 - LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1 - LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2 - LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3 - LONG $0x80e88349 // sub r8, -128 - WORD $0x394d; BYTE $0xc2 // cmp r10, r8 - JNE LBB3_10 - WORD $0x3949; BYTE $0xca // cmp r10, rcx - JE LBB3_12 - -LBB3_3: - WORD $0x894d; BYTE $0xd0 // mov r8, r10 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB3_5 - -LBB3_4: - LONG 
$0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17043242 // xor al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x01c28349 // add r10, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB3_4 - -LBB3_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB3_12 - -LBB3_6: - LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10] - LONG $0x17043242 // xor al, byte [rdi + r10] - LONG $0x12048842 // mov byte [rdx + r10], al - LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1] - LONG $0x17443242; BYTE $0x01 // xor al, byte [rdi + r10 + 1] - LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al - LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2] - LONG $0x17443242; BYTE $0x02 // xor al, byte [rdi + r10 + 2] - LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al - LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3] - LONG $0x17443242; BYTE $0x03 // xor al, byte [rdi + r10 + 3] - LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al - LONG $0x04c28349 // add r10, 4 - WORD $0x394c; BYTE $0xd1 // cmp rcx, r10 - JNE LBB3_6 - -LBB3_12: - VZEROUPPER - RET diff --git a/go/arrow/bitutil/bitmap_ops_noasm.go b/go/arrow/bitutil/bitmap_ops_noasm.go deleted file mode 100644 index e25347791fe45..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_noasm.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build noasm -// +build noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_ppc64le.go b/go/arrow/bitutil/bitmap_ops_ppc64le.go deleted file mode 100644 index 28d95d84ade2d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_ppc64le.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build !noasm -// +build !noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_s390x.go b/go/arrow/bitutil/bitmap_ops_s390x.go deleted file mode 100644 index 28d95d84ade2d..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_s390x.go +++ /dev/null @@ -1,27 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package bitutil - -func init() { - bitAndOp.opAligned = alignedBitAndGo - bitOrOp.opAligned = alignedBitOrGo - bitAndNotOp.opAligned = alignedBitAndNotGo - bitXorOp.opAligned = alignedBitXorGo -} diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go b/go/arrow/bitutil/bitmap_ops_sse4_amd64.go deleted file mode 100644 index f16bce12bbfa2..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package bitutil - -import ( - "unsafe" -) - -//go:noescape -func _bitmap_aligned_and_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndSSE4(left, right, out []byte) { - _bitmap_aligned_and_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_or_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedOrSSE4(left, right, out []byte) { - _bitmap_aligned_or_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_and_not_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedAndNotSSE4(left, right, out []byte) { - _bitmap_aligned_and_not_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} - -//go:noescape -func _bitmap_aligned_xor_sse4(left, right, out unsafe.Pointer, length int64) - -func bitmapAlignedXorSSE4(left, right, out []byte) { - _bitmap_aligned_xor_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out))) -} diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/bitmap_ops_sse4_amd64.s deleted file mode 100644 index c15e186253a36..0000000000000 --- a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s +++ 
/dev/null @@ -1,501 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -TEXT ·_bitmap_aligned_and_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB0_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB0_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB0_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB0_5 - -LBB0_4: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01c38349 // add r11, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB0_4 - -LBB0_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB0_16 - -LBB0_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] - LONG $0x1f442242; BYTE $0x02 // and al, byte [rdi + r11 + 2] - LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al - LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] - LONG $0x1f442242; BYTE $0x03 // and al, byte [rdi + r11 + 3] - LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al - LONG $0x04c38349 // add r11, 4 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB0_6 - JMP LBB0_16 - -LBB0_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE 
$0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB0_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB0_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB0_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB0_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB0_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB0_15 - -LBB0_14: - LONG $0x04100f42; BYTE 
$0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB0_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB0_3 - -LBB0_16: - RET - -LBB0_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB0_14 - JMP LBB0_15 - -TEXT ·_bitmap_aligned_or_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB1_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB1_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB1_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB1_5 - -LBB1_4: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f040a42 // or al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01c38349 // add r11, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB1_4 - -LBB1_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB1_16 - -LBB1_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f040a42 // or al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - LONG $0x1f440a42; BYTE $0x01 // or al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] - LONG $0x1f440a42; BYTE $0x02 // or al, byte [rdi + r11 + 2] - LONG $0x1a448842; BYTE 
$0x02 // mov byte [rdx + r11 + 2], al - LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] - LONG $0x1f440a42; BYTE $0x03 // or al, byte [rdi + r11 + 3] - LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al - LONG $0x04c38349 // add r11, 4 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB1_6 - JMP LBB1_16 - -LBB1_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB1_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB1_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB1_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB1_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups 
xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB1_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB1_15 - -LBB1_14: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB1_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB1_3 - -LBB1_16: - RET - -LBB1_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB1_14 - JMP LBB1_15 - -TEXT ·_bitmap_aligned_and_not_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB2_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB2_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB2_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0xc1f6; BYTE $0x01 // test cl, 1 - JE LBB2_5 - LONG $0x1e048a42 // mov al, byte [rsi + r11] - WORD $0xd0f6 // not al - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01cb8349 // or r11, 1 - -LBB2_5: - WORD $0x0149; BYTE $0xc8 // add r8, rcx - JE LBB2_16 - -LBB2_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, 
byte [rsi + r11] - WORD $0xd0f6 // not al - LONG $0x1f042242 // and al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - WORD $0xd0f6 // not al - LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x02c38349 // add r11, 2 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB2_6 - JMP LBB2_16 - -LBB2_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB2_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB2_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB2_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB2_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 
16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB2_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB2_15 - -LBB2_14: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB2_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB2_3 - -LBB2_16: - RET - -LBB2_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB2_14 - JMP LBB2_15 - -TEXT ·_bitmap_aligned_xor_sse4(SB), $0-32 - - MOVQ left+0(FP), DI - MOVQ right+8(FP), SI - MOVQ out+16(FP), DX - MOVQ length+24(FP), CX - - WORD $0x8548; BYTE $0xc9 // test rcx, rcx - JLE LBB3_16 - LONG $0x1ff98348 // cmp rcx, 31 - JA LBB3_7 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - -LBB3_3: - WORD $0x894d; BYTE $0xd8 // mov r8, r11 - WORD $0xf749; BYTE $0xd0 // not r8 - WORD $0x0149; BYTE $0xc8 // add r8, rcx - WORD $0x8949; BYTE $0xc9 // mov r9, rcx - LONG $0x03e18349 // and r9, 3 - JE LBB3_5 - -LBB3_4: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f043242 // xor al, byte [rdi + 
r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x01c38349 // add r11, 1 - LONG $0xffc18349 // add r9, -1 - JNE LBB3_4 - -LBB3_5: - LONG $0x03f88349 // cmp r8, 3 - JB LBB3_16 - -LBB3_6: - LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11] - LONG $0x1f043242 // xor al, byte [rdi + r11] - LONG $0x1a048842 // mov byte [rdx + r11], al - LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1] - LONG $0x1f443242; BYTE $0x01 // xor al, byte [rdi + r11 + 1] - LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al - LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2] - LONG $0x1f443242; BYTE $0x02 // xor al, byte [rdi + r11 + 2] - LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al - LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3] - LONG $0x1f443242; BYTE $0x03 // xor al, byte [rdi + r11 + 3] - LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al - LONG $0x04c38349 // add r11, 4 - WORD $0x394c; BYTE $0xd9 // cmp rcx, r11 - JNE LBB3_6 - JMP LBB3_16 - -LBB3_7: - LONG $0x0a0c8d4c // lea r9, [rdx + rcx] - LONG $0x0f048d48 // lea rax, [rdi + rcx] - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd2970f41 // seta r10b - LONG $0x0e048d48 // lea rax, [rsi + rcx] - WORD $0x3949; BYTE $0xf9 // cmp r9, rdi - WORD $0x970f; BYTE $0xd3 // seta bl - WORD $0x3948; BYTE $0xd0 // cmp rax, rdx - LONG $0xd0970f41 // seta r8b - WORD $0x3949; BYTE $0xf1 // cmp r9, rsi - LONG $0xd1970f41 // seta r9b - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0x8441; BYTE $0xda // test r10b, bl - JNE LBB3_3 - WORD $0x2045; BYTE $0xc8 // and r8b, r9b - JNE LBB3_3 - WORD $0x8949; BYTE $0xcb // mov r11, rcx - LONG $0xe0e38349 // and r11, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc1 // mov r9, rax - LONG $0x05e9c149 // shr r9, 5 - LONG $0x01c18349 // add r9, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB3_10 - WORD $0x894d; BYTE $0xca // mov r10, r9 - LONG $0xfee28349 // and r10, -2 - 
WORD $0xf749; BYTE $0xda // neg r10 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB3_12: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32] - LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48] - LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32] - WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 - LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48] - WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 - LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2 - LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0 - LONG $0x40c08349 // add r8, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB3_12 - LONG $0x01c1f641 // test r9b, 1 - JE LBB3_15 - -LBB3_14: - LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8] - LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16] - LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8] - WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0 - LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16] - WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1 - LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2 - LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0 - -LBB3_15: - WORD $0x3949; BYTE $0xcb // cmp r11, rcx - JNE LBB3_3 - -LBB3_16: - RET - -LBB3_10: - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - LONG $0x01c1f641 // test r9b, 1 - JNE LBB3_14 - JMP LBB3_15 diff --git a/go/arrow/bitutil/bitmaps.go 
b/go/arrow/bitutil/bitmaps.go deleted file mode 100644 index fb4fcd597b804..0000000000000 --- a/go/arrow/bitutil/bitmaps.go +++ /dev/null @@ -1,747 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -import ( - "bytes" - "errors" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// BitmapReader is a simple bitmap reader for a byte slice. 
-type BitmapReader struct { - bitmap []byte - pos int - len int - - current byte - byteOffset int - bitOffset int -} - -// NewBitmapReader creates and returns a new bitmap reader for the given bitmap -func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader { - curbyte := byte(0) - if length > 0 && bitmap != nil { - curbyte = bitmap[offset/8] - } - return &BitmapReader{ - bitmap: bitmap, - byteOffset: offset / 8, - bitOffset: offset % 8, - current: curbyte, - len: length, - } -} - -// Set returns true if the current bit is set -func (b *BitmapReader) Set() bool { - return (b.current & (1 << b.bitOffset)) != 0 -} - -// NotSet returns true if the current bit is not set -func (b *BitmapReader) NotSet() bool { - return (b.current & (1 << b.bitOffset)) == 0 -} - -// Next advances the reader to the next bit in the bitmap. -func (b *BitmapReader) Next() { - b.bitOffset++ - b.pos++ - if b.bitOffset == 8 { - b.bitOffset = 0 - b.byteOffset++ - if b.pos < b.len { - b.current = b.bitmap[int(b.byteOffset)] - } - } -} - -// Pos returns the current bit position in the bitmap that the reader is looking at -func (b *BitmapReader) Pos() int { return b.pos } - -// Len returns the total number of bits in the bitmap -func (b *BitmapReader) Len() int { return b.len } - -// BitmapWriter is a simple writer for writing bitmaps to byte slices -type BitmapWriter struct { - buf []byte - pos int - length int - - curByte uint8 - bitMask uint8 - byteOffset int -} - -// NewBitmapWriter returns a sequential bitwise writer that preserves surrounding -// bit values as it writes. -func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter { - ret := &BitmapWriter{ - buf: bitmap, - length: length, - byteOffset: start / 8, - bitMask: BitMask[start%8], - } - if length > 0 { - ret.curByte = bitmap[int(ret.byteOffset)] - } - return ret -} - -// Reset resets the position and view of the slice to restart writing a bitmap -// to the same byte slice. 
-func (b *BitmapWriter) Reset(start, length int) { - b.pos = 0 - b.byteOffset = start / 8 - b.bitMask = BitMask[start%8] - b.length = length - if b.length > 0 { - b.curByte = b.buf[int(b.byteOffset)] - } -} - -func (b *BitmapWriter) Pos() int { return b.pos } -func (b *BitmapWriter) Set() { b.curByte |= b.bitMask } -func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask } - -// Next increments the writer to the next bit for writing. -func (b *BitmapWriter) Next() { - b.bitMask = b.bitMask << 1 - b.pos++ - if b.bitMask == 0 { - b.bitMask = 0x01 - b.buf[b.byteOffset] = b.curByte - b.byteOffset++ - if b.pos < b.length { - b.curByte = b.buf[int(b.byteOffset)] - } - } -} - -// AppendBools writes a series of booleans to the bitmapwriter and returns -// the number of remaining bytes left in the buffer for writing. -func (b *BitmapWriter) AppendBools(in []bool) int { - space := min(b.length-b.pos, len(in)) - if space == 0 { - return 0 - } - - bitOffset := bits.TrailingZeros32(uint32(b.bitMask)) - // location that the first byte needs to be written to for appending - appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))] - // update everything but curByte - appslice[0] = b.curByte - for i, b := range in[:space] { - if b { - SetBit(appslice, i+bitOffset) - } else { - ClearBit(appslice, i+bitOffset) - } - } - - b.pos += space - b.bitMask = BitMask[(bitOffset+space)%8] - b.byteOffset += (bitOffset + space) / 8 - b.curByte = appslice[len(appslice)-1] - - return space -} - -// Finish flushes the final byte out to the byteslice in case it was not already -// on a byte aligned boundary. 
-func (b *BitmapWriter) Finish() { - if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) { - b.buf[int(b.byteOffset)] = b.curByte - } -} - -// BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64) -// and then provides functions to grab the individual trailing bytes after the last word -type BitmapWordReader struct { - bitmap []byte - offset int - nwords int - trailingBits int - trailingBytes int - curword uint64 -} - -// NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and -// number of trailing bytes, along with the number of words. -func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader { - bitoffset := offset % 8 - byteOffset := offset / 8 - bm := &BitmapWordReader{ - offset: bitoffset, - bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))], - // decrement wordcount by 1 as we may touch two adjacent words in one iteration - nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1, - } - if bm.nwords < 0 { - bm.nwords = 0 - } - bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8 - bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits))) - - if bm.nwords > 0 { - bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap)) - } else if length > 0 { - setLSB(&bm.curword, bm.bitmap[0]) - } - return bm -} - -// NextWord returns the next full word read from the bitmap, should not be called -// if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic. -// -// We don't perform the bounds checking in order to improve performance. 
-func (bm *BitmapWordReader) NextWord() uint64 { - bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):] - word := bm.curword - nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap)) - if bm.offset != 0 { - // combine two adjacent words into one word - // |<------ next ----->|<---- current ---->| - // +-------------+-----+-------------+-----+ - // | --- | A | B | --- | - // +-------------+-----+-------------+-----+ - // | | offset - // v v - // +-----+-------------+ - // | A | B | - // +-----+-------------+ - // |<------ word ----->| - word >>= uint64(bm.offset) - word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset)) - } - bm.curword = nextWord - return word -} - -// NextTrailingByte returns the next trailing byte of the bitmap after the last word -// along with the number of valid bits in that byte. When validBits < 8, that -// is the last byte. -// -// If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits. -// Thus the TrailingBytes function should be used to know how many trailing bytes to read. 
-func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) { - debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits") - - if bm.trailingBits <= 8 { - // last byte - validBits = bm.trailingBits - bm.trailingBits = 0 - rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits) - for i := 0; i < validBits; i++ { - val >>= 1 - if rdr.Set() { - val |= 0x80 - } - rdr.Next() - } - val >>= (8 - validBits) - return - } - - bm.bitmap = bm.bitmap[1:] - nextByte := bm.bitmap[0] - val = getLSB(bm.curword) - if bm.offset != 0 { - val >>= byte(bm.offset) - val |= nextByte << (8 - bm.offset) - } - setLSB(&bm.curword, nextByte) - bm.trailingBits -= 8 - bm.trailingBytes-- - validBits = 8 - return -} - -func (bm *BitmapWordReader) Words() int { return bm.nwords } -func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes } - -// BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being -// a uint64). After the last full word is written, PutNextTrailingByte can be used to -// write the remaining trailing bytes. -type BitmapWordWriter struct { - bitmap []byte - offset int - len int - - bitMask uint64 - currentWord uint64 -} - -// NewBitmapWordWriter initializes a new bitmap word writer which will start writing -// into the byte slice at bit offset start, expecting to write len bits. -func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter { - ret := &BitmapWordWriter{ - bitmap: bitmap[start/8:], - len: len, - offset: start % 8, - bitMask: (uint64(1) << uint64(start%8)) - 1, - } - - if ret.offset != 0 { - if ret.len >= int(unsafe.Sizeof(uint64(0))*8) { - ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap)) - } else if ret.len > 0 { - setLSB(&ret.currentWord, ret.bitmap[0]) - } - } - return ret -} - -// PutNextWord writes the given word to the bitmap, potentially splitting across -// two adjacent words. 
-func (bm *BitmapWordWriter) PutNextWord(word uint64) { - sz := int(unsafe.Sizeof(word)) - if bm.offset != 0 { - // split one word into two adjacent words, don't touch unused bits - // |<------ word ----->| - // +-----+-------------+ - // | A | B | - // +-----+-------------+ - // | | - // v v offset - // +-------------+-----+-------------+-----+ - // | --- | A | B | --- | - // +-------------+-----+-------------+-----+ - // |<------ next ----->|<---- current ---->| - word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset))) - next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:])) - bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask) - next = (next &^ bm.bitMask) | (word & bm.bitMask) - endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord)) - endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next)) - bm.currentWord = next - } else { - endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word)) - } - bm.bitmap = bm.bitmap[sz:] -} - -// PutNextTrailingByte writes the number of bits indicated by validBits from b to -// the bitmap. 
-func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) { - curbyte := getLSB(bm.currentWord) - if validBits == 8 { - if bm.offset != 0 { - b = (b << bm.offset) | (b >> (8 - bm.offset)) - next := bm.bitmap[1] - curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask)) - next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask)) - bm.bitmap[0] = curbyte - bm.bitmap[1] = next - bm.currentWord = uint64(next) - } else { - bm.bitmap[0] = b - } - bm.bitmap = bm.bitmap[1:] - } else { - debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer") - debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailing byte outside of bounds of bitmap") - wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits) - for i := 0; i < validBits; i++ { - if b&0x01 != 0 { - wr.Set() - } else { - wr.Clear() - } - wr.Next() - b >>= 1 - } - wr.Finish() - } -} - -type transferMode int8 - -const ( - transferCopy transferMode = iota - transferInvert -) - -func transferBitmap(mode transferMode, src []byte, srcOffset, length int, dst []byte, dstOffset int) { - if length == 0 { - // if there's nothing to write, end early. - return - } - - bitOffset := srcOffset % 8 - destBitOffset := dstOffset % 8 - - // slow path, one of the bitmaps are not byte aligned. 
- if bitOffset != 0 || destBitOffset != 0 { - rdr := NewBitmapWordReader(src, srcOffset, length) - wr := NewBitmapWordWriter(dst, dstOffset, length) - - nwords := rdr.Words() - for nwords > 0 { - nwords-- - if mode == transferInvert { - wr.PutNextWord(^rdr.NextWord()) - } else { - wr.PutNextWord(rdr.NextWord()) - } - } - nbytes := rdr.TrailingBytes() - for nbytes > 0 { - nbytes-- - bt, validBits := rdr.NextTrailingByte() - if mode == transferInvert { - bt = ^bt - } - wr.PutNextTrailingByte(bt, validBits) - } - return - } - - // fast path, both are starting with byte-aligned bitmaps - nbytes := int(BytesForBits(int64(length))) - - // shift by its byte offset - src = src[srcOffset/8:] - dst = dst[dstOffset/8:] - - // Take care of the trailing bits in the last byte - // E.g., if trailing_bits = 5, last byte should be - // - low 3 bits: new bits from last byte of data buffer - // - high 5 bits: old bits from last byte of dest buffer - trailingBits := nbytes*8 - length - trailMask := byte(uint(1)<<(8-trailingBits)) - 1 - var lastData byte - if mode == transferInvert { - for i, b := range src[:nbytes-1] { - dst[i] = ^b - } - lastData = ^src[nbytes-1] - } else { - copy(dst, src[:nbytes-1]) - lastData = src[nbytes-1] - } - - dst[nbytes-1] &= ^trailMask - dst[nbytes-1] |= lastData & trailMask -} - -// CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset, -// and copying length bits into dst, starting at bit offset dstOffset. -func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { - transferBitmap(transferCopy, src, srcOffset, length, dst, dstOffset) -} - -// InvertBitmap copies a bit range of a bitmap, inverting it as it copies -// over into the destination. 
-func InvertBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) { - transferBitmap(transferInvert, src, srcOffset, length, dst, dstOffset) -} - -type bitOp struct { - opWord func(uint64, uint64) uint64 - opByte func(byte, byte) byte - opAligned func(l, r, o []byte) -} - -var ( - bitAndOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l & r }, - opByte: func(l, r byte) byte { return l & r }, - } - bitOrOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l | r }, - opByte: func(l, r byte) byte { return l | r }, - } - bitAndNotOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l &^ r }, - opByte: func(l, r byte) byte { return l &^ r }, - } - bitXorOp = bitOp{ - opWord: func(l, r uint64) uint64 { return l ^ r }, - opByte: func(l, r byte) byte { return l ^ r }, - } -) - -func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - debug.Assert(lOffset%8 == rOffset%8, "aligned bitmap op called with unaligned offsets") - debug.Assert(lOffset%8 == outOffset%8, "aligned bitmap op called with unaligned output offset") - - nbytes := BytesForBits(length + lOffset%8) - left = left[lOffset/8:] - right = right[rOffset/8:] - out = out[outOffset/8:] - endMask := (lOffset + length%8) - switch nbytes { - case 0: - return - case 1: // everything within a single byte - // (length+lOffset%8) <= 8 - mask := PrecedingBitmask[lOffset%8] - if endMask != 0 { - mask |= TrailingBitmask[(lOffset+length)%8] - } - out[0] = (out[0] & mask) | (op.opByte(left[0], right[0]) &^ mask) - case 2: // don't send zero length to opAligned - firstByteMask := PrecedingBitmask[lOffset%8] - out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) - lastByteMask := byte(0) - if endMask != 0 { - lastByteMask = TrailingBitmask[(lOffset+length)%8] - } - out[1] = (out[1] & lastByteMask) | (op.opByte(left[1], right[1]) &^ lastByteMask) - default: - firstByteMask := PrecedingBitmask[lOffset%8] - 
out[0] = (out[0] & firstByteMask) | (op.opByte(left[0], right[0]) &^ firstByteMask) - - op.opAligned(left[1:nbytes-1], right[1:nbytes-1], out[1:nbytes-1]) - - lastByteMask := byte(0) - if endMask != 0 { - lastByteMask = TrailingBitmask[(lOffset+length)%8] - } - out[nbytes-1] = (out[nbytes-1] & lastByteMask) | (op.opByte(left[nbytes-1], right[nbytes-1]) &^ lastByteMask) - } -} - -func unalignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - leftRdr := NewBitmapWordReader(left, int(lOffset), int(length)) - rightRdr := NewBitmapWordReader(right, int(rOffset), int(length)) - writer := NewBitmapWordWriter(out, int(outOffset), int(length)) - - for nwords := leftRdr.Words(); nwords > 0; nwords-- { - writer.PutNextWord(op.opWord(leftRdr.NextWord(), rightRdr.NextWord())) - } - for nbytes := leftRdr.TrailingBytes(); nbytes > 0; nbytes-- { - leftByte, leftValid := leftRdr.NextTrailingByte() - rightByte, rightValid := rightRdr.NextTrailingByte() - debug.Assert(leftValid == rightValid, "unexpected mismatch of valid bits") - writer.PutNextTrailingByte(op.opByte(leftByte, rightByte), leftValid) - } -} - -func BitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) { - if (outOffset%8 == lOffset%8) && (outOffset%8 == rOffset%8) { - // fastcase! 
- alignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) - } else { - unalignedBitmapOp(op, left, right, lOffset, rOffset, out, outOffset, length) - } -} - -func BitmapOpAlloc(mem memory.Allocator, op bitOp, left, right []byte, lOffset, rOffset int64, length int64, outOffset int64) *memory.Buffer { - bits := length + outOffset - buf := memory.NewResizableBuffer(mem) - buf.Resize(int(BytesForBits(bits))) - BitmapOp(op, left, right, lOffset, rOffset, buf.Bytes(), outOffset, length) - return buf -} - -func BitmapAnd(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitAndOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapOr(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitOrOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapAndAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitAndOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapOrAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitOrOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapAndNot(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitAndNotOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapAndNotAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitAndNotOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapXor(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) { - BitmapOp(bitXorOp, left, right, lOffset, rOffset, out, outOffset, length) -} - -func BitmapXorAlloc(mem memory.Allocator, left, right 
[]byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer { - return BitmapOpAlloc(mem, bitXorOp, left, right, lOffset, rOffset, length, outOffset) -} - -func BitmapEquals(left, right []byte, lOffset, rOffset int64, length int64) bool { - if lOffset%8 == 0 && rOffset%8 == 0 { - // byte aligned, fast path, can use bytes.Equal (memcmp) - byteLen := length / 8 - lStart := lOffset / 8 - rStart := rOffset / 8 - if !bytes.Equal(left[lStart:lStart+byteLen], right[rStart:rStart+byteLen]) { - return false - } - - // check trailing bits - for i := (length / 8) * 8; i < length; i++ { - if BitIsSet(left, int(lOffset+i)) != BitIsSet(right, int(rOffset+i)) { - return false - } - } - return true - } - - lrdr := NewBitmapWordReader(left, int(lOffset), int(length)) - rrdr := NewBitmapWordReader(right, int(rOffset), int(length)) - - nwords := lrdr.Words() - for nwords > 0 { - nwords-- - if lrdr.NextWord() != rrdr.NextWord() { - return false - } - } - - nbytes := lrdr.TrailingBytes() - for nbytes > 0 { - nbytes-- - lbt, _ := lrdr.NextTrailingByte() - rbt, _ := rrdr.NextTrailingByte() - if lbt != rbt { - return false - } - } - return true -} - -// OptionalBitIndexer is a convenience wrapper for getting bits from -// a bitmap which may or may not be nil. 
-type OptionalBitIndexer struct { - Bitmap []byte - Offset int -} - -func (b *OptionalBitIndexer) GetBit(i int) bool { - return b.Bitmap == nil || BitIsSet(b.Bitmap, b.Offset+i) -} - -type Bitmap struct { - Data []byte - Offset, Len int64 -} - -func bitLength(bitmaps []Bitmap) (int64, error) { - for _, b := range bitmaps[1:] { - if b.Len != bitmaps[0].Len { - return -1, errors.New("bitmaps must be same length") - } - } - return bitmaps[0].Len, nil -} - -func runVisitWordsAndWriteLoop(bitLen int64, rdrs []*BitmapWordReader, wrs []*BitmapWordWriter, visitor func(in, out []uint64)) { - const bitWidth int64 = int64(uint64SizeBits) - - visited := make([]uint64, len(rdrs)) - output := make([]uint64, len(wrs)) - - // every reader will have same number of words, since they are same - // length'ed. This will be inefficient in some cases. When there's - // offsets beyond the Word boundary, every word would have to be - // created from 2 adjoining words - nwords := int64(rdrs[0].Words()) - bitLen -= nwords * bitWidth - for nwords > 0 { - nwords-- - for i := range visited { - visited[i] = rdrs[i].NextWord() - } - visitor(visited, output) - for i := range output { - wrs[i].PutNextWord(output[i]) - } - } - - // every reader will have the same number of trailing bytes, because - // we already confirmed they have the same length. Because - // offsets beyond the Word boundary can cause adjoining words, the - // tailing portion could be more than one word remaining full/partial - // words to write. 
- if bitLen == 0 { - return - } - - // convert the word visitor to a bytevisitor - byteVisitor := func(in, out []byte) { - for i, w := range in { - visited[i] = uint64(w) - } - visitor(visited, output) - for i, w := range output { - out[i] = byte(w) - } - } - - visitedBytes := make([]byte, len(rdrs)) - outputBytes := make([]byte, len(wrs)) - nbytes := rdrs[0].trailingBytes - for nbytes > 0 { - nbytes-- - memory.Set(visitedBytes, 0) - memory.Set(outputBytes, 0) - - var validBits int - for i := range rdrs { - visitedBytes[i], validBits = rdrs[i].NextTrailingByte() - } - byteVisitor(visitedBytes, outputBytes) - for i, w := range outputBytes { - wrs[i].PutNextTrailingByte(w, validBits) - } - } -} - -// VisitWordsAndWrite visits words of bits from each input bitmap and -// collects outputs to a slice of output Bitmaps. -// -// All bitmaps must have identical lengths. The first bit in a visited -// bitmap may be offset within the first visited word, but words will -// otherwise contain densely packed bits loaded from the bitmap. That -// offset within the first word is returned. -// -// NOTE: this function is efficient on 3+ sufficiently large bitmaps. -// It also has a large prolog/epilog overhead and should be used -// carefully in other cases. For 2 or fewer bitmaps, and/or smaller -// bitmaps, try BitmapReader and or other utilities. 
-func VisitWordsAndWrite(args []Bitmap, out []Bitmap, visitor func(in, out []uint64)) error { - bitLen, err := bitLength(args) - if err != nil { - return err - } - - rdrs, wrs := make([]*BitmapWordReader, len(args)), make([]*BitmapWordWriter, len(out)) - for i, in := range args { - rdrs[i] = NewBitmapWordReader(in.Data, int(in.Offset), int(in.Len)) - } - for i, o := range out { - wrs[i] = NewBitmapWordWriter(o.Data, int(o.Offset), int(o.Len)) - } - runVisitWordsAndWriteLoop(bitLen, rdrs, wrs, visitor) - return nil -} diff --git a/go/arrow/bitutil/bitmaps_test.go b/go/arrow/bitutil/bitmaps_test.go deleted file mode 100644 index 726bfa050cc4b..0000000000000 --- a/go/arrow/bitutil/bitmaps_test.go +++ /dev/null @@ -1,580 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bitutil_test - -import ( - "fmt" - "math/rand" - "strconv" - "testing" - - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" -) - -func bitmapFromSlice(vals []int, bitOffset int) []byte { - out := make([]byte, int(bitutil.BytesForBits(int64(len(vals)+bitOffset)))) - writer := bitutil.NewBitmapWriter(out, bitOffset, len(vals)) - for _, val := range vals { - if val == 1 { - writer.Set() - } else { - writer.Clear() - } - writer.Next() - } - writer.Finish() - - return out -} - -func assertReaderVals(t *testing.T, reader *bitutil.BitmapReader, vals []bool) { - for _, v := range vals { - if v { - assert.True(t, reader.Set()) - assert.False(t, reader.NotSet()) - } else { - assert.True(t, reader.NotSet()) - assert.False(t, reader.Set()) - } - reader.Next() - } -} - -func TestNormalOperation(t *testing.T) { - for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} { - buf := bitmapFromSlice([]int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1}, offset) - - reader := bitutil.NewBitmapReader(buf, offset, 14) - assertReaderVals(t, reader, []bool{false, true, true, true, false, false, false, true, false, true, false, true, false, true}) - } -} - -func TestDoesNotReadOutOfBounds(t *testing.T) { - var bitmap [16]byte - const length = 128 - - reader := bitutil.NewBitmapReader(bitmap[:], 0, length) - assert.EqualValues(t, length, reader.Len()) - assert.NotPanics(t, func() { - for i := 0; i < length; i++ { - assert.True(t, reader.NotSet()) - reader.Next() - } - }) - assert.EqualValues(t, length, reader.Pos()) - - reader = bitutil.NewBitmapReader(bitmap[:], 5, length-5) - assert.EqualValues(t, length-5, reader.Len()) - assert.NotPanics(t, func() { - for i := 0; i < length-5; i++ { - assert.True(t, reader.NotSet()) - reader.Next() - } - }) - assert.EqualValues(t, length-5, reader.Pos()) - - assert.NotPanics(t, 
func() { - reader = bitutil.NewBitmapReader(nil, 0, 0) - }) -} - -func writeToWriter(vals []int, wr *bitutil.BitmapWriter) { - for _, v := range vals { - if v != 0 { - wr.Set() - } else { - wr.Clear() - } - wr.Next() - } - wr.Finish() -} - -func TestBitmapWriter(t *testing.T) { - for _, fillByte := range []byte{0x00, 0xFF} { - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 0, 12) - writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) - // {0b00110110, 0b....1010, ........, ........} - assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 0, 12) - wr.AppendBools([]bool{false, true, true, false, true, true, false, false, false, true, false, true}) - assert.Equal(t, []byte{0x36, (0x0A | (fillByte & 0xF0)), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 3, 12) - writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) - // {0b10110..., 0b.1010001, ........, ........} - assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 3, 12) - wr.AppendBools([]bool{false, true, true, false}) - wr.AppendBools([]bool{true, true, false, false}) - wr.AppendBools([]bool{false, true, false, true}) - assert.Equal(t, []byte{0xb0 | (fillByte & 0x07), 0x51 | (fillByte & 0x80), fillByte, fillByte}, bitmap) - } - { - bitmap := []byte{fillByte, fillByte, fillByte, fillByte} - wr := bitutil.NewBitmapWriter(bitmap, 20, 12) - writeToWriter([]int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}, wr) - // {........, ........, 0b0110...., 0b10100011} - assert.Equal(t, []byte{fillByte, fillByte, 0x60 | (fillByte & 0x0f), 0xa3}, bitmap) - } - } -} - -func 
TestBitmapReader(t *testing.T) { - assertReaderVals := func(vals []int, rdr *bitutil.BitmapReader) { - for _, v := range vals { - if v != 0 { - assert.True(t, rdr.Set()) - assert.False(t, rdr.NotSet()) - } else { - assert.False(t, rdr.Set()) - assert.True(t, rdr.NotSet()) - } - rdr.Next() - } - } - - vals := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} - - for _, offset := range []int{0, 1, 3, 5, 7, 8, 12, 13, 21, 38, 75, 120} { - bm := make([]byte, bitutil.BytesForBits(int64(len(vals)+offset))) - wr := bitutil.NewBitmapWriter(bm, offset, len(vals)) - writeToWriter(vals, wr) - - rdr := bitutil.NewBitmapReader(bm, offset, 14) - assertReaderVals(vals, rdr) - } -} - -func TestCopyBitmap(t *testing.T) { - const bufsize = 1000 - lengths := []int{bufsize*8 - 4, bufsize * 8} - offsets := []int{0, 12, 16, 32, 37, 63, 64, 128} - - buffer := make([]byte, bufsize) - - // random bytes - r := rand.New(rand.NewSource(0)) - r.Read(buffer) - - // add 16 byte padding - otherBuffer := make([]byte, bufsize+32) - r.Read(otherBuffer) - - for _, nbits := range lengths { - for _, offset := range offsets { - for _, destOffset := range offsets { - t.Run(fmt.Sprintf("bits %d off %d dst %d", nbits, offset, destOffset), func(t *testing.T) { - copyLen := nbits - offset - - bmCopy := make([]byte, len(otherBuffer)) - copy(bmCopy, otherBuffer) - - bitutil.CopyBitmap(buffer, offset, copyLen, bmCopy, destOffset) - - for i := 0; i < int(destOffset); i++ { - assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i) - } - for i := 0; i < int(copyLen); i++ { - assert.Equalf(t, bitutil.BitIsSet(buffer, i+int(offset)), bitutil.BitIsSet(bmCopy, i+int(destOffset)), "bit index: %d", i) - } - for i := int(destOffset + copyLen); i < len(otherBuffer); i++ { - assert.Equalf(t, bitutil.BitIsSet(otherBuffer, i), bitutil.BitIsSet(bmCopy, i), "bit index: %d", i) - } - }) - } - } - } -} - -func benchmarkCopyBitmapN(b *testing.B, offsetSrc, offsetDest, n int) { - nbits := 
n * 8 - // random bytes - r := rand.New(rand.NewSource(0)) - src := make([]byte, n) - r.Read(src) - - length := nbits - offsetSrc - - dest := make([]byte, bitutil.BytesForBits(int64(length+offsetDest))) - - b.ResetTimer() - b.SetBytes(int64(n)) - for i := 0; i < b.N; i++ { - bitutil.CopyBitmap(src, offsetSrc, length, dest, offsetDest) - } -} - -// Fast path which is just a memcopy -func BenchmarkCopyBitmapWithoutOffset(b *testing.B) { - for _, sz := range []int{32, 128, 1000, 1024} { - b.Run(strconv.Itoa(sz), func(b *testing.B) { - benchmarkCopyBitmapN(b, 0, 0, sz) - }) - } -} - -// slow path where the source buffer is not byte aligned -func BenchmarkCopyBitmapWithOffset(b *testing.B) { - for _, sz := range []int{32, 128, 1000, 1024} { - b.Run(strconv.Itoa(sz), func(b *testing.B) { - benchmarkCopyBitmapN(b, 4, 0, sz) - }) - } -} - -// slow path where both source and dest are not byte aligned -func BenchmarkCopyBitmapWithOffsetBoth(b *testing.B) { - for _, sz := range []int{32, 128, 1000, 1024} { - b.Run(strconv.Itoa(sz), func(b *testing.B) { - benchmarkCopyBitmapN(b, 3, 7, sz) - }) - } -} - -const bufferSize = 1024 * 8 - -// a naive bitmap reader for a baseline - -type NaiveBitmapReader struct { - bitmap []byte - pos int -} - -func (n *NaiveBitmapReader) IsSet() bool { return bitutil.BitIsSet(n.bitmap, n.pos) } -func (n *NaiveBitmapReader) IsNotSet() bool { return !n.IsSet() } -func (n *NaiveBitmapReader) Next() { n.pos++ } - -// naive bitmap writer for a baseline - -type NaiveBitmapWriter struct { - bitmap []byte - pos int -} - -func (n *NaiveBitmapWriter) Set() { - byteOffset := n.pos / 8 - bitOffset := n.pos % 8 - bitSetMask := uint8(1 << bitOffset) - n.bitmap[byteOffset] |= bitSetMask -} - -func (n *NaiveBitmapWriter) Clear() { - byteOffset := n.pos / 8 - bitOffset := n.pos % 8 - bitClearMask := uint8(0xFF ^ (1 << bitOffset)) - n.bitmap[byteOffset] &= bitClearMask -} - -func (n *NaiveBitmapWriter) Next() { n.pos++ } -func (n *NaiveBitmapWriter) Finish() {} - 
-func randomBuffer(nbytes int64) []byte { - buf := make([]byte, nbytes) - r := rand.New(rand.NewSource(0)) - r.Read(buf) - return buf -} - -func BenchmarkBitmapReader(b *testing.B) { - buf := randomBuffer(bufferSize) - nbits := bufferSize * 8 - - b.Run("naive baseline", func(b *testing.B) { - b.SetBytes(2 * bufferSize) - for i := 0; i < b.N; i++ { - { - total := 0 - rdr := NaiveBitmapReader{buf, 0} - for j := 0; j < nbits; j++ { - if rdr.IsSet() { - total++ - } - rdr.Next() - } - } - { - total := 0 - rdr := NaiveBitmapReader{buf, 0} - for j := 0; j < nbits; j++ { - if rdr.IsSet() { - total++ - } - rdr.Next() - } - } - } - }) - b.Run("bitmap reader", func(b *testing.B) { - b.SetBytes(2 * bufferSize) - for i := 0; i < b.N; i++ { - { - total := 0 - rdr := bitutil.NewBitmapReader(buf, 0, nbits) - for j := 0; j < nbits; j++ { - if rdr.Set() { - total++ - } - rdr.Next() - } - } - { - total := 0 - rdr := bitutil.NewBitmapReader(buf, 0, nbits) - for j := 0; j < nbits; j++ { - if rdr.Set() { - total++ - } - rdr.Next() - } - } - } - }) -} - -type ( - noAllocFn func(left, right []byte, lOffset, rOffset int64, out []byte, outOffset, length int64) - allocFn func(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer - bitmapOp struct { - noAlloc noAllocFn - alloc allocFn - } -) - -type BitmapOpSuite struct { - suite.Suite -} - -func (s *BitmapOpSuite) testAligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { - var ( - left, right []byte - out *memory.Buffer - length int64 - ) - for _, lOffset := range []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} { - s.Run(fmt.Sprintf("left offset %d", lOffset), func() { - left = bitmapFromSlice(leftBits, int(lOffset)) - length = int64(len(leftBits)) - for _, rOffset := range []int64{lOffset, lOffset + 8, lOffset + 40} { - s.Run(fmt.Sprintf("right offset %d", rOffset), func() { - right = bitmapFromSlice(rightBits, int(rOffset)) - for _, outOffset := range []int64{lOffset, 
lOffset + 16, lOffset + 24} { - s.Run(fmt.Sprintf("out offset %d", outOffset), func() { - s.Run("zero-length", func() { - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset) - s.EqualValues(bitutil.BytesForBits(outOffset), out.Len()) - expected := make([]byte, out.Len()) - if out.Len() > 0 { - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - - memory.Set(out.Bytes(), 0xFF) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0) - if out.Len() > 0 { - memory.Set(expected, 0xFF) - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - out.Release() - }) - - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) - defer out.Release() - rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - - memory.Set(out.Bytes(), 0x00) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) - rdr = bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - }) - } - }) - } - }) - } -} - -func (s *BitmapOpSuite) testUnaligned(op bitmapOp, leftBits, rightBits []int, resultBits []bool) { - var ( - left, right []byte - out *memory.Buffer - length int64 - offsets = []int64{0, 1, 3, 5, 7, 8, 13, 21, 38, 75, 120, 65536} - ) - - for _, lOffset := range offsets { - s.Run(fmt.Sprintf("left offset %d", lOffset), func() { - left = bitmapFromSlice(leftBits, int(lOffset)) - length = int64(len(leftBits)) - for _, rOffset := range offsets { - s.Run(fmt.Sprintf("right offset %d", rOffset), func() { - right = bitmapFromSlice(rightBits, int(rOffset)) - for _, outOffset := range offsets { - s.Run(fmt.Sprintf("out offset %d", outOffset), func() { - s.Run("zero-length", func() { - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, 0, outOffset) - s.EqualValues(bitutil.BytesForBits(outOffset), out.Len()) - expected := make([]byte, 
out.Len()) - if out.Len() > 0 { - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - - memory.Set(out.Bytes(), 0xFF) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, 0) - if out.Len() > 0 { - memory.Set(expected, 0xFF) - s.Equal(expected, out.Bytes()) - } else { - s.Nil(out.Bytes()) - } - out.Release() - }) - s.Run("alloc", func() { - out = op.alloc(memory.DefaultAllocator, left, right, lOffset, rOffset, length, outOffset) - rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - }) - s.Run("noalloc", func() { - memory.Set(out.Bytes(), 0x00) - op.noAlloc(left, right, lOffset, rOffset, out.Bytes(), outOffset, length) - rdr := bitutil.NewBitmapReader(out.Bytes(), int(outOffset), int(length)) - assertReaderVals(s.T(), rdr, resultBits) - }) - }) - } - }) - } - }) - } -} - -func (s *BitmapOpSuite) TestBitmapAnd() { - op := bitmapOp{ - noAlloc: bitutil.BitmapAnd, - alloc: bitutil.BitmapAndAlloc, - } - - leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} - rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} - resultBits := []bool{false, false, true, false, false, false, false, false, false, true, false, false, false, false} - - s.Run("aligned", func() { - s.testAligned(op, leftBits, rightBits, resultBits) - }) - s.Run("unaligned", func() { - s.testUnaligned(op, leftBits, rightBits, resultBits) - }) -} - -func (s *BitmapOpSuite) TestBitmapOr() { - op := bitmapOp{ - noAlloc: bitutil.BitmapOr, - alloc: bitutil.BitmapOrAlloc, - } - - leftBits := []int{0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1} - rightBits := []int{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0} - resultBits := []bool{false, true, true, true, true, true, false, true, true, true, true, true, true, true} - - s.Run("aligned", func() { - s.testAligned(op, leftBits, rightBits, resultBits) - }) - s.Run("unaligned", func() { - s.testUnaligned(op, leftBits, rightBits, resultBits) - }) -} - -func 
TestBitmapOps(t *testing.T) { - suite.Run(t, new(BitmapOpSuite)) -} - -func TestSmallBitmapOp(t *testing.T) { - // 0b01111111 0b11001111 - left := [2]byte{127, 207} - // 0b11111110 0b01111111 - right := [2]byte{254, 127} - // 0b01111110 0b01001111 - results := [2]byte{126, 79} - - var out [2]byte - bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 8) - assert.Equal(t, results[:1], out[:1]) - - bitutil.BitmapAnd(left[:], right[:], 0, 0, out[:], 0, 16) - assert.Equal(t, results, out) -} - -func createRandomBuffer(mem memory.Allocator, src *rand.Rand, nbytes int) []byte { - buf := mem.Allocate(nbytes) - src.Read(buf) - return buf -} - -func benchBitOpImpl(b *testing.B, nBytes, offset int, op noAllocFn) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - r := rand.New(rand.NewSource(0)) - - buf1 := createRandomBuffer(mem, r, nBytes) - buf2 := createRandomBuffer(mem, r, nBytes) - buf3 := createRandomBuffer(mem, r, nBytes) - b.Cleanup(func() { - mem.Free(buf1) - mem.Free(buf2) - mem.Free(buf3) - }) - - numBits := nBytes*8 - offset - b.ResetTimer() - b.SetBytes(bitutil.BytesForBits(int64(numBits)) * 2) - for i := 0; i < b.N; i++ { - op(buf1, buf2, 0, int64(offset), buf3, 0, int64(numBits)) - } -} - -func BenchmarkBitmapAnd(b *testing.B) { - sizes := []int{bufferSize * 4, bufferSize * 16} - offsets := []int{0, 1, 2} - - for _, s := range sizes { - b.Run(fmt.Sprintf("nbytes=%d", s), func(b *testing.B) { - for _, o := range offsets { - b.Run(fmt.Sprintf("%d", o), func(b *testing.B) { - benchBitOpImpl(b, s, o, bitutil.BitmapAnd) - }) - } - }) - } -} diff --git a/go/arrow/bitutil/bitutil.go b/go/arrow/bitutil/bitutil.go deleted file mode 100644 index c4b633c73aa40..0000000000000 --- a/go/arrow/bitutil/bitutil.go +++ /dev/null @@ -1,186 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -import ( - "math" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/memory" -) - -var ( - BitMask = [8]byte{1, 2, 4, 8, 16, 32, 64, 128} - FlippedBitMask = [8]byte{254, 253, 251, 247, 239, 223, 191, 127} -) - -// IsMultipleOf8 returns whether v is a multiple of 8. -func IsMultipleOf8(v int64) bool { return v&7 == 0 } - -// IsMultipleOf64 returns whether v is a multiple of 64 -func IsMultipleOf64(v int64) bool { return v&63 == 0 } - -func BytesForBits(bits int64) int64 { return (bits + 7) >> 3 } - -// NextPowerOf2 rounds x to the next power of two. -func NextPowerOf2(x int) int { return 1 << uint(bits.Len(uint(x))) } - -// CeilByte rounds size to the next multiple of 8. -func CeilByte(size int) int { return (size + 7) &^ 7 } - -// CeilByte64 rounds size to the next multiple of 8. -func CeilByte64(size int64) int64 { return (size + 7) &^ 7 } - -// BitIsSet returns true if the bit at index i in buf is set (1). -func BitIsSet(buf []byte, i int) bool { return (buf[uint(i)/8] & BitMask[byte(i)%8]) != 0 } - -// BitIsNotSet returns true if the bit at index i in buf is not set (0). -func BitIsNotSet(buf []byte, i int) bool { return (buf[uint(i)/8] & BitMask[byte(i)%8]) == 0 } - -// SetBit sets the bit at index i in buf to 1. -func SetBit(buf []byte, i int) { buf[uint(i)/8] |= BitMask[byte(i)%8] } - -// ClearBit sets the bit at index i in buf to 0. 
-func ClearBit(buf []byte, i int) { buf[uint(i)/8] &= FlippedBitMask[byte(i)%8] } - -// SetBitTo sets the bit at index i in buf to val. -func SetBitTo(buf []byte, i int, val bool) { - if val { - SetBit(buf, i) - } else { - ClearBit(buf, i) - } -} - -// CountSetBits counts the number of 1's in buf up to n bits. -func CountSetBits(buf []byte, offset, n int) int { - if offset > 0 { - return countSetBitsWithOffset(buf, offset, n) - } - - count := 0 - - uint64Bytes := n / uint64SizeBits * 8 - for _, v := range bytesToUint64(buf[:uint64Bytes]) { - count += bits.OnesCount64(v) - } - - for _, v := range buf[uint64Bytes : n/8] { - count += bits.OnesCount8(v) - } - - // tail bits - for i := n &^ 0x7; i < n; i++ { - if BitIsSet(buf, i) { - count++ - } - } - - return count -} - -func countSetBitsWithOffset(buf []byte, offset, n int) int { - count := 0 - - beg := offset - begU8 := roundUp(beg, uint64SizeBits) - - init := min(n, begU8-beg) - for i := offset; i < beg+init; i++ { - if BitIsSet(buf, i) { - count++ - } - } - - begU64 := BytesForBits(int64(beg + init)) - return count + CountSetBits(buf[begU64:], 0, n-init) -} - -func roundUp(v, f int) int { - return (v + (f - 1)) / f * f -} - -func min(a, b int) int { - if a < b { - return a - } - return b -} - -const ( - uint64SizeBytes = int(unsafe.Sizeof(uint64(0))) - uint64SizeBits = uint64SizeBytes * 8 -) - -var ( - // PrecedingBitmask is a convenience set of values as bitmasks for checking - // prefix bits of a byte - PrecedingBitmask = [8]byte{0, 1, 3, 7, 15, 31, 63, 127} - // TrailingBitmask is the bitwise complement version of kPrecedingBitmask - TrailingBitmask = [8]byte{255, 254, 252, 248, 240, 224, 192, 128} -) - -// SetBitsTo is a convenience function to quickly set or unset all the bits -// in a bitmap starting at startOffset for length bits. 
-func SetBitsTo(bits []byte, startOffset, length int64, areSet bool) { - if length == 0 { - return - } - - beg := startOffset - end := startOffset + length - var fill uint8 = 0 - if areSet { - fill = math.MaxUint8 - } - - byteBeg := beg / 8 - byteEnd := end/8 + 1 - - // don't modify bits before the startOffset by using this mask - firstByteMask := PrecedingBitmask[beg%8] - // don't modify bits past the length by using this mask - lastByteMask := TrailingBitmask[end%8] - - if byteEnd == byteBeg+1 { - // set bits within a single byte - onlyByteMask := firstByteMask - if end%8 != 0 { - onlyByteMask = firstByteMask | lastByteMask - } - - bits[byteBeg] &= onlyByteMask - bits[byteBeg] |= fill &^ onlyByteMask - return - } - - // set/clear trailing bits of first byte - bits[byteBeg] &= firstByteMask - bits[byteBeg] |= fill &^ firstByteMask - - if byteEnd-byteBeg > 2 { - memory.Set(bits[byteBeg+1:byteEnd-1], fill) - } - - if end%8 == 0 { - return - } - - bits[byteEnd-1] &= lastByteMask - bits[byteEnd-1] |= fill &^ lastByteMask -} diff --git a/go/arrow/bitutil/bitutil_bytes.go b/go/arrow/bitutil/bitutil_bytes.go deleted file mode 100644 index 09dd5cbc67d39..0000000000000 --- a/go/arrow/bitutil/bitutil_bytes.go +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package bitutil - -import ( - "unsafe" -) - -func bytesToUint64(b []byte) []uint64 { - if len(b) < uint64SizeBytes { - return nil - } - - ptr := unsafe.SliceData(b) - if ptr == nil { - return nil - } - - return unsafe.Slice((*uint64)(unsafe.Pointer(ptr)), - len(b)/uint64SizeBytes) -} diff --git a/go/arrow/bitutil/bitutil_test.go b/go/arrow/bitutil/bitutil_test.go deleted file mode 100644 index c03bf5268a5ff..0000000000000 --- a/go/arrow/bitutil/bitutil_test.go +++ /dev/null @@ -1,320 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package bitutil_test - -import ( - "fmt" - "math/rand" - "testing" - - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/testing/tools" - "github.com/stretchr/testify/assert" -) - -func TestIsMultipleOf8(t *testing.T) { - for _, tc := range []struct { - v int64 - want bool - }{ - {-16, true}, - {-9, false}, - {-8, true}, - {-7, false}, - {-4, false}, - {-1, false}, - {-0, true}, - {0, true}, - {1, false}, - {4, false}, - {7, false}, - {8, true}, - {9, false}, - {16, true}, - } { - t.Run(fmt.Sprintf("v=%d", tc.v), func(t *testing.T) { - got := bitutil.IsMultipleOf8(tc.v) - if got != tc.want { - t.Fatalf("IsMultipleOf8(%d): got=%v, want=%v", tc.v, got, tc.want) - } - }) - } -} - -func TestCeilByte(t *testing.T) { - tests := []struct { - name string - in, exp int - }{ - {"zero", 0, 0}, - {"five", 5, 8}, - {"sixteen", 16, 16}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - got := bitutil.CeilByte(test.in) - assert.Equal(t, test.exp, got) - }) - } -} - -func TestBitIsSet(t *testing.T) { - buf := make([]byte, 2) - buf[0] = 0xa1 - buf[1] = 0xc2 - exp := []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} - var got []bool - for i := 0; i < 0x10; i++ { - got = append(got, bitutil.BitIsSet(buf, i)) - } - assert.Equal(t, exp, got) -} - -func TestBitIsNotSet(t *testing.T) { - buf := make([]byte, 2) - buf[0] = 0xa1 - buf[1] = 0xc2 - exp := []bool{false, true, true, true, true, false, true, false, true, false, true, true, true, true, false, false} - var got []bool - for i := 0; i < 0x10; i++ { - got = append(got, bitutil.BitIsNotSet(buf, i)) - } - assert.Equal(t, exp, got) -} - -func TestClearBit(t *testing.T) { - buf := make([]byte, 2) - buf[0] = 0xff - buf[1] = 0xff - for i, v := range []bool{false, true, true, true, true, false, true, false, true, false, true, true, true, true, false, false} { - if v { - 
bitutil.ClearBit(buf, i) - } - } - assert.Equal(t, []byte{0xa1, 0xc2}, buf) -} - -func TestSetBit(t *testing.T) { - buf := make([]byte, 2) - for i, v := range []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} { - if v { - bitutil.SetBit(buf, i) - } - } - assert.Equal(t, []byte{0xa1, 0xc2}, buf) -} - -func TestSetBitTo(t *testing.T) { - buf := make([]byte, 2) - for i, v := range []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} { - bitutil.SetBitTo(buf, i, v) - } - assert.Equal(t, []byte{0xa1, 0xc2}, buf) -} - -func TestCountSetBits(t *testing.T) { - tests := []struct { - name string - buf []byte - off int - n int - exp int - }{ - {"some 03 bits", bbits(0x11000000), 0, 3, 2}, - {"some 11 bits", bbits(0x11000011, 0x01000000), 0, 11, 5}, - {"some 72 bits", bbits(0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x11001010, 0x11110000, 0x00001111, 0x11000011, 0x10001001), 0, 9 * 8, 35}, - {"all 08 bits", bbits(0x11111110), 0, 8, 7}, - {"all 03 bits", bbits(0x11100001), 0, 3, 3}, - {"all 11 bits", bbits(0x11111111, 0x11111111), 0, 11, 11}, - {"all 72 bits", bbits(0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111, 0x11111111), 0, 9 * 8, 72}, - {"none 03 bits", bbits(0x00000001), 0, 3, 0}, - {"none 11 bits", bbits(0x00000000, 0x00000000), 0, 11, 0}, - {"none 72 bits", bbits(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), 0, 9 * 8, 0}, - - {"some 03 bits - offset+1", bbits(0x11000000), 1, 3, 1}, - {"some 03 bits - offset+2", bbits(0x11000000), 2, 3, 0}, - {"some 11 bits - offset+1", bbits(0x11000011, 0x01000000, 0x00000000), 1, 11, 4}, - {"some 11 bits - offset+2", bbits(0x11000011, 0x01000000, 0x00000000), 2, 11, 3}, - {"some 11 bits - offset+3", bbits(0x11000011, 0x01000000, 0x00000000), 3, 11, 3}, - {"some 11 bits - offset+6", bbits(0x11000011, 
0x01000000, 0x00000000), 6, 11, 3}, - {"some 11 bits - offset+7", bbits(0x11000011, 0x01000000, 0x00000000), 7, 11, 2}, - {"some 11 bits - offset+8", bbits(0x11000011, 0x01000000, 0x00000000), 8, 11, 1}, - } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - got := bitutil.CountSetBits(test.buf, test.off, test.n) - assert.Equal(t, test.exp, got) - }) - } -} - -func TestCountSetBitsOffset(t *testing.T) { - slowCountSetBits := func(buf []byte, offset, n int) int { - count := 0 - for i := offset; i < offset+n; i++ { - if bitutil.BitIsSet(buf, i) { - count++ - } - } - return count - } - - const ( - bufSize = 1000 - nbits = bufSize * 8 - ) - - offsets := []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16, 32, 37, 63, 64, 128, nbits - 30, nbits - 64} - - buf := make([]byte, bufSize) - - rng := rand.New(rand.NewSource(0)) - _, err := rng.Read(buf) - if err != nil { - t.Fatal(err) - } - - for i, offset := range offsets { - want := slowCountSetBits(buf, offset, nbits-offset) - got := bitutil.CountSetBits(buf, offset, nbits-offset) - if got != want { - t.Errorf("offset[%2d/%2d]=%5d. 
got=%5d, want=%5d", i+1, len(offsets), offset, got, want) - } - } -} - -func TestSetBitsTo(t *testing.T) { - for _, fillByte := range []byte{0x00, 0xFF} { - { - // set within a byte - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 2, 2, true) - bitutil.SetBitsTo(bm, 4, 2, false) - assert.Equal(t, []byte{(fillByte &^ 0x3C) | 0xC}, bm[:1]) - } - { - // test straddling a single byte boundary - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 4, 7, true) - bitutil.SetBitsTo(bm, 11, 7, false) - assert.Equal(t, []byte{(fillByte & 0xF) | 0xF0, 0x7, fillByte &^ 0x3}, bm[:3]) - } - { - // test byte aligned end - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 4, 4, true) - bitutil.SetBitsTo(bm, 8, 8, false) - assert.Equal(t, []byte{(fillByte & 0xF) | 0xF0, 0x00, fillByte}, bm[:3]) - } - { - // test byte aligned end, multiple bytes - bm := []byte{fillByte, fillByte, fillByte, fillByte} - bitutil.SetBitsTo(bm, 0, 24, false) - falseByte := byte(0) - assert.Equal(t, []byte{falseByte, falseByte, falseByte, fillByte}, bm) - } - } -} - -func bbits(v ...int32) []byte { - return tools.IntsToBitsLSB(v...) 
-} - -func BenchmarkBitIsSet(b *testing.B) { - buf := make([]byte, 32) - b.ResetTimer() - for i := 0; i < b.N; i++ { - bitutil.BitIsSet(buf, (i%32)&0x1a) - } -} - -func BenchmarkSetBit(b *testing.B) { - buf := make([]byte, 32) - b.ResetTimer() - for i := 0; i < b.N; i++ { - bitutil.SetBit(buf, (i%32)&0x1a) - } -} - -func BenchmarkSetBitTo(b *testing.B) { - vals := []bool{true, false, false, false, false, true, false, true, false, true, false, false, false, false, true, true} - buf := make([]byte, 32) - b.ResetTimer() - for i := 0; i < b.N; i++ { - bitutil.SetBitTo(buf, i%32, vals[i%len(vals)]) - } -} - -var ( - intval int -) - -func benchmarkCountSetBitsN(b *testing.B, offset, n int) { - nn := n/8 + 1 - buf := make([]byte, nn) - //src := [4]byte{0x1f, 0xaa, 0xba, 0x11} - src := [4]byte{0x01, 0x01, 0x01, 0x01} - for i := 0; i < nn; i++ { - buf[i] = src[i&0x3] - } - b.ResetTimer() - var res int - for i := 0; i < b.N; i++ { - res = bitutil.CountSetBits(buf, offset, n-offset) - } - intval = res -} - -func BenchmarkCountSetBits_3(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 3) -} - -func BenchmarkCountSetBits_32(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 32) -} - -func BenchmarkCountSetBits_128(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 128) -} - -func BenchmarkCountSetBits_1000(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 1000) -} - -func BenchmarkCountSetBits_1024(b *testing.B) { - benchmarkCountSetBitsN(b, 0, 1024) -} - -func BenchmarkCountSetBitsOffset_3(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 3) -} - -func BenchmarkCountSetBitsOffset_32(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 32) -} - -func BenchmarkCountSetBitsOffset_128(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 128) -} - -func BenchmarkCountSetBitsOffset_1000(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 1000) -} - -func BenchmarkCountSetBitsOffset_1024(b *testing.B) { - benchmarkCountSetBitsN(b, 1, 1024) -} diff --git a/go/arrow/bitutil/endian_default.go 
b/go/arrow/bitutil/endian_default.go deleted file mode 100644 index ecbbaa70d04b6..0000000000000 --- a/go/arrow/bitutil/endian_default.go +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !s390x -// +build !s390x - -package bitutil - -import ( - "unsafe" -) - -var toFromLEFunc = func(in uint64) uint64 { return in } - -func getLSB(v uint64) byte { - return (*[8]byte)(unsafe.Pointer(&v))[0] -} - -func setLSB(v *uint64, b byte) { - (*[8]byte)(unsafe.Pointer(v))[0] = b -} diff --git a/go/arrow/bitutil/endian_s390x.go b/go/arrow/bitutil/endian_s390x.go deleted file mode 100644 index e99605f5848fa..0000000000000 --- a/go/arrow/bitutil/endian_s390x.go +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutil - -import ( - "math/bits" - "unsafe" -) - -var toFromLEFunc = bits.ReverseBytes64 - -func getLSB(v uint64) byte { - return (*[8]byte)(unsafe.Pointer(&v))[7] -} - -func setLSB(v *uint64, b byte) { - (*[8]byte)(unsafe.Pointer(v))[7] = b -} diff --git a/go/arrow/cdata/arrow/c/abi.h b/go/arrow/cdata/arrow/c/abi.h deleted file mode 100644 index d58417e6fbcf2..0000000000000 --- a/go/arrow/cdata/arrow/c/abi.h +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef ARROW_C_DATA_INTERFACE -#define ARROW_C_DATA_INTERFACE - -#define ARROW_FLAG_DICTIONARY_ORDERED 1 -#define ARROW_FLAG_NULLABLE 2 -#define ARROW_FLAG_MAP_KEYS_SORTED 4 - -struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; -}; - -struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_DATA_INTERFACE - -#ifndef ARROW_C_STREAM_INTERFACE -#define ARROW_C_STREAM_INTERFACE - -struct ArrowArrayStream { - // Callback to get the stream type - // (will be the same for all arrays in the stream). - // - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - // - // If successful, the ArrowSchema must be released independently from the stream. - int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); - - // Callback to get the next array - // (if no error and the array is released, the stream has ended) - // - // Return value: 0 if successful, an `errno`-compatible error code otherwise. - // - // If successful, the ArrowArray must be released independently from the stream. - int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); - - // Callback to get optional detailed error information. - // This must only be called if the last stream operation failed - // with a non-0 return code. 
- // - // Return value: pointer to a null-terminated character array describing - // the last error, or NULL if no description is available. - // - // The returned pointer is only valid until the next operation on this stream - // (including release). - const char* (*get_last_error)(struct ArrowArrayStream*); - - // Release callback: release the stream's own resources. - // Note that arrays returned by `get_next` must be individually released. - void (*release)(struct ArrowArrayStream*); - - // Opaque producer-specific data - void* private_data; -}; - -#endif // ARROW_C_STREAM_INTERFACE - -#ifdef __cplusplus -} -#endif diff --git a/go/arrow/cdata/arrow/c/helpers.h b/go/arrow/cdata/arrow/c/helpers.h deleted file mode 100644 index 6581403b57c46..0000000000000 --- a/go/arrow/cdata/arrow/c/helpers.h +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include -#include - -#include "arrow/c/abi.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/// Query whether the C schema is released -static inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) { - return schema->release == NULL; -} - -/// Mark the C schema released (for use in release callbacks) -static inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) { - schema->release = NULL; -} - -/// Move the C schema from `src` to `dest` -/// -/// Note `dest` must *not* point to a valid schema already, otherwise there -/// will be a memory leak. -static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) { - assert(dest != src); - assert(!ArrowSchemaIsReleased(src)); - memcpy(dest, src, sizeof(struct ArrowSchema)); - ArrowSchemaMarkReleased(src); -} - -/// Release the C schema, if necessary, by calling its release callback -static inline void ArrowSchemaRelease(struct ArrowSchema* schema) { - if (!ArrowSchemaIsReleased(schema)) { - schema->release(schema); - assert(ArrowSchemaIsReleased(schema)); - } -} - -/// Query whether the C array is released -static inline int ArrowArrayIsReleased(const struct ArrowArray* array) { - return array->release == NULL; -} - -/// Mark the C array released (for use in release callbacks) -static inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } - -/// Move the C array from `src` to `dest` -/// -/// Note `dest` must *not* point to a valid array already, otherwise there -/// will be a memory leak. 
-static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { - assert(dest != src); - assert(!ArrowArrayIsReleased(src)); - memcpy(dest, src, sizeof(struct ArrowArray)); - ArrowArrayMarkReleased(src); -} - -/// Release the C array, if necessary, by calling its release callback -static inline void ArrowArrayRelease(struct ArrowArray* array) { - if (!ArrowArrayIsReleased(array)) { - array->release(array); - assert(ArrowArrayIsReleased(array)); - } -} - -/// Query whether the C array stream is released -static inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { - return stream->release == NULL; -} - -/// Mark the C array stream released (for use in release callbacks) -static inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { - stream->release = NULL; -} - -/// Move the C array stream from `src` to `dest` -/// -/// Note `dest` must *not* point to a valid stream already, otherwise there -/// will be a memory leak. -static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, - struct ArrowArrayStream* dest) { - assert(dest != src); - assert(!ArrowArrayStreamIsReleased(src)); - memcpy(dest, src, sizeof(struct ArrowArrayStream)); - ArrowArrayStreamMarkReleased(src); -} - -/// Release the C array stream, if necessary, by calling its release callback -static inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { - if (!ArrowArrayStreamIsReleased(stream)) { - stream->release(stream); - assert(ArrowArrayStreamIsReleased(stream)); - } -} - -#ifdef __cplusplus -} -#endif diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go deleted file mode 100644 index 0562eaed0fb7a..0000000000000 --- a/go/arrow/cdata/cdata.go +++ /dev/null @@ -1,1028 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build cgo -// +build cgo - -package cdata - -// implement handling of the Arrow C Data Interface. At least from a consuming side. - -// #include "arrow/c/abi.h" -// #include "arrow/c/helpers.h" -// #include -// int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); } -// int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); } -// const char* stream_get_last_error(struct ArrowArrayStream* st) { return st->get_last_error(st); } -// struct ArrowArray* get_arr() { -// struct ArrowArray* out = (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); -// memset(out, 0, sizeof(struct ArrowArray)); -// return out; -// } -// struct ArrowArrayStream* get_stream() { -// struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); -// memset(out, 0, sizeof(struct ArrowArrayStream)); -// return out; -// } -// -import "C" - -import ( - "errors" - "fmt" - "io" - "runtime" - "strconv" - "strings" - "syscall" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/xerrors" -) - -type ( - // CArrowSchema is the C Data Interface for ArrowSchemas defined in abi.h - 
CArrowSchema = C.struct_ArrowSchema - // CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h - CArrowArray = C.struct_ArrowArray - // CArrowArrayStream is the C Stream Interface object for handling streams of record batches. - CArrowArrayStream = C.struct_ArrowArrayStream -) - -// Map from the defined strings to their corresponding arrow.DataType interface -// object instances, for types that don't require params. -var formatToSimpleType = map[string]arrow.DataType{ - "n": arrow.Null, - "b": arrow.FixedWidthTypes.Boolean, - "c": arrow.PrimitiveTypes.Int8, - "C": arrow.PrimitiveTypes.Uint8, - "s": arrow.PrimitiveTypes.Int16, - "S": arrow.PrimitiveTypes.Uint16, - "i": arrow.PrimitiveTypes.Int32, - "I": arrow.PrimitiveTypes.Uint32, - "l": arrow.PrimitiveTypes.Int64, - "L": arrow.PrimitiveTypes.Uint64, - "e": arrow.FixedWidthTypes.Float16, - "f": arrow.PrimitiveTypes.Float32, - "g": arrow.PrimitiveTypes.Float64, - "z": arrow.BinaryTypes.Binary, - "Z": arrow.BinaryTypes.LargeBinary, - "u": arrow.BinaryTypes.String, - "U": arrow.BinaryTypes.LargeString, - "vz": arrow.BinaryTypes.BinaryView, - "vu": arrow.BinaryTypes.StringView, - "tdD": arrow.FixedWidthTypes.Date32, - "tdm": arrow.FixedWidthTypes.Date64, - "tts": arrow.FixedWidthTypes.Time32s, - "ttm": arrow.FixedWidthTypes.Time32ms, - "ttu": arrow.FixedWidthTypes.Time64us, - "ttn": arrow.FixedWidthTypes.Time64ns, - "tDs": arrow.FixedWidthTypes.Duration_s, - "tDm": arrow.FixedWidthTypes.Duration_ms, - "tDu": arrow.FixedWidthTypes.Duration_us, - "tDn": arrow.FixedWidthTypes.Duration_ns, - "tiM": arrow.FixedWidthTypes.MonthInterval, - "tiD": arrow.FixedWidthTypes.DayTimeInterval, - "tin": arrow.FixedWidthTypes.MonthDayNanoInterval, -} - -// decode metadata from C which is encoded as -// -// [int32] -> number of metadata pairs -// for 0..n -// [int32] -> number of bytes in key -// [n bytes] -> key value -// [int32] -> number of bytes in value -// [n bytes] -> value -func decodeCMetadata(md 
*C.char) arrow.Metadata { - if md == nil { - return arrow.Metadata{} - } - - // don't copy the bytes, just reference them directly - const maxlen = 0x7fffffff - data := (*[maxlen]byte)(unsafe.Pointer(md))[:] - - readint32 := func() int32 { - v := *(*int32)(unsafe.Pointer(&data[0])) - data = data[arrow.Int32SizeBytes:] - return v - } - - readstr := func() string { - l := readint32() - s := string(data[:l]) - data = data[l:] - return s - } - - npairs := readint32() - if npairs == 0 { - return arrow.Metadata{} - } - - keys := make([]string, npairs) - vals := make([]string, npairs) - - for i := int32(0); i < npairs; i++ { - keys[i] = readstr() - vals[i] = readstr() - } - - return arrow.NewMetadata(keys, vals) -} - -// convert a C.ArrowSchema to an arrow.Field to maintain metadata with the schema -func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { - // always release, even on error - defer C.ArrowSchemaRelease(schema) - - var childFields []arrow.Field - if schema.n_children > 0 { - // call ourselves recursively if there are children. - // set up a slice to reference safely - schemaChildren := unsafe.Slice(schema.children, schema.n_children) - childFields = make([]arrow.Field, schema.n_children) - for i, c := range schemaChildren { - childFields[i], err = importSchema((*CArrowSchema)(c)) - if err != nil { - return - } - } - } - - // copy the schema name from the c-string - ret.Name = C.GoString(schema.name) - ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0 - ret.Metadata = decodeCMetadata(schema.metadata) - - // copies the c-string here, but it's very small - f := C.GoString(schema.format) - // handle our non-parameterized simple types. 
- dt, ok := formatToSimpleType[f] - if ok { - ret.Type = dt - - if schema.dictionary != nil { - valueField, err := importSchema(schema.dictionary) - if err != nil { - return ret, err - } - - ret.Type = &arrow.DictionaryType{ - IndexType: ret.Type, - ValueType: valueField.Type, - Ordered: schema.dictionary.flags&C.ARROW_FLAG_DICTIONARY_ORDERED != 0} - } - - return - } - - // handle types with params via colon - typs := strings.Split(f, ":") - defaulttz := "" - switch typs[0] { - case "tss": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Second, TimeZone: tz} - case "tsm": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: tz} - case "tsu": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: tz} - case "tsn": - tz := typs[1] - if len(typs[1]) == 0 { - tz = defaulttz - } - dt = &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: tz} - case "w": // fixed size binary is "w:##" where ## is the byteWidth - byteWidth, err := strconv.Atoi(typs[1]) - if err != nil { - return ret, err - } - dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth} - case "d": // decimal types are d:,[,] size is assumed 128 if left out - props := typs[1] - propList := strings.Split(props, ",") - bitwidth := 128 - var precision, scale int - - if len(propList) < 2 || len(propList) > 3 { - return ret, xerrors.Errorf("invalid decimal spec '%s': wrong number of properties", f) - } else if len(propList) == 3 { - bitwidth, err = strconv.Atoi(propList[2]) - if err != nil { - return ret, xerrors.Errorf("could not parse decimal bitwidth in '%s': %s", f, err.Error()) - } - } - - precision, err = strconv.Atoi(propList[0]) - if err != nil { - return ret, xerrors.Errorf("could not parse decimal precision in '%s': %s", f, err.Error()) - } - - scale, err = strconv.Atoi(propList[1]) - if err != nil { - return 
ret, xerrors.Errorf("could not parse decimal scale in '%s': %s", f, err.Error()) - } - - if bitwidth == 128 { - dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)} - } else if bitwidth == 256 { - dt = &arrow.Decimal256Type{Precision: int32(precision), Scale: int32(scale)} - } else { - return ret, xerrors.Errorf("only decimal128 and decimal256 are supported, got '%s'", f) - } - } - - if f[0] == '+' { // types with children - switch f[1] { - case 'l': // list - dt = arrow.ListOfField(childFields[0]) - case 'L': // large list - dt = arrow.LargeListOfField(childFields[0]) - case 'v': // list view/large list view - if f[2] == 'l' { - dt = arrow.ListViewOfField(childFields[0]) - } else if f[2] == 'L' { - dt = arrow.LargeListViewOfField(childFields[0]) - } - case 'w': // fixed size list is w:# where # is the list size. - listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) - if err != nil { - return ret, err - } - - dt = arrow.FixedSizeListOfField(int32(listSize), childFields[0]) - case 's': // struct - dt = arrow.StructOf(childFields...) - case 'r': // run-end encoded - if len(childFields) != 2 { - return ret, fmt.Errorf("%w: run-end encoded arrays must have 2 children", arrow.ErrInvalid) - } - dt = arrow.RunEndEncodedOf(childFields[0].Type, childFields[1].Type) - case 'm': // map type is basically a list of structs. 
- st := childFields[0].Type.(*arrow.StructType) - dt = arrow.MapOf(st.Field(0).Type, st.Field(1).Type) - dt.(*arrow.MapType).KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0 - case 'u': // union - var mode arrow.UnionMode - switch f[2] { - case 'd': - mode = arrow.DenseMode - case 's': - mode = arrow.SparseMode - default: - err = fmt.Errorf("%w: invalid union type", arrow.ErrInvalid) - return - } - - codes := strings.Split(strings.Split(f, ":")[1], ",") - typeCodes := make([]arrow.UnionTypeCode, 0, len(codes)) - for _, i := range codes { - v, e := strconv.ParseInt(i, 10, 8) - if e != nil { - err = fmt.Errorf("%w: invalid type code: %s", arrow.ErrInvalid, e) - return - } - if v < 0 { - err = fmt.Errorf("%w: negative type code in union: format string %s", arrow.ErrInvalid, f) - return - } - typeCodes = append(typeCodes, arrow.UnionTypeCode(v)) - } - - if len(childFields) != len(typeCodes) { - err = fmt.Errorf("%w: ArrowArray struct number of children incompatible with format string", arrow.ErrInvalid) - return - } - - dt = arrow.UnionOf(mode, childFields, typeCodes) - } - } - - if dt == nil { - // if we didn't find a type, then it's something we haven't implemented. - err = xerrors.New("unimplemented type") - } else { - ret.Type = dt - } - - return -} - -// importer to keep track when importing C ArrowArray objects. -type cimporter struct { - dt arrow.DataType - arr *CArrowArray - data arrow.ArrayData - parent *cimporter - children []cimporter - cbuffers []*C.void - - alloc *importAllocator -} - -func (imp *cimporter) importChild(parent *cimporter, src *CArrowArray) error { - imp.parent, imp.arr, imp.alloc = parent, src, parent.alloc - return imp.doImport() -} - -// import any child arrays for lists, structs, and so on. 
-func (imp *cimporter) doImportChildren() error { - children := unsafe.Slice(imp.arr.children, imp.arr.n_children) - - if len(children) > 0 { - imp.children = make([]cimporter, len(children)) - } - - // handle the cases - switch imp.dt.ID() { - case arrow.LIST: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.ListType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.LARGE_LIST: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.LargeListType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.LIST_VIEW: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.ListViewType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.LARGE_LIST_VIEW: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.LargeListViewType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.FIXED_SIZE_LIST: // only one child to import - imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - case arrow.STRUCT: // import all the children - st := imp.dt.(*arrow.StructType) - for i, c := range children { - imp.children[i].dt = st.Field(i).Type - imp.children[i].importChild(imp, c) - } - case arrow.RUN_END_ENCODED: // import run-ends and values - st := imp.dt.(*arrow.RunEndEncodedType) - imp.children[0].dt = st.RunEnds() - if err := imp.children[0].importChild(imp, children[0]); err != nil { - return err - } - imp.children[1].dt = st.Encoded() - if err := imp.children[1].importChild(imp, children[1]); err != nil { - return err - } - case arrow.MAP: // only one child to import, it's a struct array - imp.children[0].dt = imp.dt.(*arrow.MapType).Elem() - if err := imp.children[0].importChild(imp, children[0]); err != nil { 
- return err - } - case arrow.DENSE_UNION: - dt := imp.dt.(*arrow.DenseUnionType) - for i, c := range children { - imp.children[i].dt = dt.Fields()[i].Type - imp.children[i].importChild(imp, c) - } - case arrow.SPARSE_UNION: - dt := imp.dt.(*arrow.SparseUnionType) - for i, c := range children { - imp.children[i].dt = dt.Fields()[i].Type - imp.children[i].importChild(imp, c) - } - } - - return nil -} - -func (imp *cimporter) initarr() { - imp.arr = C.get_arr() - if imp.alloc == nil { - imp.alloc = &importAllocator{arr: imp.arr} - } -} - -func (imp *cimporter) doImportArr(src *CArrowArray) error { - imp.arr = C.get_arr() - C.ArrowArrayMove(src, imp.arr) - if imp.alloc == nil { - imp.alloc = &importAllocator{arr: imp.arr} - } - - // we tie the releasing of the array to when the buffers are - // cleaned up, so if there are no buffers that we've imported - // such as for a null array or a nested array with no bitmap - // and only null columns, then we can release the CArrowArray - // struct immediately after import, since we have no imported - // memory that we have to track the lifetime of. - defer func() { - if imp.alloc.bufCount == 0 { - C.ArrowArrayRelease(imp.arr) - C.free(unsafe.Pointer(imp.arr)) - } - }() - - return imp.doImport() -} - -// import is called recursively as needed for importing an array and its children -// in order to generate array.Data objects -func (imp *cimporter) doImport() error { - // move the array from the src object passed in to the one referenced by - // this importer. That way we can set up a finalizer on the created - // arrow.ArrayData object so we clean up our Array's memory when garbage collected. - defer func(arr *CArrowArray) { - // this should only occur in the case of an error happening - // during import, at which point we need to clean up the - // ArrowArray struct we allocated. 
- if imp.data == nil { - C.free(unsafe.Pointer(arr)) - } - }(imp.arr) - - // import any children - if err := imp.doImportChildren(); err != nil { - return err - } - - for _, c := range imp.children { - if c.data != nil { - defer c.data.Release() - } - } - - if imp.arr.n_buffers > 0 { - // get a view of the buffers, zero-copy. we're just looking at the pointers - imp.cbuffers = unsafe.Slice((**C.void)(unsafe.Pointer(imp.arr.buffers)), imp.arr.n_buffers) - } - - // handle each of our type cases - switch dt := imp.dt.(type) { - case *arrow.NullType: - if err := imp.checkNoChildren(); err != nil { - return err - } - - imp.data = array.NewData(dt, int(imp.arr.length), nil, nil, int(imp.arr.null_count), int(imp.arr.offset)) - case arrow.FixedWidthDataType: - return imp.importFixedSizePrimitive() - case *arrow.StringType: - return imp.importStringLike(int64(arrow.Int32SizeBytes)) - case *arrow.BinaryType: - return imp.importStringLike(int64(arrow.Int32SizeBytes)) - case *arrow.LargeStringType: - return imp.importStringLike(int64(arrow.Int64SizeBytes)) - case *arrow.LargeBinaryType: - return imp.importStringLike(int64(arrow.Int64SizeBytes)) - case *arrow.StringViewType: - return imp.importBinaryViewLike() - case *arrow.BinaryViewType: - return imp.importBinaryViewLike() - case *arrow.ListType: - return imp.importListLike() - case *arrow.LargeListType: - return imp.importListLike() - case *arrow.ListViewType: - return imp.importListViewLike() - case *arrow.LargeListViewType: - return imp.importListViewLike() - case *arrow.MapType: - return imp.importListLike() - case *arrow.FixedSizeListType: - if err := imp.checkNumChildren(1); err != nil { - return err - } - - if err := imp.checkNumBuffers(1); err != nil { - return err - } - - nulls, err := imp.importNullBitmap(0) - if err != nil { - return err - } - if nulls != nil { - defer nulls.Release() - } - - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, []arrow.ArrayData{imp.children[0].data}, 
int(imp.arr.null_count), int(imp.arr.offset)) - case *arrow.StructType: - if err := imp.checkNumBuffers(1); err != nil { - return err - } - - nulls, err := imp.importNullBitmap(0) - if err != nil { - return err - } - if nulls != nil { - defer nulls.Release() - } - - children := make([]arrow.ArrayData, len(imp.children)) - for i := range imp.children { - children[i] = imp.children[i].data - } - - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, children, int(imp.arr.null_count), int(imp.arr.offset)) - case *arrow.RunEndEncodedType: - if err := imp.checkNumBuffers(0); err != nil { - return err - } - - if len(imp.children) != 2 { - return fmt.Errorf("%w: run-end encoded array should have 2 children", arrow.ErrInvalid) - } - - children := []arrow.ArrayData{imp.children[0].data, imp.children[1].data} - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{}, children, int(imp.arr.null_count), int(imp.arr.offset)) - case *arrow.DenseUnionType: - if err := imp.checkNoNulls(); err != nil { - return err - } - - bufs := []*memory.Buffer{nil, nil, nil} - var err error - if imp.arr.n_buffers == 3 { - // legacy format exported by older arrow c++ versions - if bufs[1], err = imp.importFixedSizeBuffer(1, 1); err != nil { - return err - } - defer bufs[1].Release() - if bufs[2], err = imp.importFixedSizeBuffer(2, int64(arrow.Int32SizeBytes)); err != nil { - return err - } - defer bufs[2].Release() - } else { - if err := imp.checkNumBuffers(2); err != nil { - return err - } - - if bufs[1], err = imp.importFixedSizeBuffer(0, 1); err != nil { - return err - } - defer bufs[1].Release() - if bufs[2], err = imp.importFixedSizeBuffer(1, int64(arrow.Int32SizeBytes)); err != nil { - return err - } - defer bufs[2].Release() - } - - children := make([]arrow.ArrayData, len(imp.children)) - for i := range imp.children { - children[i] = imp.children[i].data - } - imp.data = array.NewData(dt, int(imp.arr.length), bufs, children, 0, int(imp.arr.offset)) - 
case *arrow.SparseUnionType: - if err := imp.checkNoNulls(); err != nil { - return err - } - - var buf *memory.Buffer - var err error - if imp.arr.n_buffers == 2 { - // legacy format exported by older Arrow C++ versions - if buf, err = imp.importFixedSizeBuffer(1, 1); err != nil { - return err - } - defer buf.Release() - } else { - if err := imp.checkNumBuffers(1); err != nil { - return err - } - - if buf, err = imp.importFixedSizeBuffer(0, 1); err != nil { - return err - } - defer buf.Release() - } - - children := make([]arrow.ArrayData, len(imp.children)) - for i := range imp.children { - children[i] = imp.children[i].data - } - imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nil, buf}, children, 0, int(imp.arr.offset)) - default: - return fmt.Errorf("unimplemented type %s", dt) - } - - return nil -} - -func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) { - if err = imp.checkNoChildren(); err != nil { - return - } - - if err = imp.checkNumBuffers(3); err != nil { - return - } - - var ( - nulls, offsets, values *memory.Buffer - ) - if nulls, err = imp.importNullBitmap(0); err != nil { - return - } - if nulls != nil { - defer nulls.Release() - } - - if offsets, err = imp.importOffsetsBuffer(1, offsetByteWidth); err != nil { - return - } - defer offsets.Release() - - var nvals int64 - switch offsetByteWidth { - case 4: - typedOffsets := arrow.Int32Traits.CastFromBytes(offsets.Bytes()) - nvals = int64(typedOffsets[imp.arr.offset+imp.arr.length]) - case 8: - typedOffsets := arrow.Int64Traits.CastFromBytes(offsets.Bytes()) - nvals = typedOffsets[imp.arr.offset+imp.arr.length] - } - if values, err = imp.importVariableValuesBuffer(2, 1, nvals); err != nil { - return - } - defer values.Release() - - imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importBinaryViewLike() (err error) { - if err = 
imp.checkNoChildren(); err != nil { - return - } - - buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) - defer memory.ReleaseBuffers(buffers) - - if buffers[0], err = imp.importNullBitmap(0); err != nil { - return - } - - if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil { - return - } - - dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) - for i, size := range dataBufferSizes { - if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { - return - } - } - - imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importListLike() (err error) { - if err = imp.checkNumChildren(1); err != nil { - return err - } - - if err = imp.checkNumBuffers(2); err != nil { - return err - } - - var nulls, offsets *memory.Buffer - if nulls, err = imp.importNullBitmap(0); err != nil { - return - } - if nulls != nil { - defer nulls.Release() - } - - offsetSize := imp.dt.Layout().Buffers[1].ByteWidth - if offsets, err = imp.importOffsetsBuffer(1, int64(offsetSize)); err != nil { - return - } - if offsets != nil { - defer offsets.Release() - } - - imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importListViewLike() (err error) { - offsetSize := int64(imp.dt.Layout().Buffers[1].ByteWidth) - - if err = imp.checkNumChildren(1); err != nil { - return err - } - - if err = imp.checkNumBuffers(3); err != nil { - return err - } - - var nulls, offsets, sizes *memory.Buffer - if nulls, err = imp.importNullBitmap(0); err != nil { - return - } - if nulls != nil { - defer nulls.Release() - } - - if offsets, err = imp.importFixedSizeBuffer(1, offsetSize); err != nil { - return - } - if offsets != nil { - defer 
offsets.Release() - } - - if sizes, err = imp.importFixedSizeBuffer(2, offsetSize); err != nil { - return - } - if sizes != nil { - defer sizes.Release() - } - - imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, sizes}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) - return -} - -func (imp *cimporter) importFixedSizePrimitive() error { - if err := imp.checkNoChildren(); err != nil { - return err - } - - if err := imp.checkNumBuffers(2); err != nil { - return err - } - - nulls, err := imp.importNullBitmap(0) - if err != nil { - return err - } - - var values *memory.Buffer - - fw := imp.dt.(arrow.FixedWidthDataType) - if bitutil.IsMultipleOf8(int64(fw.BitWidth())) { - values, err = imp.importFixedSizeBuffer(1, bitutil.BytesForBits(int64(fw.BitWidth()))) - } else { - if fw.BitWidth() != 1 { - return xerrors.New("invalid bitwidth") - } - values, err = imp.importBitsBuffer(1) - } - - if err != nil { - return err - } - - var dict *array.Data - if dt, ok := imp.dt.(*arrow.DictionaryType); ok { - dictImp := &cimporter{dt: dt.ValueType} - if err := dictImp.importChild(imp, imp.arr.dictionary); err != nil { - return err - } - defer dictImp.data.Release() - - dict = dictImp.data.(*array.Data) - } - - if nulls != nil { - defer nulls.Release() - } - if values != nil { - defer values.Release() - } - - imp.data = array.NewDataWithDictionary(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, values}, int(imp.arr.null_count), int(imp.arr.offset), dict) - return nil -} - -func (imp *cimporter) checkNoChildren() error { return imp.checkNumChildren(0) } - -func (imp *cimporter) checkNoNulls() error { - if imp.arr.null_count != 0 { - return fmt.Errorf("%w: unexpected non-zero null count for imported type %s", arrow.ErrInvalid, imp.dt) - } - return nil -} - -func (imp *cimporter) checkNumChildren(n int64) error { - if int64(imp.arr.n_children) != n { - return fmt.Errorf("expected %d children, for imported 
type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_children) - } - return nil -} - -func (imp *cimporter) checkNumBuffers(n int64) error { - if int64(imp.arr.n_buffers) != n { - return fmt.Errorf("expected %d buffers for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_buffers) - } - return nil -} - -func (imp *cimporter) importBuffer(bufferID int, sz int64) (*memory.Buffer, error) { - // this is not a copy, we're just having a slice which points at the data - // it's still owned by the C.ArrowArray object and its backing C++ object. - if imp.cbuffers[bufferID] == nil { - if sz != 0 { - return nil, errors.New("invalid buffer") - } - return memory.NewBufferBytes([]byte{}), nil - } - data := unsafe.Slice((*byte)(unsafe.Pointer(imp.cbuffers[bufferID])), sz) - imp.alloc.addBuffer() - return memory.NewBufferWithAllocator(data, imp.alloc), nil -} - -func (imp *cimporter) importBitsBuffer(bufferID int) (*memory.Buffer, error) { - bufsize := bitutil.BytesForBits(int64(imp.arr.length) + int64(imp.arr.offset)) - return imp.importBuffer(bufferID, bufsize) -} - -func (imp *cimporter) importNullBitmap(bufferID int) (*memory.Buffer, error) { - if imp.arr.null_count > 0 && imp.cbuffers[bufferID] == nil { - return nil, fmt.Errorf("arrowarray struct has null bitmap buffer, but non-zero null_count %d", imp.arr.null_count) - } - - if imp.arr.null_count == 0 && imp.cbuffers[bufferID] == nil { - return nil, nil - } - - return imp.importBitsBuffer(bufferID) -} - -func (imp *cimporter) importFixedSizeBuffer(bufferID int, byteWidth int64) (*memory.Buffer, error) { - bufsize := byteWidth * int64(imp.arr.length+imp.arr.offset) - return imp.importBuffer(bufferID, bufsize) -} - -func (imp *cimporter) importOffsetsBuffer(bufferID int, offsetsize int64) (*memory.Buffer, error) { - bufsize := offsetsize * int64((imp.arr.length + imp.arr.offset + 1)) - return imp.importBuffer(bufferID, bufsize) -} - -func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth, nvals int64) 
(*memory.Buffer, error) { - bufsize := byteWidth * nvals - return imp.importBuffer(bufferID, int64(bufsize)) -} - -func importCArrayAsType(arr *CArrowArray, dt arrow.DataType) (imp *cimporter, err error) { - imp = &cimporter{dt: dt} - err = imp.doImportArr(arr) - return -} - -func initReader(rdr *nativeCRecordBatchReader, stream *CArrowArrayStream) error { - rdr.stream = C.get_stream() - C.ArrowArrayStreamMove(stream, rdr.stream) - rdr.arr = C.get_arr() - runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { - if r.cur != nil { - r.cur.Release() - } - C.ArrowArrayStreamRelease(r.stream) - C.ArrowArrayRelease(r.arr) - C.free(unsafe.Pointer(r.stream)) - C.free(unsafe.Pointer(r.arr)) - }) - - var sc CArrowSchema - errno := C.stream_get_schema(rdr.stream, &sc) - if errno != 0 { - return rdr.getError(int(errno)) - } - defer C.ArrowSchemaRelease(&sc) - s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) - if err != nil { - return err - } - rdr.schema = s - - return nil -} - -// Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface -type nativeCRecordBatchReader struct { - stream *CArrowArrayStream - arr *CArrowArray - schema *arrow.Schema - - cur arrow.Record - err error -} - -// No need to implement retain and release here as we used runtime.SetFinalizer when constructing -// the reader to free up the ArrowArrayStream memory when the garbage collector cleans it up. 
-func (n *nativeCRecordBatchReader) Retain() {} -func (n *nativeCRecordBatchReader) Release() {} - -func (n *nativeCRecordBatchReader) Err() error { return n.err } -func (n *nativeCRecordBatchReader) Record() arrow.Record { return n.cur } - -func (n *nativeCRecordBatchReader) Next() bool { - err := n.next() - switch { - case err == nil: - return true - case err == io.EOF: - return false - } - n.err = err - return false -} - -func (n *nativeCRecordBatchReader) next() error { - if n.schema == nil { - var sc CArrowSchema - errno := C.stream_get_schema(n.stream, &sc) - if errno != 0 { - return n.getError(int(errno)) - } - defer C.ArrowSchemaRelease(&sc) - s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) - if err != nil { - return err - } - - n.schema = s - } - - if n.cur != nil { - n.cur.Release() - n.cur = nil - } - - errno := C.stream_get_next(n.stream, n.arr) - if errno != 0 { - return n.getError(int(errno)) - } - - if C.ArrowArrayIsReleased(n.arr) == 1 { - return io.EOF - } - - rec, err := ImportCRecordBatchWithSchema(n.arr, n.schema) - if err != nil { - return err - } - - n.cur = rec - return nil -} - -func (n *nativeCRecordBatchReader) Schema() *arrow.Schema { - return n.schema -} - -func (n *nativeCRecordBatchReader) getError(errno int) error { - return fmt.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(n.stream))) -} - -func (n *nativeCRecordBatchReader) Read() (arrow.Record, error) { - if err := n.next(); err != nil { - n.err = err - return nil, err - } - return n.cur, nil -} - -func releaseArr(arr *CArrowArray) { - C.ArrowArrayRelease(arr) -} - -func releaseSchema(schema *CArrowSchema) { - C.ArrowSchemaRelease(schema) -} diff --git a/go/arrow/cdata/cdata_allocate.go b/go/arrow/cdata/cdata_allocate.go deleted file mode 100644 index da0bd957de1df..0000000000000 --- a/go/arrow/cdata/cdata_allocate.go +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license 
agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package cdata - -// #include -// #include "arrow/c/abi.h" -import "C" - -import ( - "unsafe" -) - -func allocateArrowSchemaArr(n int) (out []CArrowSchema) { - return unsafe.Slice((*CArrowSchema)(C.calloc(C.size_t(n), - C.sizeof_struct_ArrowSchema)), n) -} - -func allocateArrowSchemaPtrArr(n int) (out []*CArrowSchema) { - return unsafe.Slice((**CArrowSchema)(C.calloc(C.size_t(n), - C.size_t(unsafe.Sizeof((*CArrowSchema)(nil))))), n) -} - -func allocateArrowArrayArr(n int) (out []CArrowArray) { - return unsafe.Slice((*CArrowArray)(C.calloc(C.size_t(n), - C.sizeof_struct_ArrowArray)), n) -} - -func allocateArrowArrayPtrArr(n int) (out []*CArrowArray) { - return unsafe.Slice((**CArrowArray)(C.calloc(C.size_t(n), - C.size_t(unsafe.Sizeof((*CArrowArray)(nil))))), n) -} - -func allocateBufferPtrArr(n int) (out []*C.void) { - return unsafe.Slice((**C.void)(C.calloc(C.size_t(n), - C.size_t(unsafe.Sizeof((*C.void)(nil))))), n) -} - -func allocateBufferSizeArr(n int) (out []C.int64_t) { - return unsafe.Slice((*C.int64_t)(C.calloc(C.size_t(n), - C.sizeof_int64_t)), n) -} diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go deleted file mode 100644 index 59775926d7ef8..0000000000000 --- a/go/arrow/cdata/cdata_exports.go 
+++ /dev/null @@ -1,480 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package cdata - -// #include -// #include -// #include -// #include "arrow/c/abi.h" -// #include "arrow/c/helpers.h" -// -// extern void releaseExportedSchema(struct ArrowSchema* schema); -// extern void releaseExportedArray(struct ArrowArray* array); -// -// const uint8_t kGoCdataZeroRegion[8] = {0}; -// -// void goReleaseArray(struct ArrowArray* array) { -// releaseExportedArray(array); -// } -// void goReleaseSchema(struct ArrowSchema* schema) { -// releaseExportedSchema(schema); -// } -import "C" - -import ( - "bytes" - "encoding/binary" - "fmt" - "runtime/cgo" - "strconv" - "strings" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/internal" - "github.com/apache/arrow/go/v18/arrow/ipc" -) - -func encodeCMetadata(keys, values []string) []byte { - if len(keys) != len(values) { - panic("unequal metadata key/values length") - } - npairs := int32(len(keys)) - - var b bytes.Buffer - totalSize := 4 - for i := range keys { - totalSize += 8 + len(keys[i]) + 
len(values[i]) - } - b.Grow(totalSize) - - b.Write((*[4]byte)(unsafe.Pointer(&npairs))[:]) - for i := range keys { - binary.Write(&b, endian.Native, int32(len(keys[i]))) - b.WriteString(keys[i]) - binary.Write(&b, endian.Native, int32(len(values[i]))) - b.WriteString(values[i]) - } - return b.Bytes() -} - -type schemaExporter struct { - format, name string - - extraMeta arrow.Metadata - metadata []byte - flags int64 - children []schemaExporter - dict *schemaExporter -} - -func (exp *schemaExporter) handleExtension(dt arrow.DataType) arrow.DataType { - if dt.ID() != arrow.EXTENSION { - return dt - } - - ext := dt.(arrow.ExtensionType) - exp.extraMeta = arrow.NewMetadata([]string{ipc.ExtensionTypeKeyName, ipc.ExtensionMetadataKeyName}, []string{ext.ExtensionName(), ext.Serialize()}) - return ext.StorageType() -} - -func (exp *schemaExporter) exportMeta(m *arrow.Metadata) { - var ( - finalKeys []string - finalValues []string - ) - - if m == nil { - if exp.extraMeta.Len() > 0 { - finalKeys = exp.extraMeta.Keys() - finalValues = exp.extraMeta.Values() - } - exp.metadata = encodeCMetadata(finalKeys, finalValues) - return - } - - finalKeys = m.Keys() - finalValues = m.Values() - - if exp.extraMeta.Len() > 0 { - for i, k := range exp.extraMeta.Keys() { - if m.FindKey(k) != -1 { - continue - } - finalKeys = append(finalKeys, k) - finalValues = append(finalValues, exp.extraMeta.Values()[i]) - } - } - exp.metadata = encodeCMetadata(finalKeys, finalValues) -} - -func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { - switch dt := dt.(type) { - case *arrow.NullType: - return "n" - case *arrow.BooleanType: - return "b" - case *arrow.Int8Type: - return "c" - case *arrow.Uint8Type: - return "C" - case *arrow.Int16Type: - return "s" - case *arrow.Uint16Type: - return "S" - case *arrow.Int32Type: - return "i" - case *arrow.Uint32Type: - return "I" - case *arrow.Int64Type: - return "l" - case *arrow.Uint64Type: - return "L" - case *arrow.Float16Type: - return "e" - case 
*arrow.Float32Type: - return "f" - case *arrow.Float64Type: - return "g" - case *arrow.FixedSizeBinaryType: - return fmt.Sprintf("w:%d", dt.ByteWidth) - case *arrow.Decimal128Type: - return fmt.Sprintf("d:%d,%d", dt.Precision, dt.Scale) - case *arrow.Decimal256Type: - return fmt.Sprintf("d:%d,%d,256", dt.Precision, dt.Scale) - case *arrow.BinaryType: - return "z" - case *arrow.LargeBinaryType: - return "Z" - case *arrow.StringType: - return "u" - case *arrow.LargeStringType: - return "U" - case *arrow.BinaryViewType: - return "vz" - case *arrow.StringViewType: - return "vu" - case *arrow.Date32Type: - return "tdD" - case *arrow.Date64Type: - return "tdm" - case *arrow.Time32Type: - switch dt.Unit { - case arrow.Second: - return "tts" - case arrow.Millisecond: - return "ttm" - default: - panic(fmt.Sprintf("invalid time unit for time32: %s", dt.Unit)) - } - case *arrow.Time64Type: - switch dt.Unit { - case arrow.Microsecond: - return "ttu" - case arrow.Nanosecond: - return "ttn" - default: - panic(fmt.Sprintf("invalid time unit for time64: %s", dt.Unit)) - } - case *arrow.TimestampType: - var b strings.Builder - switch dt.Unit { - case arrow.Second: - b.WriteString("tss:") - case arrow.Millisecond: - b.WriteString("tsm:") - case arrow.Microsecond: - b.WriteString("tsu:") - case arrow.Nanosecond: - b.WriteString("tsn:") - default: - panic(fmt.Sprintf("invalid time unit for timestamp: %s", dt.Unit)) - } - b.WriteString(dt.TimeZone) - return b.String() - case *arrow.DurationType: - switch dt.Unit { - case arrow.Second: - return "tDs" - case arrow.Millisecond: - return "tDm" - case arrow.Microsecond: - return "tDu" - case arrow.Nanosecond: - return "tDn" - default: - panic(fmt.Sprintf("invalid time unit for duration: %s", dt.Unit)) - } - case *arrow.MonthIntervalType: - return "tiM" - case *arrow.DayTimeIntervalType: - return "tiD" - case *arrow.MonthDayNanoIntervalType: - return "tin" - case *arrow.ListType: - return "+l" - case *arrow.LargeListType: - return "+L" - 
case *arrow.ListViewType: - return "+vl" - case *arrow.LargeListViewType: - return "+vL" - case *arrow.FixedSizeListType: - return fmt.Sprintf("+w:%d", dt.Len()) - case *arrow.StructType: - return "+s" - case *arrow.RunEndEncodedType: - return "+r" - case *arrow.MapType: - if dt.KeysSorted { - exp.flags |= C.ARROW_FLAG_MAP_KEYS_SORTED - } - return "+m" - case *arrow.DictionaryType: - if dt.Ordered { - exp.flags |= C.ARROW_FLAG_DICTIONARY_ORDERED - } - return exp.exportFormat(dt.IndexType) - case arrow.UnionType: - var b strings.Builder - if dt.Mode() == arrow.SparseMode { - b.WriteString("+us:") - } else { - b.WriteString("+ud:") - } - for i, c := range dt.TypeCodes() { - if i != 0 { - b.WriteByte(',') - } - b.WriteString(strconv.Itoa(int(c))) - } - return b.String() - } - panic("unsupported data type for export") -} - -func (exp *schemaExporter) export(field arrow.Field) { - exp.name = field.Name - exp.format = exp.exportFormat(exp.handleExtension(field.Type)) - if field.Nullable { - exp.flags |= C.ARROW_FLAG_NULLABLE - } - - switch dt := field.Type.(type) { - case *arrow.DictionaryType: - exp.dict = new(schemaExporter) - exp.dict.export(arrow.Field{Type: dt.ValueType}) - case arrow.NestedType: - exp.children = make([]schemaExporter, dt.NumFields()) - for i, f := range dt.Fields() { - exp.children[i].export(f) - } - } - - exp.exportMeta(&field.Metadata) -} - -func (exp *schemaExporter) finish(out *CArrowSchema) { - out.dictionary = nil - if exp.dict != nil { - out.dictionary = (*CArrowSchema)(C.calloc(C.sizeof_struct_ArrowSchema, C.size_t(1))) - exp.dict.finish(out.dictionary) - } - out.name = C.CString(exp.name) - out.format = C.CString(exp.format) - out.metadata = (*C.char)(C.CBytes(exp.metadata)) - out.flags = C.int64_t(exp.flags) - out.n_children = C.int64_t(len(exp.children)) - - if len(exp.children) > 0 { - children := allocateArrowSchemaArr(len(exp.children)) - childPtrs := allocateArrowSchemaPtrArr(len(exp.children)) - - for i, c := range exp.children { - 
c.finish(&children[i]) - childPtrs[i] = &children[i] - } - - out.children = (**CArrowSchema)(unsafe.Pointer(&childPtrs[0])) - } else { - out.children = nil - } - - out.release = (*[0]byte)(C.goReleaseSchema) -} - -func exportField(field arrow.Field, out *CArrowSchema) { - var exp schemaExporter - exp.export(field) - exp.finish(out) -} - -func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { - if outSchema != nil { - exportField(arrow.Field{Type: arr.DataType()}, outSchema) - } - - buffers := arr.Data().Buffers() - // Some types don't have validity bitmaps, but we keep them shifted - // to make processing easier in other contexts. This means that - // we have to adjust when exporting. - has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if len(buffers) > 0 && !has_validity_bitmap { - buffers = buffers[1:] - } - nbuffers := len(buffers) - - has_buffer_sizes_buffer := internal.HasBufferSizesBuffer(arr.DataType().ID()) - if has_buffer_sizes_buffer { - nbuffers++ - } - - out.dictionary = nil - out.null_count = C.int64_t(arr.NullN()) - out.length = C.int64_t(arr.Len()) - out.offset = C.int64_t(arr.Data().Offset()) - out.n_buffers = C.int64_t(nbuffers) - out.buffers = nil - - if nbuffers > 0 { - cBufs := allocateBufferPtrArr(nbuffers) - for i, buf := range buffers { - if buf == nil || buf.Len() == 0 { - if i > 0 || !has_validity_bitmap { - // apache/arrow#33936: export a dummy buffer to be friendly to - // implementations that don't import NULL properly - cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) - } else { - // null pointer permitted for the validity bitmap - // (assuming null count is 0) - cBufs[i] = nil - } - continue - } - - cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) - } - - if has_buffer_sizes_buffer { - sizes := allocateBufferSizeArr(len(buffers[2:])) - for i, buf := range buffers[2:] { - sizes[i] = C.int64_t(buf.Len()) - } - if len(sizes) > 0 { - cBufs[nbuffers-1] = 
(*C.void)(unsafe.Pointer(&sizes[0])) - } - } - out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) - } - - arr.Data().Retain() - h := cgo.NewHandle(arr.Data()) - out.private_data = createHandle(h) - out.release = (*[0]byte)(C.goReleaseArray) - switch arr := arr.(type) { - case array.ListLike: - out.n_children = 1 - childPtrs := allocateArrowArrayPtrArr(1) - children := allocateArrowArrayArr(1) - exportArray(arr.ListValues(), &children[0], nil) - childPtrs[0] = &children[0] - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - case *array.Struct: - out.n_children = C.int64_t(arr.NumField()) - childPtrs := allocateArrowArrayPtrArr(arr.NumField()) - children := allocateArrowArrayArr(arr.NumField()) - for i := 0; i < arr.NumField(); i++ { - exportArray(arr.Field(i), &children[i], nil) - childPtrs[i] = &children[i] - } - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - case *array.RunEndEncoded: - out.n_children = 2 - childPtrs := allocateArrowArrayPtrArr(2) - children := allocateArrowArrayArr(2) - exportArray(arr.RunEndsArr(), &children[0], nil) - exportArray(arr.Values(), &children[1], nil) - childPtrs[0], childPtrs[1] = &children[0], &children[1] - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - case *array.Dictionary: - out.dictionary = (*CArrowArray)(C.calloc(C.sizeof_struct_ArrowArray, C.size_t(1))) - exportArray(arr.Dictionary(), out.dictionary, nil) - case array.Union: - out.n_children = C.int64_t(arr.NumFields()) - childPtrs := allocateArrowArrayPtrArr(arr.NumFields()) - children := allocateArrowArrayArr(arr.NumFields()) - for i := 0; i < arr.NumFields(); i++ { - exportArray(arr.Field(i), &children[i], nil) - childPtrs[i] = &children[i] - } - out.children = (**CArrowArray)(unsafe.Pointer(&childPtrs[0])) - default: - out.n_children = 0 - out.children = nil - } -} - -type cRecordReader struct { - rdr array.RecordReader - err *C.char -} - -func (rr cRecordReader) getSchema(out *CArrowSchema) int { - schema := 
rr.rdr.Schema() - if schema == nil { - return rr.maybeError() - } - ExportArrowSchema(schema, out) - return 0 -} - -func (rr cRecordReader) next(out *CArrowArray) int { - if rr.rdr.Next() { - ExportArrowRecordBatch(rr.rdr.Record(), out, nil) - return 0 - } - C.ArrowArrayMarkReleased(out) - return rr.maybeError() -} - -func (rr cRecordReader) maybeError() int { - err := rr.rdr.Err() - if err != nil { - return C.EIO - } - return 0 -} - -func (rr cRecordReader) getLastError() *C.char { - err := rr.rdr.Err() - if err != nil { - if rr.err != nil { - C.free(unsafe.Pointer(rr.err)) - } - rr.err = C.CString(err.Error()) - } - return rr.err -} - -func (rr cRecordReader) release() { - if rr.err != nil { - C.free(unsafe.Pointer(rr.err)) - } - rr.rdr.Release() -} diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c deleted file mode 100644 index 4291cfff865b5..0000000000000 --- a/go/arrow/cdata/cdata_fulltest.c +++ /dev/null @@ -1,494 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// +build cgo -// +build test - -#include -#include -#include -#include -#include -#include -#include "arrow/c/abi.h" -#include "arrow/c/helpers.h" -#include "utils.h" - -int is_little_endian() -{ - unsigned int x = 1; - char *c = (char*) &x; - return (int)*c; -} - -static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; - -extern void releaseTestArr(struct ArrowArray* array); -void goReleaseTestArray(struct ArrowArray* array) { - releaseTestArr(array); -} - -static void release_int32_type(struct ArrowSchema* schema) { - // mark released - schema->release = NULL; -} - -void export_int32_type(struct ArrowSchema* schema) { - const char* encoded_metadata; - if (is_little_endian() == 1) { - encoded_metadata = kEncodedMeta1LE; - } else { - encoded_metadata = kEncodedMeta1BE; - } - *schema = (struct ArrowSchema) { - // Type description - .format = "i", - .name = "", - .metadata = encoded_metadata, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_int32_type, - }; -} - -static bool test1_released = false; - -int test1_is_released() { return test1_released; } - -static void release_int32_array(struct ArrowArray* array) { - assert(array->n_buffers == 2); - // free the buffers and buffers array - free((void *) array->buffers[1]); - free(array->buffers); - // mark released - array->release = NULL; - test1_released = true; -} - -void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { - // initialize primitive fields - *array = (struct ArrowArray) { - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_int32_array - }; - - // allocate list of buffers - array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); - assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - array->buffers[1] = data; -} - - 
-static void release_primitive(struct ArrowSchema* schema) { - free((void *)schema->format); - schema->release = NULL; -} - -static void release_nested_internal(struct ArrowSchema* schema, - int is_dynamic) { - assert(!ArrowSchemaIsReleased(schema)); - for (int i = 0; i < schema->n_children; ++i) { - ArrowSchemaRelease(schema->children[i]); - free(schema->children[i]); - } - if (is_dynamic) { - free((void*)schema->format); - free((void*)schema->name); - } - ArrowSchemaMarkReleased(schema); -} - -static void release_nested_static(struct ArrowSchema* schema) { - release_nested_internal(schema, /*is_dynamic=*/0); -} - -static void release_nested_dynamic(struct ArrowSchema* schema) { - release_nested_internal(schema, /*is_dynamic=*/1); -} - -static void release_nested_dynamic_toplevel(struct ArrowSchema* schema) { - assert(!ArrowSchemaIsReleased(schema)); - for (int i = 0; i < schema->n_children; ++i) { - ArrowSchemaRelease(schema->children[i]); - free(schema->children[i]); - } - free((void*)schema->format); - if (strlen(schema->name) > 0) { - free((void*)schema->name); - } - ArrowSchemaMarkReleased(schema); -} - -void test_primitive(struct ArrowSchema* schema, const char* fmt) { - *schema = (struct ArrowSchema) { - // Type description - .format = fmt, - .name = "", - .metadata = NULL, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_primitive, - }; -} - -// Since test_lists et al. allocate an entirely array of ArrowSchema pointers, -// need to expose a function to free it. 
-void free_malloced_schemas(struct ArrowSchema** schemas) { - free(schemas); -} - -struct ArrowSchema** test_lists(const char** fmts, const char** names, const int* nullflags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = 0, - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - if (i != 0) { - schemas[i-1]->n_children = 1; - schemas[i-1]->children = &schemas[i]; - schemas[i]->flags = nullflags[i-1]; - } - } - return schemas; -} - -struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = flags[i], - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - } - - schemas[0]->children = &schemas[1]; - schemas[0]->n_children = n-1; - return schemas; -} - -struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); - - if (is_little_endian() == 1) { - schemas[n-1]->metadata = kEncodedMeta2LE; - } else { - schemas[n-1]->metadata = kEncodedMeta2BE; - } - - return schemas; -} - -struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); - - if (is_little_endian() == 1) { - schemas[0]->metadata = kEncodedMeta2LE; - schemas[n-1]->metadata = kEncodedMeta1LE; - } else { - schemas[0]->metadata = kEncodedMeta2BE; - 
schemas[n-1]->metadata = kEncodedMeta1BE; - } - return schemas; -} - -struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = flags[i], - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - } - - schemas[0]->n_children = 1; - schemas[0]->children = &schemas[1]; - schemas[1]->n_children = n-2; - schemas[1]->children = &schemas[2]; - - return schemas; -} - -struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); - for (int i = 0; i < n; ++i) { - schemas[i] = malloc(sizeof(struct ArrowSchema)); - *schemas[i] = (struct ArrowSchema) { - .format = fmts[i], - .name = names[i], - .metadata = NULL, - .flags = flags[i], - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_dynamic, - }; - } - - schemas[0]->n_children = n-1; - schemas[0]->children = &schemas[1]; - return schemas; -} - -struct streamcounter { - int n; - int max; -}; - -static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { - out->children = malloc(sizeof(struct ArrowSchema*)*2); - out->n_children = 2; - - out->children[0] = malloc(sizeof(struct ArrowSchema)); - *out->children[0] = (struct ArrowSchema) { - .format = "i", - .name = "a", - .metadata = NULL, - .flags = ARROW_FLAG_NULLABLE, - .children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_static, - }; - - out->children[1] = malloc(sizeof(struct ArrowSchema)); - *out->children[1] = (struct ArrowSchema) { - .format = "u", - .name = "b", - .metadata = NULL, - .flags = ARROW_FLAG_NULLABLE, - 
.children = NULL, - .n_children = 0, - .dictionary = NULL, - .release = &release_nested_static, - }; - - out->format = "+s"; - out->release = &release_nested_static; - - return 0; -} - -static void release_stream(struct ArrowArrayStream* st) { - free(st->private_data); - ArrowArrayStreamMarkReleased(st); -} - -static void release_the_array(struct ArrowArray* out) { - for (int i = 0; i < out->n_children; ++i) { - ArrowArrayRelease(out->children[i]); - } - free((void*)out->children); - free(out->buffers); - out->release = NULL; -} - -void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); - -static void release_str_array(struct ArrowArray* array) { - assert(array->n_buffers == 3); - free((void*) array->buffers[1]); - free((void*) array->buffers[2]); - free(array->buffers); - array->release = NULL; -} - -void export_str_array(const char* data, const int32_t* offsets, int64_t nitems, struct ArrowArray* out) { - *out = (struct ArrowArray) { - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 3, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_str_array - }; - - out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers); - assert(out->buffers != NULL); - out->buffers[0] = NULL; - out->buffers[1] = offsets; - out->buffers[2] = data; -} - -static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) { - struct streamcounter* cnter = (struct streamcounter*)(st->private_data); - if (cnter->n == cnter->max) { - ArrowArrayMarkReleased(out); - return 0; - } - - cnter->n++; - - *out = (struct ArrowArray) { - .offset = 0, - .dictionary = NULL, - .length = 3, - .null_count = 0, - .buffers = (const void**)malloc(sizeof(void*)), - .n_children = 2, - .n_buffers = 1, - .release = &release_the_array - }; - - out->buffers[0] = NULL; - out->children = (struct ArrowArray**)malloc(sizeof(struct ArrowArray*)*2); - int32_t* intdata = malloc(sizeof(int32_t)*3); - for (int i = 0; i < 3; 
++i) { - intdata[i] = cnter->n * (i+1); - } - - out->children[0] = malloc(sizeof(struct ArrowArray)); - export_int32_array(intdata, 3, out->children[0]); - out->children[1] = malloc(sizeof(struct ArrowArray)); - char* strdata = strdup("foobarbaz"); - int32_t* offsets = malloc(sizeof(int32_t)*4); - offsets[0] = 0; - offsets[1] = 3; - offsets[2] = 6; - offsets[3] = 9; - export_str_array(strdata, offsets, 3, out->children[1]); - - return 0; -} - -void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) { - struct streamcounter* cnt = malloc(sizeof(struct streamcounter)); - cnt->max = n_batches; - cnt->n = 0; - - out->get_next = &next_record; - out->get_schema = &stream_schema; - out->release = &release_stream; - out->private_data = cnt; -} - -int test_exported_stream(struct ArrowArrayStream* stream) { - while (1) { - struct ArrowArray array; - memset(&array, 0, sizeof(array)); - // Garbage - implementation should not try to call it, though! - array.release = (void*)0xDEADBEEF; - int rc = stream->get_next(stream, &array); - if (rc != 0) return rc; - - if (array.release == NULL) { - stream->release(stream); - break; - } - } - return 0; -} - -struct FallibleStream { - // empty structs are a GNU extension - int dummy; -}; - -const char* FallibleGetLastError(struct ArrowArrayStream* stream) { - return "Expected error message"; -} - -int FallibleGetSchema(struct ArrowArrayStream* stream, struct ArrowSchema* schema) { - return EINVAL; -} - -int FallibleGetNext(struct ArrowArrayStream* stream, struct ArrowArray* array) { - return EINVAL; -} - -void FallibleRelease(struct ArrowArrayStream* stream) { - memset(stream, 0, sizeof(*stream)); -} - -static struct FallibleStream kFallibleStream; - -void test_stream_schema_fallible(struct ArrowArrayStream* stream) { - stream->get_last_error = FallibleGetLastError; - stream->get_schema = FallibleGetSchema; - stream->get_next = FallibleGetNext; - stream->private_data = &kFallibleStream; - stream->release = 
FallibleRelease; -} - -int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed) { - struct ArrowSchema schema; - // Try to confuse the Go GC by putting what looks like a Go pointer here. -#ifdef _WIN32 - // Thread-safe on Windows with the multithread CRT -#define DORAND rand() -#else -#define DORAND rand_r(&seed) -#endif - schema.name = (char*)(0xc000000000L + (DORAND % 0x2000)); - schema.format = (char*)(0xc000000000L + (DORAND % 0x2000)); - int rc = stream->get_schema(stream, &schema); - if (rc != 0) return rc; - schema.release(&schema); - - while (1) { - struct ArrowArray array; - array.release = (void*)(0xc000000000L + (DORAND % 0x2000)); - array.private_data = (void*)(0xc000000000L + (DORAND % 0x2000)); - int rc = stream->get_next(stream, &array); - if (rc != 0) return rc; - - if (array.release == NULL) { - stream->release(stream); - break; - } - array.release(&array); - } - return 0; -#undef DORAND -} diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go deleted file mode 100644 index 3563aeb5f0f1e..0000000000000 --- a/go/arrow/cdata/cdata_test.go +++ /dev/null @@ -1,1027 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build cgo && test -// +build cgo,test - -// use test tag so that we only run these tests when the "test" tag is present -// so that the .c and other framework infrastructure is only compiled in during -// testing, and the .c files and symbols are not present in release builds. - -package cdata - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "runtime" - "runtime/cgo" - "sync" - "testing" - "time" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/internal/arrdata" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/memory/mallocator" - "github.com/stretchr/testify/assert" -) - -func TestSchemaExport(t *testing.T) { - sc := exportInt32TypeSchema() - f, err := importSchema(&sc) - assert.NoError(t, err) - - keys, _ := getMetadataKeys() - vals, _ := getMetadataValues() - - assert.Equal(t, arrow.PrimitiveTypes.Int32, f.Type) - assert.Equal(t, keys, f.Metadata.Keys()) - assert.Equal(t, vals, f.Metadata.Values()) - - // schema was released when importing - assert.True(t, schemaIsReleased(&sc)) -} - -func TestSimpleArrayExport(t *testing.T) { - assert.False(t, test1IsReleased()) - - testarr := exportInt32Array() - arr, err := ImportCArrayWithType(testarr, arrow.PrimitiveTypes.Int32) - assert.NoError(t, err) - - assert.False(t, test1IsReleased()) - assert.True(t, isReleased(testarr)) - - arr.Release() - runtime.GC() - assert.Eventually(t, test1IsReleased, 1*time.Second, 10*time.Millisecond) -} - -func TestSimpleArrayAndSchema(t *testing.T) { - sc := exportInt32TypeSchema() - testarr := exportInt32Array() - - // grab address of the buffer we stuck into the ArrowArray object - buflist := (*[2]unsafe.Pointer)(unsafe.Pointer(testarr.buffers)) - origvals := (*[10]int32)(unsafe.Pointer(buflist[1])) - - fld, arr, err := 
ImportCArray(testarr, &sc) - assert.NoError(t, err) - assert.Equal(t, arrow.PrimitiveTypes.Int32, fld.Type) - assert.EqualValues(t, 10, arr.Len()) - - // verify that the address is the same of the first integer for the - // slice that is being used by the arrow.Array and the original buffer - vals := arr.(*array.Int32).Int32Values() - assert.Same(t, &vals[0], &origvals[0]) - - // and that the values are correct - for i, v := range vals { - assert.Equal(t, int32(i+1), v) - } -} - -func TestPrimitiveSchemas(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmt string - }{ - {arrow.PrimitiveTypes.Int8, "c"}, - {arrow.PrimitiveTypes.Int16, "s"}, - {arrow.PrimitiveTypes.Int32, "i"}, - {arrow.PrimitiveTypes.Int64, "l"}, - {arrow.PrimitiveTypes.Uint8, "C"}, - {arrow.PrimitiveTypes.Uint16, "S"}, - {arrow.PrimitiveTypes.Uint32, "I"}, - {arrow.PrimitiveTypes.Uint64, "L"}, - {arrow.FixedWidthTypes.Boolean, "b"}, - {arrow.Null, "n"}, - {arrow.FixedWidthTypes.Float16, "e"}, - {arrow.PrimitiveTypes.Float32, "f"}, - {arrow.PrimitiveTypes.Float64, "g"}, - {&arrow.FixedSizeBinaryType{ByteWidth: 3}, "w:3"}, - {arrow.BinaryTypes.Binary, "z"}, - {arrow.BinaryTypes.LargeBinary, "Z"}, - {arrow.BinaryTypes.String, "u"}, - {arrow.BinaryTypes.LargeString, "U"}, - {&arrow.Decimal128Type{Precision: 16, Scale: 4}, "d:16,4"}, - {&arrow.Decimal128Type{Precision: 15, Scale: 0}, "d:15,0"}, - {&arrow.Decimal128Type{Precision: 15, Scale: -4}, "d:15,-4"}, - {&arrow.Decimal256Type{Precision: 15, Scale: -4}, "d:15,-4,256"}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testPrimitive(tt.fmt) - - f, err := ImportCArrowField(&sc) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(&sc)) - }) - } -} - -func TestDecimalSchemaErrors(t *testing.T) { - tests := []struct { - fmt string - errorMessage string - }{ - {"d:", "invalid decimal spec 'd:': wrong number of properties"}, - {"d:1", "invalid 
decimal spec 'd:1': wrong number of properties"}, - {"d:1,2,3,4", "invalid decimal spec 'd:1,2,3,4': wrong number of properties"}, - {"d:a,2,3", "could not parse decimal precision in 'd:a,2,3':"}, - {"d:1,a,3", "could not parse decimal scale in 'd:1,a,3':"}, - {"d:1,2,a", "could not parse decimal bitwidth in 'd:1,2,a':"}, - {"d:1,2,384", "only decimal128 and decimal256 are supported, got 'd:1,2,384'"}, - } - - for _, tt := range tests { - t.Run(tt.fmt, func(t *testing.T) { - sc := testPrimitive(tt.fmt) - - _, err := ImportCArrowField(&sc) - assert.Error(t, err) - assert.Contains(t, err.Error(), tt.errorMessage) - }) - } -} - -func TestImportTemporalSchema(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmt string - }{ - {arrow.FixedWidthTypes.Date32, "tdD"}, - {arrow.FixedWidthTypes.Date64, "tdm"}, - {arrow.FixedWidthTypes.Time32s, "tts"}, - {arrow.FixedWidthTypes.Time32ms, "ttm"}, - {arrow.FixedWidthTypes.Time64us, "ttu"}, - {arrow.FixedWidthTypes.Time64ns, "ttn"}, - {arrow.FixedWidthTypes.Duration_s, "tDs"}, - {arrow.FixedWidthTypes.Duration_ms, "tDm"}, - {arrow.FixedWidthTypes.Duration_us, "tDu"}, - {arrow.FixedWidthTypes.Duration_ns, "tDn"}, - {arrow.FixedWidthTypes.MonthInterval, "tiM"}, - {arrow.FixedWidthTypes.DayTimeInterval, "tiD"}, - {arrow.FixedWidthTypes.MonthDayNanoInterval, "tin"}, - {arrow.FixedWidthTypes.Timestamp_s, "tss:UTC"}, - {&arrow.TimestampType{Unit: arrow.Second}, "tss:"}, - {&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Europe/Paris"}, "tss:Europe/Paris"}, - {arrow.FixedWidthTypes.Timestamp_ms, "tsm:UTC"}, - {&arrow.TimestampType{Unit: arrow.Millisecond}, "tsm:"}, - {&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Europe/Paris"}, "tsm:Europe/Paris"}, - {arrow.FixedWidthTypes.Timestamp_us, "tsu:UTC"}, - {&arrow.TimestampType{Unit: arrow.Microsecond}, "tsu:"}, - {&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Europe/Paris"}, "tsu:Europe/Paris"}, - {arrow.FixedWidthTypes.Timestamp_ns, "tsn:UTC"}, - 
{&arrow.TimestampType{Unit: arrow.Nanosecond}, "tsn:"}, - {&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Europe/Paris"}, "tsn:Europe/Paris"}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testPrimitive(tt.fmt) - - f, err := ImportCArrowField(&sc) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(&sc)) - }) - } -} - -func TestListSchemas(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmts []string - names []string - isnull []bool - }{ - {arrow.ListOf(arrow.PrimitiveTypes.Int8), []string{"+l", "c"}, []string{"", "item"}, []bool{true}}, - {arrow.FixedSizeListOfNonNullable(2, arrow.PrimitiveTypes.Int64), []string{"+w:2", "l"}, []string{"", "item"}, []bool{false}}, - {arrow.ListOfNonNullable(arrow.ListOf(arrow.PrimitiveTypes.Int32)), []string{"+l", "+l", "i"}, []string{"", "item", "item"}, []bool{false, true}}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testNested(tt.fmts, tt.names, tt.isnull) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - f, err := ImportCArrowField(top) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(top)) - }) - } -} - -func TestStructSchemas(t *testing.T) { - tests := []struct { - typ arrow.DataType - fmts []string - names []string - flags []int64 - }{ - {arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: metadata2}, - ), []string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{flagIsNullable, flagIsNullable, flagIsNullable}}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testStruct(tt.fmts, tt.names, tt.flags) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - f, err := 
ImportCArrowField(top) - assert.NoError(t, err) - - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(top)) - }) - } -} - -func TestMapSchemas(t *testing.T) { - tests := []struct { - typ *arrow.MapType - keysSorted bool - fmts []string - names []string - flags []int64 - }{ - {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), false, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable, 0, 0, flagIsNullable}}, - {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), true, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable | flagMapKeysSorted, 0, 0, flagIsNullable}}, - } - - for _, tt := range tests { - t.Run(tt.typ.Name(), func(t *testing.T) { - sc := testMap(tt.fmts, tt.names, tt.flags) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - f, err := ImportCArrowField(top) - assert.NoError(t, err) - - tt.typ.KeysSorted = tt.keysSorted - assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - - assert.True(t, schemaIsReleased(top)) - }) - } -} - -func TestSchema(t *testing.T) { - // schema is exported as an equivalent struct type (+ top-level metadata) - sc := arrow.NewSchema([]arrow.Field{ - {Name: "nulls", Type: arrow.Null, Nullable: false}, - {Name: "values", Type: arrow.PrimitiveTypes.Int64, Nullable: true, Metadata: metadata1}, - }, &metadata2) - - cst := testSchema([]string{"+s", "n", "l"}, []string{"", "nulls", "values"}, []int64{0, 0, flagIsNullable}) - defer freeMallocedSchemas(cst) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(cst))[0] - out, err := ImportCArrowSchema(top) - assert.NoError(t, err) - - assert.True(t, sc.Equal(out)) - assert.True(t, sc.Metadata().Equal(out.Metadata())) - - assert.True(t, schemaIsReleased(top)) -} - -func createTestInt8Arr() arrow.Array { - bld := array.NewInt8Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int8{1, 2, 0, 
-3}, []bool{true, true, false, true}) - return bld.NewInt8Array() -} - -func createTestInt16Arr() arrow.Array { - bld := array.NewInt16Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int16{1, 2, -3}, []bool{true, true, true}) - return bld.NewInt16Array() -} - -func createTestInt32Arr() arrow.Array { - bld := array.NewInt32Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int32{1, 2, 0, -3}, []bool{true, true, false, true}) - return bld.NewInt32Array() -} - -func createTestInt64Arr() arrow.Array { - bld := array.NewInt64Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]int64{1, 2, -3}, []bool{true, true, true}) - return bld.NewInt64Array() -} - -func createTestUint8Arr() arrow.Array { - bld := array.NewUint8Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint8{1, 2, 0, 3}, []bool{true, true, false, true}) - return bld.NewUint8Array() -} - -func createTestUint16Arr() arrow.Array { - bld := array.NewUint16Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint16{1, 2, 3}, []bool{true, true, true}) - return bld.NewUint16Array() -} - -func createTestUint32Arr() arrow.Array { - bld := array.NewUint32Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint32{1, 2, 0, 3}, []bool{true, true, false, true}) - return bld.NewUint32Array() -} - -func createTestUint64Arr() arrow.Array { - bld := array.NewUint64Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]uint64{1, 2, 3}, []bool{true, true, true}) - return bld.NewUint64Array() -} - -func createTestBoolArr() arrow.Array { - bld := array.NewBooleanBuilder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]bool{true, false, false}, []bool{true, true, false}) - return bld.NewBooleanArray() -} - -func createTestNullArr() arrow.Array { - return array.NewNull(2) -} - -func createTestFloat32Arr() arrow.Array { - bld := 
array.NewFloat32Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]float32{1.5, 0}, []bool{true, false}) - return bld.NewFloat32Array() -} - -func createTestFloat64Arr() arrow.Array { - bld := array.NewFloat64Builder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]float64{1.5, 0}, []bool{true, false}) - return bld.NewFloat64Array() -} - -func createTestFSBArr() arrow.Array { - bld := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 3}) - defer bld.Release() - - bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) - return bld.NewFixedSizeBinaryArray() -} - -func createTestBinaryArr() arrow.Array { - bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) - defer bld.Release() - - bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) - return bld.NewBinaryArray() -} - -func createTestStrArr() arrow.Array { - bld := array.NewStringBuilder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) - return bld.NewStringArray() -} - -func createTestLargeBinaryArr() arrow.Array { - bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.LargeBinary) - defer bld.Release() - - bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) - return bld.NewLargeBinaryArray() -} - -func createTestLargeStrArr() arrow.Array { - bld := array.NewLargeStringBuilder(memory.DefaultAllocator) - defer bld.Release() - - bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) - return bld.NewLargeStringArray() -} - -func createTestDecimalArr() arrow.Array { - bld := array.NewDecimal128Builder(memory.DefaultAllocator, &arrow.Decimal128Type{Precision: 16, Scale: 4}) - defer bld.Release() - - bld.AppendValues([]decimal128.Num{decimal128.FromU64(12345670), decimal128.FromU64(0)}, 
[]bool{true, false}) - return bld.NewDecimal128Array() -} - -func TestPrimitiveArrs(t *testing.T) { - tests := []struct { - name string - fn func() arrow.Array - }{ - {"int8", createTestInt8Arr}, - {"uint8", createTestUint8Arr}, - {"int16", createTestInt16Arr}, - {"uint16", createTestUint16Arr}, - {"int32", createTestInt32Arr}, - {"uint32", createTestUint32Arr}, - {"int64", createTestInt64Arr}, - {"uint64", createTestUint64Arr}, - {"bool", createTestBoolArr}, - {"null", createTestNullArr}, - {"float32", createTestFloat32Arr}, - {"float64", createTestFloat64Arr}, - {"fixed size binary", createTestFSBArr}, - {"binary", createTestBinaryArr}, - {"utf8", createTestStrArr}, - {"largebinary", createTestLargeBinaryArr}, - {"largeutf8", createTestLargeStrArr}, - {"decimal128", createTestDecimalArr}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - arr := tt.fn() - defer arr.Release() - - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - carr := createCArr(arr, mem) - defer freeTestMallocatorArr(carr, mem) - - imported, err := ImportCArrayWithType(carr, arr.DataType()) - assert.NoError(t, err) - assert.True(t, array.Equal(arr, imported)) - assert.True(t, isReleased(carr)) - - imported.Release() - }) - } -} - -func TestPrimitiveSliced(t *testing.T) { - arr := createTestInt16Arr() - defer arr.Release() - - sl := array.NewSlice(arr, 1, 2) - defer sl.Release() - - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - carr := createCArr(sl, mem) - defer freeTestMallocatorArr(carr, mem) - - imported, err := ImportCArrayWithType(carr, arr.DataType()) - assert.NoError(t, err) - assert.True(t, array.Equal(sl, imported)) - assert.True(t, array.SliceEqual(arr, 1, 2, imported, 0, int64(imported.Len()))) - assert.True(t, isReleased(carr)) - - imported.Release() -} - -func createTestListArr() arrow.Array { - bld := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bld.Release() - - vb := 
bld.ValueBuilder().(*array.Int8Builder) - - bld.Append(true) - vb.AppendValues([]int8{1, 2}, []bool{true, true}) - - bld.Append(true) - vb.AppendValues([]int8{3, 0}, []bool{true, false}) - - bld.AppendNull() - - return bld.NewArray() -} - -func createTestLargeListArr() arrow.Array { - bld := array.NewLargeListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) - defer bld.Release() - - vb := bld.ValueBuilder().(*array.Int8Builder) - - bld.Append(true) - vb.AppendValues([]int8{1, 2}, []bool{true, true}) - - bld.Append(true) - vb.AppendValues([]int8{3, 0}, []bool{true, false}) - - bld.AppendNull() - - return bld.NewArray() -} - -func createTestFixedSizeList() arrow.Array { - bld := array.NewFixedSizeListBuilder(memory.DefaultAllocator, 2, arrow.PrimitiveTypes.Int64) - defer bld.Release() - - vb := bld.ValueBuilder().(*array.Int64Builder) - - bld.Append(true) - vb.AppendValues([]int64{1, 2}, []bool{true, true}) - - bld.Append(true) - vb.AppendValues([]int64{3, 0}, []bool{true, false}) - - bld.AppendNull() - return bld.NewArray() -} - -func createTestStructArr() arrow.Array { - bld := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true}, - )) - defer bld.Release() - - f1bld := bld.FieldBuilder(0).(*array.Int8Builder) - f2bld := bld.FieldBuilder(1).(*array.StringBuilder) - - bld.Append(true) - f1bld.Append(1) - f2bld.Append("foo") - - bld.Append(true) - f1bld.Append(2) - f2bld.AppendNull() - - return bld.NewArray() -} - -func createTestRunEndsArr() arrow.Array { - bld := array.NewRunEndEncodedBuilder(memory.DefaultAllocator, - arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8) - defer bld.Release() - - if err := json.Unmarshal([]byte(`[1, 2, 2, 3, null, null, null, 4]`), bld); err != nil { - panic(err) - } - - return bld.NewArray() -} - -func createTestMapArr() arrow.Array { - bld := 
array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, false) - defer bld.Release() - - kb := bld.KeyBuilder().(*array.Int8Builder) - vb := bld.ItemBuilder().(*array.StringBuilder) - - bld.Append(true) - kb.Append(1) - vb.Append("foo") - kb.Append(2) - vb.AppendNull() - - bld.Append(true) - kb.Append(3) - vb.Append("bar") - - return bld.NewArray() -} - -func createTestSparseUnion() arrow.Array { - return createTestUnionArr(arrow.SparseMode) -} - -func createTestDenseUnion() arrow.Array { - return createTestUnionArr(arrow.DenseMode) -} - -func createTestUnionArr(mode arrow.UnionMode) arrow.Array { - fields := []arrow.Field{ - arrow.Field{Name: "u0", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "u1", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}, - } - typeCodes := []arrow.UnionTypeCode{5, 10} - bld := array.NewBuilder(memory.DefaultAllocator, arrow.UnionOf(mode, fields, typeCodes)).(array.UnionBuilder) - defer bld.Release() - - u0Bld := bld.Child(0).(*array.Int32Builder) - u1Bld := bld.Child(1).(*array.Uint8Builder) - - bld.Append(5) - if mode == arrow.SparseMode { - u1Bld.AppendNull() - } - u0Bld.Append(128) - bld.Append(5) - if mode == arrow.SparseMode { - u1Bld.AppendNull() - } - u0Bld.Append(256) - bld.Append(10) - if mode == arrow.SparseMode { - u0Bld.AppendNull() - } - u1Bld.Append(127) - bld.Append(10) - if mode == arrow.SparseMode { - u0Bld.AppendNull() - } - u1Bld.Append(25) - - return bld.NewArray() -} - -func TestNestedArrays(t *testing.T) { - tests := []struct { - name string - fn func() arrow.Array - }{ - {"list", createTestListArr}, - {"large list", createTestLargeListArr}, - {"fixed size list", createTestFixedSizeList}, - {"struct", createTestStructArr}, - {"map", createTestMapArr}, - {"sparse union", createTestSparseUnion}, - {"dense union", createTestDenseUnion}, - {"run-end encoded", createTestRunEndsArr}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) 
{ - arr := tt.fn() - defer arr.Release() - - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - carr := createCArr(arr, mem) - defer freeTestMallocatorArr(carr, mem) - - imported, err := ImportCArrayWithType(carr, arr.DataType()) - assert.NoError(t, err) - assert.True(t, array.Equal(arr, imported)) - assert.True(t, isReleased(carr)) - - imported.Release() - }) - } -} - -func TestRecordBatch(t *testing.T) { - mem := mallocator.NewMallocator() - defer mem.AssertSize(t, 0) - - arr := createTestStructArr() - defer arr.Release() - - carr := createCArr(arr, mem) - defer freeTestMallocatorArr(carr, mem) - - sc := testStruct([]string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{0, flagIsNullable, flagIsNullable}) - defer freeMallocedSchemas(sc) - - top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] - rb, err := ImportCRecordBatch(carr, top) - assert.NoError(t, err) - defer rb.Release() - - assert.EqualValues(t, 2, rb.NumCols()) - rbschema := rb.Schema() - assert.Equal(t, "a", rbschema.Field(0).Name) - assert.Equal(t, "b", rbschema.Field(1).Name) - - rec := array.NewRecord(rbschema, []arrow.Array{arr.(*array.Struct).Field(0), arr.(*array.Struct).Field(1)}, -1) - defer rec.Release() - - assert.True(t, array.RecordEqual(rb, rec)) -} - -func TestRecordReaderStream(t *testing.T) { - stream := arrayStreamTest() - defer releaseStream(stream) - - rdr := ImportCArrayStream(stream, nil) - i := 0 - for { - rec, err := rdr.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - assert.NoError(t, err) - } - - assert.EqualValues(t, 2, rec.NumCols()) - assert.Equal(t, "a", rec.ColumnName(0)) - assert.Equal(t, "b", rec.ColumnName(1)) - i++ - for j := 0; j < int(rec.NumRows()); j++ { - assert.Equal(t, int32((j+1)*i), rec.Column(0).(*array.Int32).Value(j)) - } - assert.Equal(t, "foo", rec.Column(1).(*array.String).Value(0)) - assert.Equal(t, "bar", rec.Column(1).(*array.String).Value(1)) - assert.Equal(t, "baz", rec.Column(1).(*array.String).Value(2)) - } -} 
- -func TestExportRecordReaderStream(t *testing.T) { - reclist := arrdata.Records["primitives"] - rdr, _ := array.NewRecordReader(reclist[0].Schema(), reclist) - - out := createTestStreamObj() - ExportRecordReader(rdr, out) - - assert.NotNil(t, out.get_schema) - assert.NotNil(t, out.get_next) - assert.NotNil(t, out.get_last_error) - assert.NotNil(t, out.release) - assert.NotNil(t, out.private_data) - - h := *(*cgo.Handle)(out.private_data) - assert.Same(t, rdr, h.Value().(cRecordReader).rdr) - - importedRdr := ImportCArrayStream(out, nil) - i := 0 - for { - rec, err := importedRdr.Read() - if err != nil { - if errors.Is(err, io.EOF) { - break - } - assert.NoError(t, err) - } - - assert.Truef(t, array.RecordEqual(reclist[i], rec), "expected: %s\ngot: %s", reclist[i], rec) - i++ - } - assert.EqualValues(t, len(reclist), i) -} - -func TestExportRecordReaderStreamLifetime(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - schema := arrow.NewSchema([]arrow.Field{ - {Name: "strings", Type: arrow.BinaryTypes.String, Nullable: false}, - }, nil) - - bldr := array.NewBuilder(mem, &arrow.StringType{}) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - rec := array.NewRecord(schema, []arrow.Array{arr}, 0) - defer rec.Release() - - rdr, _ := array.NewRecordReader(schema, []arrow.Record{rec}) - defer rdr.Release() - - out := createTestStreamObj() - ExportRecordReader(rdr, out) - - // C Stream is holding on to memory - assert.NotEqual(t, 0, mem.CurrentAlloc()) - releaseStream(out) -} - -func TestEmptyListExport(t *testing.T) { - bldr := array.NewBuilder(memory.DefaultAllocator, arrow.LargeListOf(arrow.PrimitiveTypes.Int32)) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - ExportArrowArray(arr, &out, nil) - - assert.Zero(t, out.length) - assert.Zero(t, out.null_count) - assert.Zero(t, out.offset) - assert.EqualValues(t, 2, out.n_buffers) - 
assert.NotNil(t, out.buffers) - assert.EqualValues(t, 1, out.n_children) - assert.NotNil(t, out.children) -} - -func TestEmptyDictExport(t *testing.T) { - bldr := array.NewBuilder(memory.DefaultAllocator, &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String, Ordered: true}) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - var sc CArrowSchema - ExportArrowArray(arr, &out, &sc) - - assert.EqualValues(t, 'c', *sc.format) - assert.NotZero(t, sc.flags&1) - assert.Zero(t, sc.n_children) - assert.NotNil(t, sc.dictionary) - assert.EqualValues(t, 'u', *sc.dictionary.format) - - assert.Zero(t, out.length) - assert.Zero(t, out.null_count) - assert.Zero(t, out.offset) - assert.EqualValues(t, 2, out.n_buffers) - assert.Zero(t, out.n_children) - assert.Nil(t, out.children) - assert.NotNil(t, out.dictionary) - - assert.Zero(t, out.dictionary.length) - assert.Zero(t, out.dictionary.null_count) - assert.Zero(t, out.dictionary.offset) - assert.EqualValues(t, 3, out.dictionary.n_buffers) - assert.Zero(t, out.dictionary.n_children) - assert.Nil(t, out.dictionary.children) - assert.Nil(t, out.dictionary.dictionary) -} - -func TestEmptyStringExport(t *testing.T) { - // apache/arrow#33936: regression test - bldr := array.NewBuilder(memory.DefaultAllocator, &arrow.StringType{}) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - var sc CArrowSchema - ExportArrowArray(arr, &out, &sc) - - assert.EqualValues(t, 'u', *sc.format) - assert.Zero(t, sc.n_children) - assert.Nil(t, sc.dictionary) - - assert.EqualValues(t, 3, out.n_buffers) - buffers := (*[3]unsafe.Pointer)(unsafe.Pointer(out.buffers)) - assert.EqualValues(t, unsafe.Pointer(nil), buffers[0]) - assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[1]) - assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[2]) -} - -func TestEmptyUnionExport(t *testing.T) { - // apache/arrow#33936: regression test 
- bldr := array.NewBuilder(memory.DefaultAllocator, arrow.SparseUnionOf([]arrow.Field{ - {Name: "child", Type: &arrow.Int64Type{}}, - }, []arrow.UnionTypeCode{0})) - defer bldr.Release() - - arr := bldr.NewArray() - defer arr.Release() - - var out CArrowArray - var sc CArrowSchema - ExportArrowArray(arr, &out, &sc) - - assert.EqualValues(t, 1, sc.n_children) - assert.Nil(t, sc.dictionary) - - assert.EqualValues(t, 1, out.n_buffers) - buffers := (*[1]unsafe.Pointer)(unsafe.Pointer(out.buffers)) - assert.NotEqualValues(t, unsafe.Pointer(nil), buffers[0]) -} - -func TestRecordReaderExport(t *testing.T) { - // Regression test for apache/arrow#33767 - reclist := arrdata.Records["primitives"] - rdr, _ := array.NewRecordReader(reclist[0].Schema(), reclist) - - if err := exportedStreamTest(rdr); err != nil { - t.Fatalf("Failed to test exported stream: %#v", err) - } -} - -type failingReader struct { - opCount int -} - -func (r *failingReader) Retain() {} -func (r *failingReader) Release() {} -func (r *failingReader) Schema() *arrow.Schema { - r.opCount -= 1 - if r.opCount == 0 { - return nil - } - return arrdata.Records["primitives"][0].Schema() -} -func (r *failingReader) Next() bool { - r.opCount -= 1 - return r.opCount > 0 -} -func (r *failingReader) Record() arrow.Record { - arrdata.Records["primitives"][0].Retain() - return arrdata.Records["primitives"][0] -} -func (r *failingReader) Err() error { - if r.opCount == 0 { - return fmt.Errorf("Expected error message") - } - return nil -} - -func TestRecordReaderError(t *testing.T) { - // Regression test for apache/arrow#33789 - err := roundTripStreamTest(&failingReader{opCount: 1}) - if err == nil { - t.Fatalf("Expected error but got none") - } - assert.Contains(t, err.Error(), "Expected error message") - - err = roundTripStreamTest(&failingReader{opCount: 2}) - if err == nil { - t.Fatalf("Expected error but got none") - } - assert.Contains(t, err.Error(), "Expected error message") - - err = 
roundTripStreamTest(&failingReader{opCount: 3}) - if err == nil { - t.Fatalf("Expected error but got none") - } - assert.Contains(t, err.Error(), "Expected error message") -} - -func TestRecordReaderImportError(t *testing.T) { - // Regression test for apache/arrow#35974 - - err := fallibleSchemaTestDeprecated() - if err == nil { - t.Fatalf("Expected error but got nil") - } - assert.Contains(t, err.Error(), "Expected error message") - - err = fallibleSchemaTest() - if err == nil { - t.Fatalf("Expected error but got nil") - } - assert.Contains(t, err.Error(), "Expected error message") -} - -func TestConfuseGoGc(t *testing.T) { - // Regression test for https://github.com/apache/arrow-adbc/issues/729 - reclist := arrdata.Records["primitives"] - - var wg sync.WaitGroup - concurrency := 32 - wg.Add(concurrency) - - // XXX: this test is a bit expensive - for i := 0; i < concurrency; i++ { - go func() { - for i := 0; i < 256; i++ { - rdr, err := array.NewRecordReader(reclist[0].Schema(), reclist) - assert.NoError(t, err) - runtime.GC() - assert.NoError(t, confuseGoGc(rdr)) - runtime.GC() - } - wg.Done() - }() - } - - wg.Wait() -} diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go deleted file mode 100644 index 968b28b4e4afb..0000000000000 --- a/go/arrow/cdata/cdata_test_framework.go +++ /dev/null @@ -1,451 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build test -// +build test - -package cdata - -// #include -// #include -// #include -// #include "arrow/c/abi.h" -// #include "arrow/c/helpers.h" -// -// void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); -// static struct ArrowArray* get_test_arr() { -// struct ArrowArray* array = (struct ArrowArray*)malloc(sizeof(struct ArrowArray)); -// memset(array, 0, sizeof(*array)); -// return array; -// } -// static struct ArrowArrayStream* get_test_stream() { -// struct ArrowArrayStream* out = (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); -// memset(out, 0, sizeof(struct ArrowArrayStream)); -// return out; -// } -// -// void release_test_arr(struct ArrowArray* arr); -// -// static int32_t* get_data() { -// int32_t* data = malloc(sizeof(int32_t)*10); -// for (int i = 0; i < 10; ++i) { data[i] = i+1; } -// return data; -// } -// void export_int32_type(struct ArrowSchema* schema); -// void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); -// int test1_is_released(); -// void test_primitive(struct ArrowSchema* schema, const char* fmt); -// void free_malloced_schemas(struct ArrowSchema**); -// struct ArrowSchema** test_lists(const char** fmts, const char** names, const int* nullflags, const int n); -// struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n); -// struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n); -// struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* 
flags, const int n); -// struct ArrowSchema** test_union(const char** fmts, const char** names, int64_t* flags, const int n); -// int test_exported_stream(struct ArrowArrayStream* stream); -// void test_stream_schema_fallible(struct ArrowArrayStream* stream); -// int confuse_go_gc(struct ArrowArrayStream* stream, unsigned int seed); -// extern void releaseTestArr(struct ArrowArray* array); -// extern void goReleaseTestArray(struct ArrowArray* array); -import "C" - -import ( - "errors" - "fmt" - "io" - "math/rand" - "runtime/cgo" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/internal" - "github.com/apache/arrow/go/v18/arrow/memory/mallocator" -) - -const ( - flagIsNullable = C.ARROW_FLAG_NULLABLE - flagMapKeysSorted = C.ARROW_FLAG_MAP_KEYS_SORTED -) - -var ( - metadata1 = arrow.NewMetadata([]string{"key1", "key2"}, []string{"", "bar"}) - metadata2 = arrow.NewMetadata([]string{"key"}, []string{"abcde"}) -) - -func exportInt32TypeSchema() CArrowSchema { - var s CArrowSchema - C.export_int32_type(&s) - return s -} - -func releaseStream(s *CArrowArrayStream) { - C.ArrowArrayStreamRelease(s) -} - -func schemaIsReleased(s *CArrowSchema) bool { - return C.ArrowSchemaIsReleased(s) == 1 -} - -func getMetadataKeys() ([]string, []string) { - return []string{"key1", "key2"}, []string{"key"} -} - -func getMetadataValues() ([]string, []string) { - return []string{"", "bar"}, []string{"abcde"} -} - -func exportInt32Array() *CArrowArray { - arr := C.get_test_arr() - C.export_int32_array(C.get_data(), C.int64_t(10), arr) - return arr -} - -func isReleased(arr *CArrowArray) bool { - return C.ArrowArrayIsReleased(arr) == 1 -} - -func test1IsReleased() bool { - return C.test1_is_released() == 1 -} - -func testPrimitive(fmtstr string) CArrowSchema { - var s CArrowSchema - fmt := C.CString(fmtstr) - C.test_primitive(&s, fmt) - return s -} - -func 
freeMallocedSchemas(schemas **CArrowSchema) { - C.free_malloced_schemas(schemas) -} - -func testNested(fmts, names []string, isnull []bool) **CArrowSchema { - if len(fmts) != len(names) { - panic("testing nested lists must have same size fmts and names") - } - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - nulls := make([]C.int, len(isnull)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - } - - for i, v := range isnull { - if v { - nulls[i] = C.ARROW_FLAG_NULLABLE - } else { - nulls[i] = 0 - } - } - - return C.test_lists((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int)(unsafe.Pointer(&nulls[0])), C.int(len(fmts))) -} - -func testStruct(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing structs must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_struct((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func testMap(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing maps must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_map((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func 
testUnion(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing unions must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_union((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func testSchema(fmts, names []string, flags []int64) **CArrowSchema { - if len(fmts) != len(names) || len(names) != len(flags) { - panic("testing structs must all have the same size slices in args") - } - - cfmts := make([]*C.char, len(fmts)) - cnames := make([]*C.char, len(names)) - cflags := make([]C.int64_t, len(flags)) - - for i := range fmts { - cfmts[i] = C.CString(fmts[i]) - cnames[i] = C.CString(names[i]) - cflags[i] = C.int64_t(flags[i]) - } - - return C.test_schema((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) -} - -func freeAny[T any](alloc *mallocator.Mallocator, p *T, n int) { - raw := unsafe.Slice((*byte)(unsafe.Pointer(p)), int(unsafe.Sizeof(*p))*n) - alloc.Free(raw) -} - -func freeTestMallocatorArr(carr *CArrowArray, alloc *mallocator.Mallocator) { - freeAny(alloc, carr, 1) -} - -func getTestArr(alloc *mallocator.Mallocator) *CArrowArray { - raw := alloc.Allocate(C.sizeof_struct_ArrowArray) - return (*CArrowArray)(unsafe.Pointer(&raw[0])) -} - -type testReleaser struct { - alloc *mallocator.Mallocator - bufs [][]byte -} - -//export releaseTestArr -func releaseTestArr(arr *CArrowArray) { - if C.ArrowArrayIsReleased(arr) == 1 { - return - } - defer C.ArrowArrayMarkReleased(arr) - - h := getHandle(arr.private_data) - tr := h.Value().(*testReleaser) - - 
alloc := tr.alloc - for _, b := range tr.bufs { - alloc.Free(b) - } - - if arr.n_buffers > 0 { - freeAny(alloc, arr.buffers, int(arr.n_buffers)) - } - - if arr.dictionary != nil { - C.ArrowArrayRelease(arr.dictionary) - freeAny(alloc, arr.dictionary, 1) - } - - if arr.n_children > 0 { - children := unsafe.Slice(arr.children, arr.n_children) - for _, c := range children { - C.ArrowArrayRelease(c) - freeTestMallocatorArr(c, alloc) - } - - freeAny(alloc, arr.children, int(arr.n_children)) - } - - h.Delete() - C.free(unsafe.Pointer(arr.private_data)) -} - -func allocateBufferMallocatorPtrArr(alloc *mallocator.Mallocator, n int) []*C.void { - raw := alloc.Allocate(int(unsafe.Sizeof((*C.void)(nil))) * n) - return unsafe.Slice((**C.void)(unsafe.Pointer(&raw[0])), n) -} - -func allocateChildrenPtrArr(alloc *mallocator.Mallocator, n int) []*CArrowArray { - raw := alloc.Allocate(int(unsafe.Sizeof((*CArrowArray)(nil))) * n) - return unsafe.Slice((**CArrowArray)(unsafe.Pointer(&raw[0])), n) -} - -func createCArr(arr arrow.Array, alloc *mallocator.Mallocator) *CArrowArray { - var ( - carr = getTestArr(alloc) - children = (**CArrowArray)(nil) - nchildren = C.int64_t(0) - ) - - switch arr := arr.(type) { - case array.ListLike: - clist := allocateChildrenPtrArr(alloc, 1) - clist[0] = createCArr(arr.ListValues(), alloc) - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - nchildren += 1 - case *array.Struct: - clist := allocateChildrenPtrArr(alloc, arr.NumField()) - for i := 0; i < arr.NumField(); i++ { - clist[i] = createCArr(arr.Field(i), alloc) - nchildren += 1 - } - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - case *array.RunEndEncoded: - clist := allocateChildrenPtrArr(alloc, 2) - clist[0] = createCArr(arr.RunEndsArr(), alloc) - clist[1] = createCArr(arr.Values(), alloc) - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - nchildren += 2 - case array.Union: - clist := allocateChildrenPtrArr(alloc, arr.NumFields()) - for i := 0; i < arr.NumFields(); i++ { - 
clist[i] = createCArr(arr.Field(i), alloc) - nchildren += 1 - } - children = (**CArrowArray)(unsafe.Pointer(&clist[0])) - } - - carr.children = children - carr.n_children = nchildren - carr.dictionary = nil - carr.length = C.int64_t(arr.Len()) - carr.null_count = C.int64_t(arr.NullN()) - carr.offset = C.int64_t(arr.Data().Offset()) - carr.release = (*[0]byte)(C.goReleaseTestArray) - tr := &testReleaser{alloc: alloc} - h := cgo.NewHandle(tr) - carr.private_data = createHandle(h) - - buffers := arr.Data().Buffers() - bufOffset, nbuffers := 0, len(buffers) - hasValidityBitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if nbuffers > 0 && !hasValidityBitmap { - nbuffers-- - bufOffset++ - } - - if nbuffers == 0 { - return carr - } - - tr.bufs = make([][]byte, 0, nbuffers) - cbufs := allocateBufferMallocatorPtrArr(alloc, nbuffers) - for i, b := range buffers[bufOffset:] { - if b != nil { - raw := alloc.Allocate(b.Len()) - copy(raw, b.Bytes()) - tr.bufs = append(tr.bufs, raw) - cbufs[i] = (*C.void)(unsafe.Pointer(&raw[0])) - } else { - cbufs[i] = nil - } - } - - carr.n_buffers = C.int64_t(len(cbufs)) - if len(cbufs) > 0 { - carr.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cbufs[0])) - } - - return carr -} - -func createTestStreamObj() *CArrowArrayStream { - return C.get_test_stream() -} - -func arrayStreamTest() *CArrowArrayStream { - st := C.get_test_stream() - C.setup_array_stream_test(2, st) - return st -} - -func exportedStreamTest(reader array.RecordReader) error { - out := C.get_test_stream() - ExportRecordReader(reader, out) - rc := C.test_exported_stream(out) - C.free(unsafe.Pointer(out)) - if rc == 0 { - return nil - } - return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) -} - -func roundTripStreamTest(reader array.RecordReader) error { - out := C.get_test_stream() - ExportRecordReader(reader, out) - rdr, err := ImportCRecordReader(out, nil) - - if err != nil { - return err - } - - for { - _, err = rdr.Read() - if 
errors.Is(err, io.EOF) { - break - } else if err != nil { - return err - } - } - return nil -} - -func fallibleSchemaTestDeprecated() (err error) { - stream := CArrowArrayStream{} - C.test_stream_schema_fallible(&stream) - - defer func() { - if r := recover(); r != nil { - err = fmt.Errorf("Panicked: %#v", r) - } - }() - _ = ImportCArrayStream(&stream, nil) - return nil -} - -func fallibleSchemaTest() error { - stream := CArrowArrayStream{} - C.test_stream_schema_fallible(&stream) - - _, err := ImportCRecordReader(&stream, nil) - if err != nil { - return err - } - return nil -} - -func confuseGoGc(reader array.RecordReader) error { - out := C.get_test_stream() - ExportRecordReader(reader, out) - rc := C.confuse_go_gc(out, C.uint(rand.Int())) - C.free(unsafe.Pointer(out)) - if rc == 0 { - return nil - } - return fmt.Errorf("Exported stream test failed with return code %d", int(rc)) -} diff --git a/go/arrow/cdata/exports.go b/go/arrow/cdata/exports.go deleted file mode 100644 index 6dbcde831d889..0000000000000 --- a/go/arrow/cdata/exports.go +++ /dev/null @@ -1,157 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cdata - -import ( - "runtime/cgo" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" -) - -// #include -// #include "arrow/c/helpers.h" -// -// typedef const char cchar_t; -// extern int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); -// extern int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); -// extern const char* streamGetError(struct ArrowArrayStream*); -// extern void streamRelease(struct ArrowArrayStream*); -// // XXX(https://github.com/apache/arrow-adbc/issues/729) -// int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out); -// int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out); -// -import "C" - -//export releaseExportedSchema -func releaseExportedSchema(schema *CArrowSchema) { - if C.ArrowSchemaIsReleased(schema) == 1 { - return - } - defer C.ArrowSchemaMarkReleased(schema) - - C.free(unsafe.Pointer(schema.name)) - C.free(unsafe.Pointer(schema.format)) - C.free(unsafe.Pointer(schema.metadata)) - - if schema.n_children == 0 { - return - } - - if schema.dictionary != nil { - C.ArrowSchemaRelease(schema.dictionary) - C.free(unsafe.Pointer(schema.dictionary)) - } - - children := unsafe.Slice(schema.children, schema.n_children) - for _, c := range children { - C.ArrowSchemaRelease(c) - } - - C.free(unsafe.Pointer(children[0])) - C.free(unsafe.Pointer(schema.children)) -} - -// apache/arrow#33864: allocate a new cgo.Handle and store its address -// in a heap-allocated uintptr_t. 
-func createHandle(hndl cgo.Handle) unsafe.Pointer { - // uintptr_t* hptr = malloc(sizeof(uintptr_t)); - hptr := (*C.uintptr_t)(C.malloc(C.sizeof_uintptr_t)) - // *hptr = (uintptr)hndl; - *hptr = C.uintptr_t(uintptr(hndl)) - return unsafe.Pointer(hptr) -} - -func getHandle(ptr unsafe.Pointer) cgo.Handle { - // uintptr_t* hptr = (uintptr_t*)ptr; - hptr := (*C.uintptr_t)(ptr) - return cgo.Handle((uintptr)(*hptr)) -} - -//export releaseExportedArray -func releaseExportedArray(arr *CArrowArray) { - if C.ArrowArrayIsReleased(arr) == 1 { - return - } - defer C.ArrowArrayMarkReleased(arr) - - if arr.n_buffers > 0 { - C.free(unsafe.Pointer(arr.buffers)) - } - - if arr.dictionary != nil { - C.ArrowArrayRelease(arr.dictionary) - C.free(unsafe.Pointer(arr.dictionary)) - } - - if arr.n_children > 0 { - children := unsafe.Slice(arr.children, arr.n_children) - - for _, c := range children { - C.ArrowArrayRelease(c) - } - C.free(unsafe.Pointer(children[0])) - C.free(unsafe.Pointer(arr.children)) - } - - h := getHandle(arr.private_data) - h.Value().(arrow.ArrayData).Release() - h.Delete() - C.free(unsafe.Pointer(arr.private_data)) -} - -//export streamGetSchema -func streamGetSchema(handle *CArrowArrayStream, out *CArrowSchema) C.int { - h := getHandle(handle.private_data) - rdr := h.Value().(cRecordReader) - return C.int(rdr.getSchema(out)) -} - -//export streamGetNext -func streamGetNext(handle *CArrowArrayStream, out *CArrowArray) C.int { - h := getHandle(handle.private_data) - rdr := h.Value().(cRecordReader) - return C.int(rdr.next(out)) -} - -//export streamGetError -func streamGetError(handle *CArrowArrayStream) *C.cchar_t { - h := getHandle(handle.private_data) - rdr := h.Value().(cRecordReader) - return rdr.getLastError() -} - -//export streamRelease -func streamRelease(handle *CArrowArrayStream) { - h := getHandle(handle.private_data) - h.Value().(cRecordReader).release() - h.Delete() - C.free(unsafe.Pointer(handle.private_data)) - handle.release = nil - 
handle.private_data = nil -} - -func exportStream(rdr array.RecordReader, out *CArrowArrayStream) { - out.get_schema = (*[0]byte)(C.streamGetSchemaTrampoline) - out.get_next = (*[0]byte)(C.streamGetNextTrampoline) - out.get_last_error = (*[0]byte)(C.streamGetError) - out.release = (*[0]byte)(C.streamRelease) - rdr.Retain() - h := cgo.NewHandle(cRecordReader{rdr: rdr, err: nil}) - out.private_data = createHandle(h) -} diff --git a/go/arrow/cdata/import_allocator.go b/go/arrow/cdata/import_allocator.go deleted file mode 100644 index 4e5c2a7b38c72..0000000000000 --- a/go/arrow/cdata/import_allocator.go +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package cdata - -import ( - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -// #include "arrow/c/helpers.h" -// #include -import "C" - -type importAllocator struct { - bufCount int64 - - arr *CArrowArray -} - -func (i *importAllocator) addBuffer() { - atomic.AddInt64(&i.bufCount, 1) -} - -func (*importAllocator) Allocate(int) []byte { - panic("cannot allocate from importAllocator") -} - -func (*importAllocator) Reallocate(int, []byte) []byte { - panic("cannot reallocate from importAllocator") -} - -func (i *importAllocator) Free([]byte) { - debug.Assert(atomic.LoadInt64(&i.bufCount) > 0, "too many releases") - - if atomic.AddInt64(&i.bufCount, -1) == 0 { - defer C.free(unsafe.Pointer(i.arr)) - C.ArrowArrayRelease(i.arr) - if C.ArrowArrayIsReleased(i.arr) != 1 { - panic("did not release C mem") - } - } -} diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go deleted file mode 100644 index 005dda73ff0ec..0000000000000 --- a/go/arrow/cdata/interface.go +++ /dev/null @@ -1,284 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build cgo -// +build cgo - -package cdata - -import ( - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/arrio" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/xerrors" -) - -// SchemaFromPtr is a simple helper function to cast a uintptr to a *CArrowSchema -func SchemaFromPtr(ptr uintptr) *CArrowSchema { return (*CArrowSchema)(unsafe.Pointer(ptr)) } - -// ArrayFromPtr is a simple helper function to cast a uintptr to a *CArrowArray -func ArrayFromPtr(ptr uintptr) *CArrowArray { return (*CArrowArray)(unsafe.Pointer(ptr)) } - -// ImportCArrowField takes in an ArrowSchema from the C Data interface, it -// will copy the metadata and type definitions rather than keep direct references -// to them. It is safe to call C.ArrowSchemaRelease after receiving the field -// from this function. -func ImportCArrowField(out *CArrowSchema) (arrow.Field, error) { - return importSchema(out) -} - -// ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it -// will copy the metadata and schema definitions over from the C object rather -// than keep direct references to them. This function will call ArrowSchemaRelease -// on the passed in schema regardless of whether or not there is an error returned. -// -// This version is intended to take in a schema for a record batch, which means -// that the top level of the schema should be a struct of the schema fields. If -// importing a single array's schema, then use ImportCArrowField instead. -func ImportCArrowSchema(out *CArrowSchema) (*arrow.Schema, error) { - ret, err := importSchema(out) - if err != nil { - return nil, err - } - - return arrow.NewSchema(ret.Type.(*arrow.StructType).Fields(), &ret.Metadata), nil -} - -// ImportCArrayWithType takes a pointer to a C Data ArrowArray and interprets the values -// as an array with the given datatype. 
If err is not nil, then ArrowArrayRelease must still -// be called on arr to release the memory. -// -// The underlying buffers will not be copied, but will instead be referenced directly -// by the resulting array interface object. The passed in ArrowArray will have it's ownership -// transferred to the resulting arrow.Array via ArrowArrayMove. The underlying array.Data -// object that is owned by the Array will now be the owner of the memory pointer and -// will call ArrowArrayRelease when it is released and garbage collected via runtime.SetFinalizer. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. -func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (arrow.Array, error) { - imp, err := importCArrayAsType(arr, dt) - if err != nil { - return nil, err - } - defer imp.data.Release() - return array.MakeFromData(imp.data), nil -} - -// ImportCArray takes a pointer to both a C Data ArrowArray and C Data ArrowSchema in order -// to import them into usable Go Objects. If err is not nil, then ArrowArrayRelease must still -// be called on arr to release the memory. The ArrowSchemaRelease will be called on the passed in -// schema regardless of whether there is an error or not. -// -// The Schema will be copied with the information used to populate the returned Field, complete -// with metadata. The array will reference the same memory that is referred to by the ArrowArray -// object and take ownership of it as per ImportCArrayWithType. The returned arrow.Array will -// own the C memory and call ArrowArrayRelease when the array.Data object is cleaned up. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. 
-func ImportCArray(arr *CArrowArray, schema *CArrowSchema) (arrow.Field, arrow.Array, error) { - field, err := importSchema(schema) - if err != nil { - return field, nil, err - } - - ret, err := ImportCArrayWithType(arr, field.Type) - return field, ret, err -} - -// ImportCRecordBatchWithSchema is used for importing a Record Batch array when the schema -// is already known such as when receiving record batches through a stream. -// -// All of the semantics regarding memory ownership are the same as when calling -// ImportCRecordBatch directly with a schema. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. -func ImportCRecordBatchWithSchema(arr *CArrowArray, sc *arrow.Schema) (arrow.Record, error) { - imp, err := importCArrayAsType(arr, arrow.StructOf(sc.Fields()...)) - if err != nil { - return nil, err - } - defer imp.data.Release() - - st := array.NewStructData(imp.data) - defer st.Release() - - // now that we have our fields, we can split them out into the slice of arrays - // and construct a record batch from them to return. - cols := make([]arrow.Array, st.NumField()) - for i := 0; i < st.NumField(); i++ { - cols[i] = st.Field(i) - } - - return array.NewRecord(sc, cols, int64(st.Len())), nil -} - -// ImportCRecordBatch imports an ArrowArray from C as a record batch. If err is not nil, -// then ArrowArrayRelease must still be called to release the memory. -// -// A record batch is represented in the C Data Interface as a Struct Array whose fields -// are the columns of the record batch. Thus after importing the schema passed in here, -// if it is not a Struct type, this will return an error. As with ImportCArray, the -// columns in the record batch will take ownership of the CArrowArray memory if successful. -// Since ArrowArrayMove is used, it's still safe to call ArrowArrayRelease on the source -// regardless. 
But if there is an error, it *MUST* be called to ensure there is no memory leak. -// -// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, -// it does not take ownership of the actual arr object itself. -func ImportCRecordBatch(arr *CArrowArray, sc *CArrowSchema) (arrow.Record, error) { - field, err := importSchema(sc) - if err != nil { - return nil, err - } - - if field.Type.ID() != arrow.STRUCT { - return nil, xerrors.New("recordbatch array import must be of struct type") - } - - return ImportCRecordBatchWithSchema(arr, arrow.NewSchema(field.Type.(*arrow.StructType).Fields(), &field.Metadata)) -} - -// ImportCArrayStream creates an arrio.Reader from an ArrowArrayStream taking ownership -// of the underlying stream object via ArrowArrayStreamMove. -// -// The records returned by this reader must be released manually after they are returned. -// The reader itself will release the stream via SetFinalizer when it is garbage collected. -// It will return (nil, io.EOF) from the Read function when there are no more records to return. -// -// NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, -// it does not take ownership of the actual stream object itself. -// -// Deprecated: This will panic if importing the schema fails (which is possible). -// Prefer ImportCRecordReader instead. -func ImportCArrayStream(stream *CArrowArrayStream, schema *arrow.Schema) arrio.Reader { - reader, err := ImportCRecordReader(stream, schema) - if err != nil { - panic(err) - } - return reader -} - -// ImportCStreamReader creates an arrio.Reader from an ArrowArrayStream taking ownership -// of the underlying stream object via ArrowArrayStreamMove. -// -// The records returned by this reader must be released manually after they are returned. -// The reader itself will release the stream via SetFinalizer when it is garbage collected. 
-// It will return (nil, io.EOF) from the Read function when there are no more records to return. -// -// NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, -// it does not take ownership of the actual stream object itself. -func ImportCRecordReader(stream *CArrowArrayStream, schema *arrow.Schema) (arrio.Reader, error) { - out := &nativeCRecordBatchReader{schema: schema} - if err := initReader(out, stream); err != nil { - return nil, err - } - return out, nil -} - -// ExportArrowSchema populates the passed in CArrowSchema with the schema passed in so -// that it can be passed to some consumer of the C Data Interface. The `release` function -// is tied to a callback in order to properly release any memory that was allocated during -// the populating of the struct. Any memory allocated will be allocated using malloc -// which means that it is invisible to the Go Garbage Collector and must be freed manually -// using the callback on the CArrowSchema object. -// -// WARNING: the output ArrowSchema MUST BE ZERO INITIALIZED, or the Go garbage collector -// may error at runtime, due to CGO rules ("the current implementation may sometimes -// cause a runtime error if the contents of the C memory appear to be a Go pointer"). -// You have been warned! -func ExportArrowSchema(schema *arrow.Schema, out *CArrowSchema) { - dummy := arrow.Field{Type: arrow.StructOf(schema.Fields()...), Metadata: schema.Metadata()} - exportField(dummy, out) -} - -// ExportArrowRecordBatch populates the passed in CArrowArray (and optionally the schema too) -// by sharing the memory used for the buffers of each column's arrays. It does not -// copy the data, and will internally increment the reference counters so that releasing -// the record will not free the memory prematurely. 
-// -// When using CGO, memory passed to C is pinned so that the Go garbage collector won't -// move where it is allocated out from under the C pointer locations, ensuring the C pointers -// stay valid. This is only true until the CGO call returns, at which point the garbage collector -// is free to move things around again. As a result, if the function you're calling is going to -// hold onto the pointers or otherwise continue to reference the memory *after* the call returns, -// you should use the CgoArrowAllocator rather than the GoAllocator (or DefaultAllocator) so that -// the memory which is allocated for the record batch in the first place is allocated in C, -// not by the Go runtime and is therefore not subject to the Garbage collection. -// -// The release function on the populated CArrowArray will properly decrease the reference counts, -// and release the memory if the record has already been released. But since this must be explicitly -// done, make sure it is released so that you do not create a memory leak. -// -// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector -// may error at runtime, due to CGO rules ("the current implementation may sometimes -// cause a runtime error if the contents of the C memory appear to be a Go pointer"). -// You have been warned! 
-func ExportArrowRecordBatch(rb arrow.Record, out *CArrowArray, outSchema *CArrowSchema) { - children := make([]arrow.ArrayData, rb.NumCols()) - for i := range rb.Columns() { - children[i] = rb.Column(i).Data() - } - - data := array.NewData(arrow.StructOf(rb.Schema().Fields()...), int(rb.NumRows()), []*memory.Buffer{nil}, - children, 0, 0) - defer data.Release() - arr := array.NewStructData(data) - defer arr.Release() - - if outSchema != nil { - ExportArrowSchema(rb.Schema(), outSchema) - } - - exportArray(arr, out, nil) -} - -// ExportArrowArray populates the CArrowArray that is passed in with the pointers to the memory -// being used by the arrow.Array passed in, in order to share with zero-copy across the C -// Data Interface. See the documentation for ExportArrowRecordBatch for details on how to ensure -// you do not leak memory and prevent unwanted, undefined or strange behaviors. -// -// WARNING: the output ArrowArray MUST BE ZERO INITIALIZED, or the Go garbage collector -// may error at runtime, due to CGO rules ("the current implementation may sometimes -// cause a runtime error if the contents of the C memory appear to be a Go pointer"). -// You have been warned! -func ExportArrowArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { - exportArray(arr, out, outSchema) -} - -// ExportRecordReader populates the CArrowArrayStream that is passed in with the appropriate -// callbacks to be a working ArrowArrayStream utilizing the passed in RecordReader. The -// CArrowArrayStream takes ownership of the RecordReader until the consumer calls the release -// callback, as such it is unnecessary to call Release on the passed in reader unless it has -// previously been retained. -// -// WARNING: the output ArrowArrayStream MUST BE ZERO INITIALIZED, or the Go garbage -// collector may error at runtime, due to CGO rules ("the current implementation may -// sometimes cause a runtime error if the contents of the C memory appear to be a Go -// pointer"). 
You have been warned! -func ExportRecordReader(reader array.RecordReader, out *CArrowArrayStream) { - exportStream(reader, out) -} - -// ReleaseCArrowArray calls ArrowArrayRelease on the passed in cdata array -func ReleaseCArrowArray(arr *CArrowArray) { releaseArr(arr) } - -// ReleaseCArrowSchema calls ArrowSchemaRelease on the passed in cdata schema -func ReleaseCArrowSchema(schema *CArrowSchema) { releaseSchema(schema) } diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go deleted file mode 100644 index 5315853fc59ca..0000000000000 --- a/go/arrow/cdata/test/test_cimport.go +++ /dev/null @@ -1,178 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build cdata_test -// +build cdata_test - -package main - -import ( - "fmt" - "runtime" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/cdata" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// #include -import "C" - -var alloc = memory.NewCheckedAllocator(memory.NewGoAllocator()) - -//export totalAllocated -func totalAllocated() int64 { - return int64(alloc.CurrentAlloc()) -} - -//export runGC -func runGC() { - runtime.GC() -} - -//export importSchema -func importSchema(ptr uintptr) { - schema, err := cdata.ImportCArrowSchema(cdata.SchemaFromPtr(ptr)) - if err != nil { - panic(err) - } - - expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) - expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) - if !schema.Equal(expectedSchema) { - panic(fmt.Sprintf("schema didn't match: expected %s, got %s", expectedSchema, schema)) - } - if !schema.Metadata().Equal(expectedMetadata) { - panic(fmt.Sprintf("metadata didn't match: expected %s, got %s", expectedMetadata, schema.Metadata())) - } - - fmt.Println("schema matches! 
Huzzah!") -} - -//export importRecordBatch -func importRecordBatch(scptr, rbptr uintptr) { - sc := cdata.SchemaFromPtr(scptr) - rb := cdata.ArrayFromPtr(rbptr) - - rec, err := cdata.ImportCRecordBatch(rb, sc) - if err != nil { - panic(err) - } - defer rec.Release() - - expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) - expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) - - bldr := array.NewRecordBuilder(alloc, expectedSchema) - defer bldr.Release() - - lb := bldr.Field(0).(*array.ListBuilder) - vb := lb.ValueBuilder().(*array.Int32Builder) - - // [[[1], [], None [2, 42]]] - lb.Append(true) - vb.Append(int32(1)) - - lb.Append(true) - lb.Append(false) - - lb.Append(true) - vb.AppendValues([]int32{2, 42}, nil) - - expectedRec := bldr.NewRecord() - defer expectedRec.Release() - - if !array.RecordEqual(expectedRec, rec) { - panic(fmt.Sprintf("records didn't match: expected %s\n got %s", expectedRec, rec)) - } - - fmt.Println("record batch matches huzzah!") -} - -func makeSchema() *arrow.Schema { - meta := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) - return arrow.NewSchema([]arrow.Field{ - {Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}, - }, &meta) -} - -func makeBatch() arrow.Record { - bldr := array.NewRecordBuilder(alloc, makeSchema()) - defer bldr.Release() - - fbldr := bldr.Field(0).(*array.ListBuilder) - valbldr := fbldr.ValueBuilder().(*array.Int32Builder) - - fbldr.Append(true) - valbldr.Append(1) - - fbldr.Append(true) - fbldr.AppendNull() - fbldr.Append(true) - valbldr.Append(2) - valbldr.Append(42) - - return bldr.NewRecord() -} - -//export exportSchema -func exportSchema(schema uintptr) { - cdata.ExportArrowSchema(makeSchema(), cdata.SchemaFromPtr(schema)) -} - -//export exportRecordBatch -func exportRecordBatch(schema, record uintptr) { - batch := makeBatch() - defer batch.Release() - - 
cdata.ExportArrowRecordBatch(batch, cdata.ArrayFromPtr(record), cdata.SchemaFromPtr(schema)) -} - -//export importThenExportSchema -func importThenExportSchema(input, output uintptr) { - schema, err := cdata.ImportCArrowSchema(cdata.SchemaFromPtr(input)) - if err != nil { - panic(err) - } - - cdata.ExportArrowSchema(schema, cdata.SchemaFromPtr(output)) -} - -//export importThenExportRecord -func importThenExportRecord(schemaIn, arrIn uintptr, schemaOut, arrOut uintptr) { - rec, err := cdata.ImportCRecordBatch(cdata.ArrayFromPtr(arrIn), cdata.SchemaFromPtr(schemaIn)) - if err != nil { - panic(err) - } - - defer rec.Release() - cdata.ExportArrowRecordBatch(rec, cdata.ArrayFromPtr(arrOut), cdata.SchemaFromPtr(schemaOut)) -} - -//export roundtripArray -func roundtripArray(arrIn, schema, arrOut uintptr) { - _, arr, err := cdata.ImportCArray(cdata.ArrayFromPtr(arrIn), cdata.SchemaFromPtr(schema)) - if err != nil { - panic(err) - } - defer arr.Release() - - outArr := cdata.ArrayFromPtr(arrOut) - cdata.ExportArrowArray(arr, outArr, nil) -} - -func main() {} diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py deleted file mode 100644 index 4b669f6424437..0000000000000 --- a/go/arrow/cdata/test/test_export_to_cgo.py +++ /dev/null @@ -1,230 +0,0 @@ -#!/usr/bin/env python3 -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import contextlib -import gc -import os -import unittest - -import pyarrow as pa -from pyarrow.cffi import ffi - - -def load_cgotest(): - # XXX what about Darwin? - libext = 'so' - if os.name == 'nt': - libext = 'dll' - - ffi.cdef( - """ - long long totalAllocated(); - void importSchema(uintptr_t ptr); - void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); - void runGC(); - void exportSchema(uintptr_t ptr); - void exportRecordBatch(uintptr_t schema, uintptr_t record); - void importThenExportSchema(uintptr_t input, uintptr_t output); - void importThenExportRecord(uintptr_t schemaIn, uintptr_t arrIn, - uintptr_t schemaOut, uintptr_t arrOut); - void roundtripArray(uintptr_t arrIn, uintptr_t schema, uintptr_t arrOut); - """) - return ffi.dlopen(f'./cgotest.{libext}') - - -cgotest = load_cgotest() - -class BaseTestGoPython(unittest.TestCase): - def setUp(self): - self.c_schema = ffi.new("struct ArrowSchema*") - self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema)) - self.c_array = ffi.new("struct ArrowArray*") - self.ptr_array = int(ffi.cast("uintptr_t", self.c_array)) - - def make_schema(self): - return pa.schema([('ints', pa.list_(pa.int32()))], - metadata={b'key1': b'value1'}) - - def make_batch(self): - return pa.record_batch([[[1], [], None, [2, 42]]], - self.make_schema()) - - def run_gc(self): - # Several Go GC runs can be required to run all finalizers - for i in range(5): - cgotest.runGC() - gc.collect() - - @contextlib.contextmanager - def assert_pyarrow_memory_released(self): - self.run_gc() - old_allocated = pa.total_allocated_bytes() - 
old_go_allocated = cgotest.totalAllocated() - yield - self.run_gc() - diff = pa.total_allocated_bytes() - old_allocated - godiff = cgotest.totalAllocated() - old_go_allocated - self.assertEqual( - pa.total_allocated_bytes(), old_allocated, - f"PyArrow memory was not adequately released: {diff} bytes lost") - self.assertEqual( - cgotest.totalAllocated(), old_go_allocated, - f"Go memory was not properly released: {godiff} bytes lost") - - -class TestPythonToGo(BaseTestGoPython): - - def test_schema(self): - with self.assert_pyarrow_memory_released(): - self.make_schema()._export_to_c(self.ptr_schema) - # Will panic if expectations are not met - cgotest.importSchema(self.ptr_schema) - - def test_record_batch(self): - with self.assert_pyarrow_memory_released(): - self.make_schema()._export_to_c(self.ptr_schema) - self.make_batch()._export_to_c(self.ptr_array) - # Will panic if expectations are not met - cgotest.importRecordBatch(self.ptr_schema, self.ptr_array) - - -class TestGoToPython(BaseTestGoPython): - - def test_get_schema(self): - with self.assert_pyarrow_memory_released(): - cgotest.exportSchema(self.ptr_schema) - - sc = pa.Schema._import_from_c(self.ptr_schema) - assert sc == self.make_schema() - - def test_get_batch(self): - with self.assert_pyarrow_memory_released(): - cgotest.exportRecordBatch(self.ptr_schema, self.ptr_array) - arrnew = pa.RecordBatch._import_from_c(self.ptr_array, self.ptr_schema) - assert arrnew == self.make_batch() - del arrnew - -class TestRoundTrip(BaseTestGoPython): - - def test_schema_roundtrip(self): - with self.assert_pyarrow_memory_released(): - # make sure that Python -> Go -> Python ends up with - # the same exact schema - schema = self.make_schema() - schema._export_to_c(self.ptr_schema) - del schema - - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - - cgotest.importThenExportSchema(self.ptr_schema, ptr_schema) - schema_new = pa.Schema._import_from_c(ptr_schema) - assert 
schema_new == self.make_schema() - del c_schema - - def test_batch_roundtrip(self): - with self.assert_pyarrow_memory_released(): - # make sure that Python -> Go -> Python for record - # batches works correctly and gets the same data in the end - schema = self.make_schema() - batch = self.make_batch() - schema._export_to_c(self.ptr_schema) - batch._export_to_c(self.ptr_array) - del schema - del batch - - c_schema = ffi.new("struct ArrowSchema*") - c_batch = ffi.new("struct ArrowArray*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - ptr_batch = int(ffi.cast("uintptr_t", c_batch)) - - cgotest.importThenExportRecord(self.ptr_schema, self.ptr_array, - ptr_schema, ptr_batch) - batch_new = pa.RecordBatch._import_from_c(ptr_batch, ptr_schema) - assert batch_new == self.make_batch() - del batch_new - del c_schema - del c_batch - - # commented out types can be uncommented after - # GH-14875 is addressed - _test_pyarrow_types = [ - pa.null(), - pa.bool_(), - pa.int32(), - pa.time32("s"), - pa.time64("us"), - pa.date32(), - pa.timestamp("us"), - pa.timestamp("us", tz="UTC"), - pa.timestamp("us", tz="Europe/Paris"), - pa.duration("s"), - pa.duration("ms"), - pa.duration("us"), - pa.duration("ns"), - pa.float16(), - pa.float32(), - pa.float64(), - pa.decimal128(19, 4), - pa.string(), - pa.binary(), - pa.binary(10), - pa.large_string(), - pa.large_binary(), - pa.list_(pa.int32()), - pa.list_(pa.int32(), 2), - pa.large_list(pa.uint16()), - pa.struct([ - pa.field("a", pa.int32()), - pa.field("b", pa.int8()), - pa.field("c", pa.string()), - ]), - pa.struct([ - pa.field("a", pa.int32(), nullable=False), - pa.field("b", pa.int8(), nullable=False), - pa.field("c", pa.string()), - ]), - pa.dictionary(pa.int8(), pa.int64()), - pa.dictionary(pa.int8(), pa.string()), - pa.map_(pa.string(), pa.int32()), - pa.map_(pa.int64(), pa.int32()), - # pa.run_end_encoded(pa.int16(), pa.int64()), - ] - - def test_empty_roundtrip(self): - for typ in self._test_pyarrow_types: - with 
self.subTest(typ=typ): - with self.assert_pyarrow_memory_released(): - a = pa.array([], typ) - a._export_to_c(self.ptr_array) - typ._export_to_c(self.ptr_schema) - - c_arr = ffi.new("struct ArrowArray*") - ptr_arr = int(ffi.cast("uintptr_t", c_arr)) - - cgotest.roundtripArray(self.ptr_array, self.ptr_schema, ptr_arr) - b = pa.Array._import_from_c(ptr_arr, typ) - b.validate(full=True) - assert a.to_pylist() == b.to_pylist() - assert a.type == b.type - del a - del b - -if __name__ == '__main__': - unittest.main(verbosity=2) diff --git a/go/arrow/cdata/trampoline.c b/go/arrow/cdata/trampoline.c deleted file mode 100644 index 01db13fab4845..0000000000000 --- a/go/arrow/cdata/trampoline.c +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include - -#include "arrow/c/abi.h" - -int streamGetSchema(struct ArrowArrayStream*, struct ArrowSchema*); -int streamGetNext(struct ArrowArrayStream*, struct ArrowArray*); - -int streamGetSchemaTrampoline(struct ArrowArrayStream* stream, struct ArrowSchema* out) { - // XXX(https://github.com/apache/arrow-adbc/issues/729) - memset(out, 0, sizeof(*out)); - return streamGetSchema(stream, out); -} - -int streamGetNextTrampoline(struct ArrowArrayStream* stream, struct ArrowArray* out) { - // XXX(https://github.com/apache/arrow-adbc/issues/729) - memset(out, 0, sizeof(*out)); - return streamGetNext(stream, out); -} diff --git a/go/arrow/cdata/utils.h b/go/arrow/cdata/utils.h deleted file mode 100644 index dda46b72b728b..0000000000000 --- a/go/arrow/cdata/utils.h +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// +build cgo -// +build test - -// metadata keys 1: {"key1", "key2"} -// metadata values 1: {"", "bar"} -static const char kEncodedMeta1LE[] = { - 2, 0, 0, 0, - 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, - 4, 0, 0, 0, 'k', 'e', 'y', '2', 3, 0, 0, 0, 'b', 'a', 'r'}; - -static const char kEncodedMeta1BE[] = { - 0, 0, 0, 2, - 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, - 0, 0, 0, 4, 'k', 'e', 'y', '2', 0, 0, 0, 3, 'b', 'a', 'r'}; - -static const char* kMetadataKeys2[] = {"key"}; -static const char* kMetadataValues2[] = {"abcde"}; - -// metadata keys 2: {"key"} -// metadata values 2: {"abcde"} -static const char kEncodedMeta2LE[] = { - 1, 0, 0, 0, - 3, 0, 0, 0, 'k', 'e', 'y', 5, 0, 0, 0, 'a', 'b', 'c', 'd', 'e'}; - -static const char kEncodedMeta2BE[] = { - 0, 0, 0, 1, - 0, 0, 0, 3, 'k', 'e', 'y', 0, 0, 0, 5, 'a', 'b', 'c', 'd', 'e'}; - - diff --git a/go/arrow/compare.go b/go/arrow/compare.go deleted file mode 100644 index 58569b332c4f1..0000000000000 --- a/go/arrow/compare.go +++ /dev/null @@ -1,153 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package arrow - -import ( - "reflect" -) - -type typeEqualsConfig struct { - metadata bool -} - -// TypeEqualOption is a functional option type used for configuring type -// equality checks. -type TypeEqualOption func(*typeEqualsConfig) - -// CheckMetadata is an option for TypeEqual that allows checking for metadata -// equality besides type equality. It only makes sense for types with metadata. -func CheckMetadata() TypeEqualOption { - return func(cfg *typeEqualsConfig) { - cfg.metadata = true - } -} - -// TypeEqual checks if two DataType are the same, optionally checking metadata -// equality for STRUCT types. -func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool { - var cfg typeEqualsConfig - for _, opt := range opts { - opt(&cfg) - } - - switch { - case left == nil || right == nil: - return left == nil && right == nil - case left.ID() != right.ID(): - return false - } - - switch l := left.(type) { - case ExtensionType: - return l.ExtensionEquals(right.(ExtensionType)) - case *ListType: - if !TypeEqual(l.Elem(), right.(*ListType).Elem(), opts...) { - return false - } - if cfg.metadata && !l.elem.Metadata.Equal(right.(*ListType).elem.Metadata) { - return false - } - return l.elem.Nullable == right.(*ListType).elem.Nullable - case *FixedSizeListType: - if !TypeEqual(l.Elem(), right.(*FixedSizeListType).Elem(), opts...) { - return false - } - if cfg.metadata && !l.elem.Metadata.Equal(right.(*FixedSizeListType).elem.Metadata) { - return false - } - return l.n == right.(*FixedSizeListType).n && l.elem.Nullable == right.(*FixedSizeListType).elem.Nullable - case *MapType: - if !TypeEqual(l.KeyType(), right.(*MapType).KeyType(), opts...) { - return false - } - if !TypeEqual(l.ItemType(), right.(*MapType).ItemType(), opts...) 
{ - return false - } - if l.KeyField().Nullable != right.(*MapType).KeyField().Nullable { - return false - } - if l.ItemField().Nullable != right.(*MapType).ItemField().Nullable { - return false - } - if cfg.metadata { - if !l.KeyField().Metadata.Equal(right.(*MapType).KeyField().Metadata) { - return false - } - if !l.ItemField().Metadata.Equal(right.(*MapType).ItemField().Metadata) { - return false - } - } - return true - case *StructType: - r := right.(*StructType) - switch { - case len(l.fields) != len(r.fields): - return false - case !reflect.DeepEqual(l.index, r.index): - return false - } - for i := range l.fields { - leftField, rightField := l.fields[i], r.fields[i] - switch { - case leftField.Name != rightField.Name: - return false - case leftField.Nullable != rightField.Nullable: - return false - case !TypeEqual(leftField.Type, rightField.Type, opts...): - return false - case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): - return false - } - } - return true - case UnionType: - r := right.(UnionType) - if l.Mode() != r.Mode() { - return false - } - - if !reflect.DeepEqual(l.ChildIDs(), r.ChildIDs()) { - return false - } - - for i := range l.Fields() { - leftField, rightField := l.Fields()[i], r.Fields()[i] - switch { - case leftField.Name != rightField.Name: - return false - case leftField.Nullable != rightField.Nullable: - return false - case !TypeEqual(leftField.Type, rightField.Type, opts...): - return false - case cfg.metadata && !leftField.Metadata.Equal(rightField.Metadata): - return false - case l.TypeCodes()[i] != r.TypeCodes()[i]: - return false - } - } - return true - case *TimestampType: - r := right.(*TimestampType) - return l.Unit == r.Unit && l.TimeZone == r.TimeZone - case *RunEndEncodedType: - r := right.(*RunEndEncodedType) - return TypeEqual(l.Encoded(), r.Encoded(), opts...) && - TypeEqual(l.runEnds, r.runEnds, opts...) 
- default: - return reflect.DeepEqual(left, right) - } -} diff --git a/go/arrow/compare_test.go b/go/arrow/compare_test.go deleted file mode 100644 index ca87621eadcb9..0000000000000 --- a/go/arrow/compare_test.go +++ /dev/null @@ -1,397 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package arrow - -import ( - "testing" - "time" -) - -func TestTypeEqual(t *testing.T) { - tests := []struct { - left, right DataType - want bool - checkMetadata bool - }{ - { - nil, nil, true, false, - }, - { - nil, PrimitiveTypes.Uint8, false, false, - }, - { - PrimitiveTypes.Float32, nil, false, false, - }, - { - PrimitiveTypes.Float64, PrimitiveTypes.Int32, false, false, - }, - { - Null, Null, true, false, - }, - { - Null, new(NullType), true, false, - }, - { - &BinaryType{}, &StringType{}, false, false, - }, - { - &LargeBinaryType{}, &LargeStringType{}, false, false, - }, - { - BinaryTypes.LargeBinary, &LargeBinaryType{}, true, false, - }, - { - BinaryTypes.LargeString, &LargeStringType{}, true, false, - }, - { - &Time32Type{Unit: Second}, &Time32Type{Unit: Second}, true, false, - }, - { - &Time32Type{Unit: Millisecond}, &Time32Type{Unit: Second}, false, false, - }, - { - &Time64Type{Unit: Nanosecond}, &Time64Type{Unit: Nanosecond}, true, false, - }, - { - &Time64Type{Unit: Nanosecond}, &Time64Type{Unit: Microsecond}, false, false, - }, - { - &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Second, TimeZone: "UTC"}, true, false, - }, - { - &TimestampType{Unit: Microsecond, TimeZone: "UTC"}, &TimestampType{Unit: Millisecond, TimeZone: "UTC"}, false, false, - }, - { - &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Second, TimeZone: "CET"}, false, false, - }, - { - &TimestampType{Unit: Second, TimeZone: "UTC"}, &TimestampType{Unit: Nanosecond, TimeZone: "CET"}, false, false, - }, - { - &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, true, false, - }, - { - &ListType{elem: Field{Type: PrimitiveTypes.Uint64}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint32}}, false, false, - }, - { - &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, &ListType{elem: Field{Type: &Time32Type{Unit: Millisecond}}}, true, false, - }, - { - &ListType{elem: Field{Type: 
&Time32Type{Unit: Millisecond}}}, &ListType{elem: Field{Type: &Time32Type{Unit: Second}}}, false, false, - }, - { - &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, true, false, - }, - { - &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint8}}}}, false, false, - }, - { - &ListType{elem: Field{Type: &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint16}}}}}}, &ListType{elem: Field{Type: &ListType{elem: Field{Type: PrimitiveTypes.Uint8}}}}, false, false, - }, - { - &ListType{elem: Field{Type: PrimitiveTypes.Uint64, Nullable: true}}, &ListType{elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, false, true, - }, - { - &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 3, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, false, true, - }, - { - &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, true, true, - }, - { - &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: false}}, &FixedSizeListType{n: 2, elem: Field{Type: PrimitiveTypes.Uint64, Nullable: true}}, false, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - false, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: 
true}, - }, - index: map[string][]int{"f1": {0}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f0", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f0": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - false, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - }, - &StructType{ - fields: []Field{ - {Name: "f2", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f2": {0}}, - }, - false, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - true, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: 
PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - true, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - meta: MetadataFrom(map[string]string{"k1": "v1", "k2": "v2"}), - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - meta: MetadataFrom(map[string]string{"k2": "v2", "k1": "v1"}), - }, - true, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - meta: MetadataFrom(map[string]string{"k1": "v1"}), - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0}}, - meta: MetadataFrom(map[string]string{"k1": "v2"}), - }, - true, false, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true, Metadata: MetadataFrom(map[string]string{"k1": "v1"})}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true, Metadata: MetadataFrom(map[string]string{"k1": "v2"})}, - {Name: "f2", Type: PrimitiveTypes.Float32, Nullable: false}, - }, - index: map[string][]int{"f1": {0}, "f2": {1}}, - }, - false, true, - }, - { - &StructType{ - fields: 
[]Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - true, true, - }, - { - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - &StructType{ - fields: []Field{ - {Name: "f1", Type: PrimitiveTypes.Uint16, Nullable: true}, - {Name: "f1", Type: PrimitiveTypes.Uint32, Nullable: true}, - }, - index: map[string][]int{"f1": {0, 1}}, - }, - false, true, - }, - { - MapOf(BinaryTypes.String, PrimitiveTypes.Int32), - MapOf(BinaryTypes.String, PrimitiveTypes.Int32), - true, false, - }, - { - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - true, false, - }, - { - MapOf(BinaryTypes.String, &TimestampType{ - Unit: 0, - TimeZone: "UTC", - loc: time.UTC, - }), - MapOf(BinaryTypes.String, &TimestampType{ - Unit: 0, - TimeZone: "UTC", - }), - true, false, - }, - { - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_us), - false, false, - }, - { - MapOf(BinaryTypes.String, FixedWidthTypes.Timestamp_ns), - MapOf(PrimitiveTypes.Int32, FixedWidthTypes.Timestamp_ns), - false, false, - }, - { - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - true, true, - }, - { - 
MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v2"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v2"})), - true, false, - }, - { - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v2"})), - false, true, - }, - { - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v1"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - MapOfWithMetadata(BinaryTypes.String, MetadataFrom(map[string]string{"key": "v2"}), FixedWidthTypes.Timestamp_ns, MetadataFrom(map[string]string{"item": "v1"})), - false, true, - }, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - var got bool - if test.checkMetadata { - got = TypeEqual(test.left, test.right, CheckMetadata()) - } else { - got = TypeEqual(test.left, test.right) - } - if got != test.want { - t.Fatalf("TypeEqual(%v, %v, %v): got=%v, want=%v", test.left, test.right, test.checkMetadata, got, test.want) - } - }) - } -} diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go deleted file mode 100644 index 51ca027d53375..0000000000000 --- a/go/arrow/compute/arithmetic.go +++ /dev/null @@ -1,1229 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -type ( - RoundOptions = kernels.RoundOptions - RoundMode = kernels.RoundMode - RoundToMultipleOptions = kernels.RoundToMultipleOptions -) - -const ( - // Round to nearest integer less than or equal in magnitude (aka "floor") - RoundDown = kernels.RoundDown - // Round to nearest integer greater than or equal in magnitude (aka "ceil") - RoundUp = kernels.RoundUp - // Get integral part without fractional digits (aka "trunc") - RoundTowardsZero = kernels.TowardsZero - // Round negative values with DOWN and positive values with UP - RoundTowardsInfinity = kernels.AwayFromZero - // Round ties with DOWN (aka "round half towards negative infinity") - RoundHalfDown = kernels.HalfDown - // Round ties with UP (aka "round half towards positive infinity") - RoundHalfUp = kernels.HalfUp - // Round ties with TowardsZero (aka "round half away from infinity") - RoundHalfTowardsZero = kernels.HalfTowardsZero - // Round ties with AwayFromZero (aka "round half towards 
infinity") - RoundHalfTowardsInfinity = kernels.HalfAwayFromZero - // Round ties to nearest even integer - RoundHalfToEven = kernels.HalfToEven - // Round ties to nearest odd integer - RoundHalfToOdd = kernels.HalfToOdd -) - -var ( - DefaultRoundOptions = RoundOptions{NDigits: 0, Mode: RoundHalfToEven} - DefaultRoundToMultipleOptions = RoundToMultipleOptions{ - Multiple: scalar.NewFloat64Scalar(1), Mode: RoundHalfToEven} -) - -type arithmeticFunction struct { - ScalarFunction - - promote decimalPromotion -} - -func (fn *arithmeticFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticFunction) checkDecimals(vals ...arrow.DataType) error { - if !hasDecimal(vals...) { - return nil - } - - if len(vals) != 2 { - return nil - } - - if fn.promote == decPromoteNone { - return fmt.Errorf("%w: invalid decimal function: %s", arrow.ErrInvalid, fn.name) - } - - return castBinaryDecimalArgs(fn.promote, vals...) -} - -func (fn *arithmeticFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if err := fn.checkDecimals(vals...); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - - // only promote types for binary funcs - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - if unit, istime := commonTemporalResolution(vals...); istime { - replaceTemporalTypes(unit, vals...) - } else { - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - } - } - - return fn.DispatchExact(vals...) -} - -// an arithmetic function which promotes integers and decimal -// arguments to doubles. 
-type arithmeticFloatingPointFunc struct { - arithmeticFunction -} - -func (fn *arithmeticFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - } - - for i, v := range vals { - if arrow.IsInteger(v.ID()) || arrow.IsDecimal(v.ID()) { - vals[i] = arrow.PrimitiveTypes.Float64 - } - } - - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - - return fn.DispatchExact(vals...) -} - -// function that promotes only decimal arguments to float64 -type arithmeticDecimalToFloatingPointFunc struct { - arithmeticFunction -} - -func (fn *arithmeticDecimalToFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticDecimalToFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - } - - for i, t := range vals { - if arrow.IsDecimal(t.ID()) { - vals[i] = arrow.PrimitiveTypes.Float64 - } - } - - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - - return fn.DispatchExact(vals...) 
-} - -// function that promotes only integer arguments to float64 -type arithmeticIntegerToFloatingPointFunc struct { - arithmeticFunction -} - -func (fn *arithmeticIntegerToFloatingPointFunc) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, fn, opts, -1, args...) -} - -func (fn *arithmeticIntegerToFloatingPointFunc) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - if err := fn.checkArity(len(vals)); err != nil { - return nil, err - } - - if err := fn.checkDecimals(vals...); err != nil { - return nil, err - } - - if kn, err := fn.DispatchExact(vals...); err == nil { - return kn, nil - } - - ensureDictionaryDecoded(vals...) - if len(vals) == 2 { - replaceNullWithOtherType(vals...) - } - - for i, t := range vals { - if arrow.IsInteger(t.ID()) { - vals[i] = arrow.PrimitiveTypes.Float64 - } - } - - if dt := commonNumeric(vals...); dt != nil { - replaceTypes(dt, vals...) - } - - return fn.DispatchExact(vals...) -} - -var ( - absoluteValueUncheckedDoc = FunctionDoc{ - Summary: "Calculate the absolute value of the argument, element-wise", - Description: `Results will wrap around on integer overflow -Use function "abs" if you want overflows to return an error`, - ArgNames: []string{"x"}, - } - absoluteValueDoc = FunctionDoc{ - Summary: "Calculate the absolute value of the argument element-wise", - Description: `This function returns an error on overflow. For a variant that -won't fail on overflow, use function "abs_unchecked"`, - ArgNames: []string{"x"}, - } - addUncheckedDoc = FunctionDoc{ - Summary: "Add the arguments element-wise", - Description: `Results will wrap around on integer overflow -Use the function "add" if you want overflow to return an error`, - ArgNames: []string{"x", "y"}, - } - addDoc = FunctionDoc{ - Summary: "Add the arguments element-wise", - Description: `This function returns an error on overflow. 
-For a variant that won't fail on overflow, use function "add_unchecked"`, - ArgNames: []string{"x", "y"}, - } - subUncheckedDoc = FunctionDoc{ - Summary: "Subtract the arguments element-wise", - Description: `This Results will wrap around on integer overflow. -Use the function "sub" if you want overflow to return an error`, - ArgNames: []string{"x", "y"}, - } - subDoc = FunctionDoc{ - Summary: "Subtract the arguments element-wise", - Description: `This function returns an error on overflow. -For a variant that won't fail on overflow, use the function "sub_unchecked"`, - ArgNames: []string{"x", "y"}, - } - mulUncheckedDoc = FunctionDoc{ - Summary: "Multiply the arguments element-wise", - Description: `Results will wrap around on integer overflow. -Use function "multiply" if you want overflow to return an error`, - ArgNames: []string{"x", "y"}, - } - mulDoc = FunctionDoc{ - Summary: "Multiply the arguments element-wise", - Description: `This function returns an error on overflow. -For a variant that won't fail on overflow, use the function -"multiply_unchecked"`, - ArgNames: []string{"x", "y"}, - } - divUncheckedDoc = FunctionDoc{ - Summary: "Divide the arguments element-wise", - Description: `Integer division by zero returns an error. However integer -overflow wraps around, and floating-point division by zero returns Inf. 
-Use the function "divide" if you want to get an error in all the -aforementioned cases.`, - ArgNames: []string{"dividend", "divisor"}, - } - divDoc = FunctionDoc{ - Summary: "Divide the arguments element-wise", - Description: `An error is returned when trying to divide by zero, -or when integer overflow is encountered.`, - ArgNames: []string{"dividend", "divisor"}, - } - negateUncheckedDoc = FunctionDoc{ - Summary: "Negate the argument element-wise", - Description: `Results will wrap around on integer overflow -Use function "negate" if you want overflow to return an error`, - ArgNames: []string{"x"}, - } - negateDoc = FunctionDoc{ - Summary: "Negate the argument element-wise", - Description: `This function returns an error on overflow. For a variant -that doesn't fail on overflow, use the function "negate_unchecked".`, - ArgNames: []string{"x"}, - } - powUncheckedDoc = FunctionDoc{ - Summary: "Raise argument to a power element-wise", - Description: `Integers to negative integer powers return an error. -However, integer overflow wraps around. If either base or exponent is null -the result will be null.`, - ArgNames: []string{"base", "exponent"}, - } - powDoc = FunctionDoc{ - Summary: "Raise argument to a power element-wise", - Description: `An error is returned when an integer is raised to a negative -power or an integer overflow occurs.`, - ArgNames: []string{"base", "exponent"}, - } - sqrtUncheckedDoc = FunctionDoc{ - Summary: "Takes the square root of arguments element-wise", - Description: `A negative argument returns an NaN. For a variant that returns -an error, use function "sqrt"`, - ArgNames: []string{"x"}, - } - sqrtDoc = FunctionDoc{ - Summary: "Takes the square root of arguments element-wise", - Description: `A negative argument returns an error. 
For a variant that -instead returns NaN, use function "sqrt_unchecked"`, - ArgNames: []string{"x"}, - } - signDoc = FunctionDoc{ - Summary: "Get the signedness of the arguments element-wise", - Description: `Output is -1 if <0, 1 if >0 and 0 for 0. -NaN values return NaN. Integral values return signedness as Int8, -and floating-point values return it with the same type as the input values.`, - ArgNames: []string{"x"}, - } - bitWiseNotDoc = FunctionDoc{ - Summary: "Bit-wise negate the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x"}, - } - bitWiseAndDoc = FunctionDoc{ - Summary: "Bit-wise AND the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x", "y"}, - } - bitWiseOrDoc = FunctionDoc{ - Summary: "Bit-wise OR the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x", "y"}, - } - bitWiseXorDoc = FunctionDoc{ - Summary: "Bit-wise XOR the arguments element-wise", - Description: "Null values return null", - ArgNames: []string{"x", "y"}, - } - shiftLeftUncheckedDoc = FunctionDoc{ - Summary: "Left shift `x` by `y`", - Description: `The shift operates as if on the two's complement representation -of the number. In other words, this is equivalent to multiplying "x" by 2 -to the power of "y", even if overflow occurs. -"x" is returned if "y" (the amount to shift by) is (1) negative or (2) -greater than or equal to the precision of "x". -Use function "shift_left" if you want an invalid shift amount to -return an error.`, - ArgNames: []string{"x", "y"}, - } - shiftLeftDoc = FunctionDoc{ - Summary: "Left shift `x` by `y`", - Description: `The shift operates as if on the two's complement representation -of the number. In other words, this is equivalent to multiplying "x" by 2 -to the power of "y", even if overflow occurs. -An error is raised if "y" (the amount to shift by) is (1) negative or (2) -greater than or equal to the precision of "x". 
-See "shift_left_unchecked" for a variant that doesn't fail for an invalid -shift amount.`, - ArgNames: []string{"x", "y"}, - } - shiftRightUncheckedDoc = FunctionDoc{ - Summary: "Right shift `x` by `y`", - Description: `This is equivalent to dividing "x" by 2 to the power "y". -"x" is returned if "y" (the amount to shift by) is: (1) negative or -(2) greater than or equal to the precision of "x". -Use function "shift_right" if you want an invalid -shift amount to return an error.`, - ArgNames: []string{"x", "y"}, - } - shiftRightDoc = FunctionDoc{ - Summary: "Right shift `x` by `y`", - Description: `This is equivalent to dividing "x" by 2 to the power "y". -An error is raised if "y" (the amount to shift by) is (1) negative or -(2) greater than or equal to the precision of "x". -See "shift_right_unchecked" for a variant that doesn't fail for -an invalid shift amount.`, - ArgNames: []string{"x", "y"}, - } - sinUncheckedDoc = FunctionDoc{ - Summary: "Compute the sine", - Description: `NaN is returned for invalid input values; to raise an error -instead, see "sin"`, - ArgNames: []string{"x"}, - } - sinDoc = FunctionDoc{ - Summary: "Compute the sine", - Description: `Invalid input values raise an error; -to return NaN instead, see "sin_unchecked".`, - ArgNames: []string{"x"}, - } - cosUncheckedDoc = FunctionDoc{ - Summary: "Compute the cosine", - Description: `NaN is returned for invalid input values; -to raise an error instead, see "cos".`, - ArgNames: []string{"x"}, - } - cosDoc = FunctionDoc{ - Summary: "Compute the cosine", - Description: `Infinite values raise an error; -to return NaN instead, see "cos_unchecked".`, - ArgNames: []string{"x"}, - } - tanUncheckedDoc = FunctionDoc{ - Summary: "Compute the tangent", - Description: `NaN is returned for invalid input values; -to raise an error instead see "tan".`, - ArgNames: []string{"x"}, - } - tanDoc = FunctionDoc{ - Summary: "Compute the tangent", - Description: `Infinite values raise an error; -to return NaN 
instead, see "tan_unchecked".`, - ArgNames: []string{"x"}, - } - asinUncheckedDoc = FunctionDoc{ - Summary: "Compute the inverse sine", - Description: `NaN is returned for invalid input values; -to raise an error instead, see "asin"`, - ArgNames: []string{"x"}, - } - asinDoc = FunctionDoc{ - Summary: "Compute the inverse sine", - Description: `Invalid input values raise an error; -to return NaN instead see asin_unchecked.`, - ArgNames: []string{"x"}, - } - acosUncheckedDoc = FunctionDoc{ - Summary: "Compute the inverse cosine", - Description: `NaN is returned for invalid input values; -to raise an error instead, see "acos".`, - ArgNames: []string{"x"}, - } - acosDoc = FunctionDoc{ - Summary: "Compute the inverse cosine", - Description: `Invalid input values raise an error; -to return NaN instead, see "acos_unchecked".`, - ArgNames: []string{"x"}, - } - atanDoc = FunctionDoc{ - Summary: "Compute the inverse tangent of x", - Description: `The return value is in the range [-pi/2, pi/2]; -for a full return range [-pi, pi], see "atan2"`, - ArgNames: []string{"x"}, - } - atan2Doc = FunctionDoc{ - Summary: "Compute the inverse tangent of y/x", - Description: "The return value is in the range [-pi, pi].", - ArgNames: []string{"y", "x"}, - } - lnUncheckedDoc = FunctionDoc{ - Summary: "Compute natural logarithm", - Description: `Non-positive values return -Inf or NaN. Null values return null. -Use function "ln" if you want non-positive values to raise an error.`, - ArgNames: []string{"x"}, - } - lnDoc = FunctionDoc{ - Summary: "Compute natural logarithm", - Description: `Non-positive values raise an error. Null values return null. -Use function "ln_unchecked" if you want non-positive values to return --Inf or NaN`, - ArgNames: []string{"x"}, - } - log10UncheckedDoc = FunctionDoc{ - Summary: "Compute base 10 logarithm", - Description: `Non-positive values return -Inf or NaN. Null values return null. 
-Use function "log10" if you want non-positive values to raise an error.`, - ArgNames: []string{"x"}, - } - log10Doc = FunctionDoc{ - Summary: "Compute base 10 logarithm", - Description: `Non-positive values raise an error. Null values return null. -Use function "log10_unchecked" if you want non-positive values to return --Inf or NaN.`, - ArgNames: []string{"x"}, - } - log2UncheckedDoc = FunctionDoc{ - Summary: "Compute base 2 logarithm", - Description: `Non-positive values return -Inf or NaN. Null values return null. -Use function "log2" if you want non-positive values to raise an error.`, - ArgNames: []string{"x"}, - } - log2Doc = FunctionDoc{ - Summary: "Compute base 2 logarithm", - Description: `Non-positive values raise an error. Null values return null. -Use function "log2_unchecked" if you want non-positive values to -return -Inf or NaN`, - ArgNames: []string{"x"}, - } - log1pUncheckedDoc = FunctionDoc{ - Summary: "Compute natural log of (1+x)", - Description: `Values <= -1 return -Inf or NaN. Null values return null. -This function may be more precise than log(1 + x) for x close to zero. -Use function "log1p" if you want invalid values to raise an error.`, - ArgNames: []string{"x"}, - } - log1pDoc = FunctionDoc{ - Summary: "Compute natural log of (1+x)", - Description: `Values <= -1 return -Inf or NaN. Null values return null. -This function may be more precise than (1 + x) for x close to zero. -Use function "log1p_unchecked" if you want invalid values to return --Inf or NaN.`, - ArgNames: []string{"x"}, - } - logbUncheckedDoc = FunctionDoc{ - Summary: "Compute base `b` logarithm", - Description: `Values <= 0 return -Inf or NaN. Null values return null. -Use function "logb" if you want non-positive values to raise an error.`, - ArgNames: []string{"x", "b"}, - } - logbDoc = FunctionDoc{ - Summary: "Compute base `b` logarithm", - Description: `Values <= 0 returns an error. Null values return null. 
-Use function "logb_unchecked" if you want non-positive values to return --Inf or NaN.`, - ArgNames: []string{"x", "b"}, - } - floorDoc = FunctionDoc{ - Summary: "Round down to the nearest integer", - Description: "Compute the largest integer value not greater than `x`", - ArgNames: []string{"x"}, - } - ceilDoc = FunctionDoc{ - Summary: "Round up to the nearest integer", - Description: "Compute the smallest integer value not less than `x`", - ArgNames: []string{"x"}, - } - truncDoc = FunctionDoc{ - Summary: "Compute the integral part", - Description: "Compute the nearest integer not greater than `x`", - ArgNames: []string{"x"}, - } - roundDoc = FunctionDoc{ - Summary: "Round to a given precision", - Description: `Options are used to control the number of digits and rounding mode. -Default behavior is to round to the nearest integer and -use half-to-even rule to break ties.`, - ArgNames: []string{"x"}, - OptionsType: "RoundOptions", - } - roundToMultipleDoc = FunctionDoc{ - Summary: "Round to a given multiple", - Description: `Options are used to control the rounding multiple and rounding mode. -Default behavior is to round to the nearest integer and -use half-to-even rule to break ties.`, - ArgNames: []string{"x"}, - OptionsType: "RoundToMultipleOptions", - } -) - -func RegisterScalarArithmetic(reg FunctionRegistry) { - ops := []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"add_unchecked", kernels.OpAdd, decPromoteAdd, addUncheckedDoc}, - {"add", kernels.OpAddChecked, decPromoteAdd, addDoc}, - } - - for _, o := range ops { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote} - kns := append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...) - kns = append(kns, kernels.GetArithmeticFunctionTimeDuration(o.op)...) 
- for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - for _, unit := range arrow.TimeUnitValues { - inType := exec.NewMatchedInput(exec.TimestampTypeUnit(unit)) - inDuration := exec.NewExactInput(&arrow.DurationType{Unit: unit}) - ex := kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op) - err := fn.AddNewKernel([]exec.InputType{inType, inDuration}, kernels.OutputFirstType, ex, nil) - if err != nil { - panic(err) - } - err = fn.AddNewKernel([]exec.InputType{inDuration, inType}, kernels.OutputLastType, ex, nil) - if err != nil { - panic(err) - } - - matchDur := exec.NewMatchedInput(exec.DurationTypeUnit(unit)) - ex = kernels.ArithmeticExecSameType(arrow.DURATION, o.op) - err = fn.AddNewKernel([]exec.InputType{matchDur, matchDur}, exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"sub_unchecked", kernels.OpSub, decPromoteAdd, subUncheckedDoc}, - {"sub", kernels.OpSubChecked, decPromoteAdd, subDoc}, - {"subtract_unchecked", kernels.OpSub, decPromoteAdd, subUncheckedDoc}, - {"subtract", kernels.OpSubChecked, decPromoteAdd, subDoc}, - } - - for _, o := range ops { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote} - kns := append(kernels.GetArithmeticBinaryKernels(o.op), kernels.GetDecimalBinaryKernels(o.op)...) - kns = append(kns, kernels.GetArithmeticFunctionTimeDuration(o.op)...) 
- for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - for _, unit := range arrow.TimeUnitValues { - // timestamp - timestamp => duration - inType := exec.NewMatchedInput(exec.TimestampTypeUnit(unit)) - ex := kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op) - err := fn.AddNewKernel([]exec.InputType{inType, inType}, kernels.OutputResolveTemporal, ex, nil) - if err != nil { - panic(err) - } - - // timestamp - duration => timestamp - inDuration := exec.NewExactInput(&arrow.DurationType{Unit: unit}) - ex = kernels.ArithmeticExecSameType(arrow.TIMESTAMP, o.op) - err = fn.AddNewKernel([]exec.InputType{inType, inDuration}, kernels.OutputFirstType, ex, nil) - if err != nil { - panic(err) - } - - // duration - duration = duration - matchDur := exec.NewMatchedInput(exec.DurationTypeUnit(unit)) - ex = kernels.ArithmeticExecSameType(arrow.DURATION, o.op) - err = fn.AddNewKernel([]exec.InputType{matchDur, matchDur}, exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - // time32 - time32 = duration - for _, unit := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond} { - inType := exec.NewMatchedInput(exec.Time32TypeUnit(unit)) - internalEx := kernels.ArithmeticExecSameType(arrow.TIME32, o.op) - ex := func(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - if err := internalEx(ctx, batch, out); err != nil { - return err - } - // the allocated space is for duration (an int64) but we - // wrote the time32 - time32 as if the output was time32 - // so a quick copy in reverse expands the int32s to int64. 
- rawData := arrow.GetData[int32](out.Buffers[1].Buf) - outData := arrow.GetData[int64](out.Buffers[1].Buf) - - for i := out.Len - 1; i >= 0; i-- { - outData[i] = int64(rawData[i]) - } - return nil - } - - err := fn.AddNewKernel([]exec.InputType{inType, inType}, - exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - // time64 - time64 = duration - for _, unit := range []arrow.TimeUnit{arrow.Microsecond, arrow.Nanosecond} { - inType := exec.NewMatchedInput(exec.Time64TypeUnit(unit)) - ex := kernels.ArithmeticExecSameType(arrow.TIME64, o.op) - err := fn.AddNewKernel([]exec.InputType{inType, inType}, exec.NewOutputType(&arrow.DurationType{Unit: unit}), ex, nil) - if err != nil { - panic(err) - } - } - - inDate32 := exec.NewExactInput(arrow.FixedWidthTypes.Date32) - ex := kernels.SubtractDate32(o.op) - err := fn.AddNewKernel([]exec.InputType{inDate32, inDate32}, exec.NewOutputType(arrow.FixedWidthTypes.Duration_s), ex, nil) - if err != nil { - panic(err) - } - - inDate64 := exec.NewExactInput(arrow.FixedWidthTypes.Date64) - ex = kernels.ArithmeticExecSameType(arrow.DATE64, o.op) - err = fn.AddNewKernel([]exec.InputType{inDate64, inDate64}, exec.NewOutputType(arrow.FixedWidthTypes.Duration_ms), ex, nil) - if err != nil { - panic(err) - } - - reg.AddFunction(fn, false) - } - - oplist := []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - commutative bool - }{ - {"multiply_unchecked", kernels.OpMul, decPromoteMultiply, mulUncheckedDoc, true}, - {"multiply", kernels.OpMulChecked, decPromoteMultiply, mulDoc, true}, - {"divide_unchecked", kernels.OpDiv, decPromoteDivide, divUncheckedDoc, false}, - {"divide", kernels.OpDivChecked, decPromoteDivide, divDoc, false}, - } - - for _, o := range oplist { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote} - for _, k := range append(kernels.GetArithmeticBinaryKernels(o.op), 
kernels.GetDecimalBinaryKernels(o.op)...) { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - for _, unit := range arrow.TimeUnitValues { - durInput := exec.NewExactInput(&arrow.DurationType{Unit: unit}) - i64Input := exec.NewExactInput(arrow.PrimitiveTypes.Int64) - durOutput := exec.NewOutputType(&arrow.DurationType{Unit: unit}) - ex := kernels.ArithmeticExecSameType(arrow.DURATION, o.op) - err := fn.AddNewKernel([]exec.InputType{durInput, i64Input}, durOutput, ex, nil) - if err != nil { - panic(err) - } - if o.commutative { - err = fn.AddNewKernel([]exec.InputType{i64Input, durInput}, durOutput, ex, nil) - if err != nil { - panic(err) - } - } - } - - reg.AddFunction(fn, false) - } - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"abs_unchecked", kernels.OpAbsoluteValue, decPromoteNone, absoluteValueUncheckedDoc}, - {"abs", kernels.OpAbsoluteValueChecked, decPromoteNone, absoluteValueDoc}, - {"negate_unchecked", kernels.OpNegate, decPromoteNone, negateUncheckedDoc}, - } - - for _, o := range ops { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Unary(), o.doc), decPromoteNone} - kns := append(kernels.GetArithmeticUnaryKernels(o.op), kernels.GetDecimalUnaryKernels(o.op)...) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - fn := &arithmeticFunction{*NewScalarFunction("negate", Unary(), negateDoc), decPromoteNone} - kns := append(kernels.GetArithmeticUnarySignedKernels(kernels.OpNegateChecked), kernels.GetDecimalUnaryKernels(kernels.OpNegateChecked)...) 
- for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"sqrt_unchecked", kernels.OpSqrt, decPromoteNone, sqrtUncheckedDoc}, - {"sqrt", kernels.OpSqrtChecked, decPromoteNone, sqrtDoc}, - {"sin_unchecked", kernels.OpSin, decPromoteNone, sinUncheckedDoc}, - {"sin", kernels.OpSinChecked, decPromoteNone, sinDoc}, - {"cos_unchecked", kernels.OpCos, decPromoteNone, cosUncheckedDoc}, - {"cos", kernels.OpCosChecked, decPromoteNone, cosDoc}, - {"tan_unchecked", kernels.OpTan, decPromoteNone, tanUncheckedDoc}, - {"tan", kernels.OpTanChecked, decPromoteNone, tanDoc}, - {"asin_unchecked", kernels.OpAsin, decPromoteNone, asinUncheckedDoc}, - {"asin", kernels.OpAsinChecked, decPromoteNone, asinDoc}, - {"acos_unchecked", kernels.OpAcos, decPromoteNone, acosUncheckedDoc}, - {"acos", kernels.OpAcosChecked, decPromoteNone, acosDoc}, - {"atan", kernels.OpAtan, decPromoteNone, atanDoc}, - {"ln_unchecked", kernels.OpLn, decPromoteNone, lnUncheckedDoc}, - {"ln", kernels.OpLnChecked, decPromoteNone, lnDoc}, - {"log10_unchecked", kernels.OpLog10, decPromoteNone, log10UncheckedDoc}, - {"log10", kernels.OpLog10Checked, decPromoteNone, log10Doc}, - {"log2_unchecked", kernels.OpLog2, decPromoteNone, log2UncheckedDoc}, - {"log2", kernels.OpLog2Checked, decPromoteNone, log2Doc}, - {"log1p_unchecked", kernels.OpLog1p, decPromoteNone, log1pUncheckedDoc}, - {"log1p", kernels.OpLog1pChecked, decPromoteNone, log1pDoc}, - } - - for _, o := range ops { - fn := &arithmeticFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Unary(), o.doc), decPromoteNone}} - kns := kernels.GetArithmeticUnaryFloatingPointKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - ops = []struct { - funcName string - op kernels.ArithmeticOp - 
decPromote decimalPromotion - doc FunctionDoc - }{ - {"atan2", kernels.OpAtan2, decPromoteNone, atan2Doc}, - {"logb_unchecked", kernels.OpLogb, decPromoteNone, logbUncheckedDoc}, - {"logb", kernels.OpLogbChecked, decPromoteNone, logbDoc}, - } - - for _, o := range ops { - fn := &arithmeticFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), addDoc), decPromoteNone}} - kns := kernels.GetArithmeticFloatingPointKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - } - - fn = &arithmeticFunction{*NewScalarFunction("sign", Unary(), signDoc), decPromoteNone} - kns = kernels.GetArithmeticUnaryFixedIntOutKernels(arrow.PrimitiveTypes.Int8, kernels.OpSign) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - - ops = []struct { - funcName string - op kernels.ArithmeticOp - decPromote decimalPromotion - doc FunctionDoc - }{ - {"power_unchecked", kernels.OpPower, decPromoteNone, powUncheckedDoc}, - {"power", kernels.OpPowerChecked, decPromoteNone, powDoc}, - } - - for _, o := range ops { - fn := &arithmeticDecimalToFloatingPointFunc{arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), o.decPromote}} - kns := kernels.GetArithmeticBinaryKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - reg.AddFunction(fn, false) - } - - bitWiseOps := []struct { - funcName string - op kernels.BitwiseOp - doc FunctionDoc - }{ - {"bit_wise_and", kernels.OpBitAnd, bitWiseAndDoc}, - {"bit_wise_or", kernels.OpBitOr, bitWiseOrDoc}, - {"bit_wise_xor", kernels.OpBitXor, bitWiseXorDoc}, - } - - for _, o := range bitWiseOps { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone} - kns := kernels.GetBitwiseBinaryKernels(o.op) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - 
reg.AddFunction(fn, false) - } - - fn = &arithmeticFunction{*NewScalarFunction("bit_wise_not", Unary(), bitWiseNotDoc), decPromoteNone} - for _, k := range kernels.GetBitwiseUnaryKernels() { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - - reg.AddFunction(fn, false) - - shiftOps := []struct { - funcName string - dir kernels.ShiftDir - checked bool - doc FunctionDoc - }{ - {"shift_left", kernels.ShiftLeft, true, shiftLeftDoc}, - {"shift_left_unchecked", kernels.ShiftLeft, false, shiftLeftUncheckedDoc}, - {"shift_right", kernels.ShiftRight, true, shiftRightDoc}, - {"shift_right_unchecked", kernels.ShiftRight, false, shiftRightUncheckedDoc}, - } - - for _, o := range shiftOps { - fn := &arithmeticFunction{*NewScalarFunction(o.funcName, Binary(), o.doc), decPromoteNone} - kns := kernels.GetShiftKernels(o.dir, o.checked) - for _, k := range kns { - if err := fn.AddKernel(k); err != nil { - panic(err) - } - } - reg.AddFunction(fn, false) - } - - floorFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("floor", Unary(), floorDoc), decPromoteNone}} - kns = kernels.GetSimpleRoundKernels(kernels.RoundDown) - for _, k := range kns { - if err := floorFn.AddKernel(k); err != nil { - panic(err) - } - } - floorFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.RoundDown), nil) - floorFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.RoundDown), nil) - reg.AddFunction(floorFn, false) - - ceilFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("ceil", Unary(), ceilDoc), decPromoteNone}} - kns = kernels.GetSimpleRoundKernels(kernels.RoundUp) - for _, k := range kns { - if err := ceilFn.AddKernel(k); err != nil { - panic(err) - } - } - ceilFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)}, - 
kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.RoundUp), nil) - ceilFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.RoundUp), nil) - reg.AddFunction(ceilFn, false) - - truncFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("trunc", Unary(), truncDoc), decPromoteNone}} - kns = kernels.GetSimpleRoundKernels(kernels.TowardsZero) - for _, k := range kns { - if err := truncFn.AddKernel(k); err != nil { - panic(err) - } - } - truncFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL128)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal128.Num](kernels.TowardsZero), nil) - truncFn.AddNewKernel([]exec.InputType{exec.NewIDInput(arrow.DECIMAL256)}, - kernels.OutputFirstType, kernels.FixedRoundDecimalExec[decimal256.Num](kernels.TowardsZero), nil) - reg.AddFunction(truncFn, false) - - roundFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("round", Unary(), roundDoc), decPromoteNone}} - kns = kernels.GetRoundUnaryKernels(kernels.InitRoundState, kernels.UnaryRoundExec) - for _, k := range kns { - if err := roundFn.AddKernel(k); err != nil { - panic(err) - } - } - - roundFn.defaultOpts = DefaultRoundOptions - reg.AddFunction(roundFn, false) - - roundToMultipleFn := &arithmeticIntegerToFloatingPointFunc{arithmeticFunction{*NewScalarFunction("round_to_multiple", Unary(), roundToMultipleDoc), decPromoteNone}} - kns = kernels.GetRoundUnaryKernels(kernels.InitRoundToMultipleState, kernels.UnaryRoundToMultipleExec) - for _, k := range kns { - if err := roundToMultipleFn.AddKernel(k); err != nil { - panic(err) - } - } - - roundToMultipleFn.defaultOpts = DefaultRoundToMultipleOptions - reg.AddFunction(roundToMultipleFn, false) -} - -func impl(ctx context.Context, fn string, opts ArithmeticOptions, left, right Datum) (Datum, error) { - if opts.NoCheckOverflow { - fn 
+= "_unchecked" - } - return CallFunction(ctx, fn, nil, left, right) -} - -// Add performs an addition between the passed in arguments (scalar or array) -// and returns the result. If one argument is a scalar and the other is an -// array, the scalar value is added to each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -func Add(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "add", opts, left, right) -} - -// Sub performs a subtraction between the passed in arguments (scalar or array) -// and returns the result. If one argument is a scalar and the other is an -// array, the scalar value is subtracted from each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -func Subtract(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "sub", opts, left, right) -} - -// Multiply performs a multiplication between the passed in arguments (scalar or array) -// and returns the result. If one argument is a scalar and the other is an -// array, the scalar value is multiplied against each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -func Multiply(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "multiply", opts, left, right) -} - -// Divide performs a division between the passed in arguments (scalar or array) -// and returns the result. 
If one argument is a scalar and the other is an -// array, the scalar value is used with each value of the array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if NoCheckOverflow is false (default). -// -// Will error on divide by zero regardless of whether or not checking for -// overflows. -func Divide(ctx context.Context, opts ArithmeticOptions, left, right Datum) (Datum, error) { - return impl(ctx, "divide", opts, left, right) -} - -// AbsoluteValue returns the AbsoluteValue for each element in the input -// argument. It accepts either a scalar or an array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// performance is faster if not explicitly checking for overflows but -// will error on an overflow if CheckOverflow is true. -func AbsoluteValue(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error) { - fn := "abs" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, input) -} - -// Negate returns a result containing the negation of each element in the -// input argument. It accepts either a scalar or an array. -// -// ArithmeticOptions specifies whether or not to check for overflows, -// or to throw an error on unsigned types. -func Negate(ctx context.Context, opts ArithmeticOptions, input Datum) (Datum, error) { - fn := "negate" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, input) -} - -// Sign returns -1, 0, or 1 depending on the sign of each element in the -// input. For x in the input: -// -// if x > 0: 1 -// if x < 0: -1 -// if x == 0: 0 -func Sign(ctx context.Context, input Datum) (Datum, error) { - return CallFunction(ctx, "sign", nil, input) -} - -// Power returns base**exp for each element in the input arrays. 
Should work -// for both Arrays and Scalars -func Power(ctx context.Context, opts ArithmeticOptions, base, exp Datum) (Datum, error) { - fn := "power" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, base, exp) -} - -// ShiftLeft only accepts integral types and shifts each element of the -// first argument to the left by the value of the corresponding element -// in the second argument. -// -// The value to shift by should be >= 0 and < precision of the type. -func ShiftLeft(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error) { - fn := "shift_left" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, lhs, rhs) -} - -// ShiftRight only accepts integral types and shifts each element of the -// first argument to the right by the value of the corresponding element -// in the second argument. -// -// The value to shift by should be >= 0 and < precision of the type. -func ShiftRight(ctx context.Context, opts ArithmeticOptions, lhs, rhs Datum) (Datum, error) { - fn := "shift_right" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, lhs, rhs) -} - -func Sin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "sin" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Cos(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "cos" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Tan(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "tan" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Asin(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "asin" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Acos(ctx 
context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "acos" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Atan(ctx context.Context, arg Datum) (Datum, error) { - return CallFunction(ctx, "atan", nil, arg) -} - -func Atan2(ctx context.Context, x, y Datum) (Datum, error) { - return CallFunction(ctx, "atan2", nil, x, y) -} - -func Ln(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "ln" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Log10(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "log10" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Log2(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "log2" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Log1p(ctx context.Context, opts ArithmeticOptions, arg Datum) (Datum, error) { - fn := "log1p" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, arg) -} - -func Logb(ctx context.Context, opts ArithmeticOptions, x, base Datum) (Datum, error) { - fn := "logb" - if opts.NoCheckOverflow { - fn += "_unchecked" - } - return CallFunction(ctx, fn, nil, x, base) -} - -func Round(ctx context.Context, opts RoundOptions, arg Datum) (Datum, error) { - return CallFunction(ctx, "round", &opts, arg) -} - -func RoundToMultiple(ctx context.Context, opts RoundToMultipleOptions, arg Datum) (Datum, error) { - return CallFunction(ctx, "round_to_multiple", &opts, arg) -} diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go deleted file mode 100644 index 6e693481a322c..0000000000000 --- a/go/arrow/compute/arithmetic_test.go +++ /dev/null @@ -1,3504 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "context" - "fmt" - "math" - "strings" - "testing" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/klauspost/cpuid/v2" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" - "golang.org/x/exp/constraints" -) - -var ( - CpuCacheSizes = [...]int{ // defaults - 32 * 1024, // level 1: 32K - 256 * 1024, // level 2: 256K - 3072 * 1024, // level 3: 3M - } -) - -func init() { - if cpuid.CPU.Cache.L1D != -1 { - CpuCacheSizes[0] = cpuid.CPU.Cache.L1D - } - if cpuid.CPU.Cache.L2 != -1 { - CpuCacheSizes[1] = cpuid.CPU.Cache.L2 - } - if 
cpuid.CPU.Cache.L3 != -1 { - CpuCacheSizes[2] = cpuid.CPU.Cache.L3 - } -} - -func assertNullToNull(t *testing.T, ctx context.Context, fn string, mem memory.Allocator) { - f, ok := compute.GetFunctionRegistry().GetFunction(fn) - require.True(t, ok) - nulls := array.MakeArrayOfNull(mem, arrow.Null, 7) - defer nulls.Release() - n := f.Arity().NArgs - - t.Run("null to null array", func(t *testing.T) { - args := make([]compute.Datum, n) - for i := 0; i < n; i++ { - args[i] = &compute.ArrayDatum{nulls.Data()} - } - - result, err := compute.CallFunction(ctx, fn, nil, args...) - assert.NoError(t, err) - defer result.Release() - out := result.(*compute.ArrayDatum).MakeArray() - defer out.Release() - assertArraysEqual(t, nulls, out) - }) - - t.Run("null to null scalar", func(t *testing.T) { - args := make([]compute.Datum, n) - for i := 0; i < n; i++ { - args[i] = compute.NewDatum(scalar.ScalarNull) - } - - result, err := compute.CallFunction(ctx, fn, nil, args...) - assert.NoError(t, err) - assertScalarEquals(t, scalar.ScalarNull, result.(*compute.ScalarDatum).Value) - }) -} - -type fnOpts interface { - compute.ArithmeticOptions | compute.RoundOptions | compute.RoundToMultipleOptions -} - -type unaryArithmeticFunc[O fnOpts] func(context.Context, O, compute.Datum) (compute.Datum, error) - -// type unaryFunc = func(compute.Datum) (compute.Datum, error) - -type binaryArithmeticFunc = func(context.Context, compute.ArithmeticOptions, compute.Datum, compute.Datum) (compute.Datum, error) - -type binaryFunc = func(left, right compute.Datum) (compute.Datum, error) - -func assertScalarEquals(t *testing.T, expected, actual scalar.Scalar, opt ...scalar.EqualOption) { - assert.Truef(t, scalar.ApproxEquals(expected, actual, opt...), "expected: %s\ngot: %s", expected, actual) -} - -func assertBinop(t *testing.T, fn binaryFunc, left, right, expected arrow.Array, opt []array.EqualOption, scalarOpt []scalar.EqualOption) { - actual, err := fn(&compute.ArrayDatum{Value: left.Data()}, 
&compute.ArrayDatum{Value: right.Data()}) - require.NoError(t, err) - defer actual.Release() - assertDatumsEqual(t, &compute.ArrayDatum{Value: expected.Data()}, actual, opt, scalarOpt) - - // also check (Scalar, Scalar) operations - for i := 0; i < expected.Len(); i++ { - s, err := scalar.GetScalar(expected, i) - require.NoError(t, err) - lhs, _ := scalar.GetScalar(left, i) - rhs, _ := scalar.GetScalar(right, i) - - actual, err := fn(&compute.ScalarDatum{Value: lhs}, &compute.ScalarDatum{Value: rhs}) - assert.NoError(t, err) - assertScalarEquals(t, s, actual.(*compute.ScalarDatum).Value, scalarOpt...) - } -} - -func assertBinopErr(t *testing.T, fn binaryFunc, left, right arrow.Array, expectedMsg string) { - _, err := fn(&compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{Value: right.Data()}) - assert.ErrorIs(t, err, arrow.ErrInvalid) - assert.ErrorContains(t, err, expectedMsg) -} - -type BinaryFuncTestSuite struct { - suite.Suite - - mem *memory.CheckedAllocator - ctx context.Context -} - -func (b *BinaryFuncTestSuite) SetupTest() { - b.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - b.ctx = compute.WithAllocator(context.TODO(), b.mem) -} - -func (b *BinaryFuncTestSuite) TearDownTest() { - b.mem.AssertSize(b.T(), 0) -} - -func (b *BinaryFuncTestSuite) getArr(dt arrow.DataType, str string) arrow.Array { - arr, _, err := array.FromJSON(b.mem, dt, strings.NewReader(str), array.WithUseNumber()) - b.Require().NoError(err) - return arr -} - -type Float16BinaryFuncTestSuite struct { - BinaryFuncTestSuite -} - -func (b *Float16BinaryFuncTestSuite) assertBinopErr(fn binaryFunc, lhs, rhs string) { - left, _, _ := array.FromJSON(b.mem, arrow.FixedWidthTypes.Float16, strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, arrow.FixedWidthTypes.Float16, strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - - _, err := fn(&compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}) - 
b.ErrorIs(err, arrow.ErrNotImplemented) -} - -func (b *Float16BinaryFuncTestSuite) TestAdd() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - opts := compute.ArithmeticOptions{NoCheckOverflow: overflow} - b.assertBinopErr(func(left, right compute.Datum) (compute.Datum, error) { - return compute.Add(b.ctx, opts, left, right) - }, `[1.5]`, `[1.5]`) - }) - } -} - -func (b *Float16BinaryFuncTestSuite) TestSub() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - opts := compute.ArithmeticOptions{NoCheckOverflow: overflow} - b.assertBinopErr(func(left, right compute.Datum) (compute.Datum, error) { - return compute.Subtract(b.ctx, opts, left, right) - }, `[1.5]`, `[1.5]`) - }) - } -} - -type BinaryArithmeticSuite[T arrow.NumericType] struct { - BinaryFuncTestSuite - - opts compute.ArithmeticOptions - min, max T - equalOpts []array.EqualOption - scalarEqualOpts []scalar.EqualOption -} - -func (BinaryArithmeticSuite[T]) DataType() arrow.DataType { - return arrow.GetDataType[T]() -} - -func (b *BinaryArithmeticSuite[T]) setNansEqual(val bool) { - b.equalOpts = []array.EqualOption{array.WithNaNsEqual(val)} - b.scalarEqualOpts = []scalar.EqualOption{scalar.WithNaNsEqual(val)} -} - -func (b *BinaryArithmeticSuite[T]) SetupTest() { - b.BinaryFuncTestSuite.SetupTest() - b.opts.NoCheckOverflow = false -} - -func (b *BinaryArithmeticSuite[T]) makeNullScalar() scalar.Scalar { - return scalar.MakeNullScalar(b.DataType()) -} - -func (b *BinaryArithmeticSuite[T]) makeScalar(val T) scalar.Scalar { - return scalar.MakeScalar(val) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopScalars(fn binaryArithmeticFunc, lhs, rhs T, expected T) { - left, right := b.makeScalar(lhs), b.makeScalar(rhs) - exp := b.makeScalar(expected) - - actual, err := fn(b.ctx, b.opts, &compute.ScalarDatum{Value: left}, &compute.ScalarDatum{Value: right}) - b.NoError(err) - sc := 
actual.(*compute.ScalarDatum).Value - - assertScalarEquals(b.T(), exp, sc) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopScalarValArr(fn binaryArithmeticFunc, lhs T, rhs, expected string) { - left := b.makeScalar(lhs) - b.assertBinopScalarArr(fn, left, rhs, expected) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopScalarArr(fn binaryArithmeticFunc, lhs scalar.Scalar, rhs, expected string) { - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs)) - defer right.Release() - exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected)) - defer exp.Release() - - actual, err := fn(b.ctx, b.opts, &compute.ScalarDatum{Value: lhs}, &compute.ArrayDatum{Value: right.Data()}) - b.NoError(err) - defer actual.Release() - assertDatumsEqual(b.T(), &compute.ArrayDatum{Value: exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrScalarExpArr(fn binaryArithmeticFunc, lhs string, rhs scalar.Scalar, exp arrow.Array) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs)) - defer left.Release() - - actual, err := fn(b.ctx, b.opts, &compute.ArrayDatum{left.Data()}, compute.NewDatum(rhs)) - b.Require().NoError(err) - defer actual.Release() - assertDatumsEqual(b.T(), &compute.ArrayDatum{exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrScalarVal(fn binaryArithmeticFunc, lhs string, rhs T, expected string) { - right := b.makeScalar(rhs) - b.assertBinopArrScalar(fn, lhs, right, expected) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrScalar(fn binaryArithmeticFunc, lhs string, rhs scalar.Scalar, expected string) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs)) - defer left.Release() - exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected)) - defer exp.Release() - - actual, err := fn(b.ctx, b.opts, &compute.ArrayDatum{Value: left.Data()}, 
&compute.ScalarDatum{Value: rhs}) - b.NoError(err) - defer actual.Release() - assertDatumsEqual(b.T(), &compute.ArrayDatum{Value: exp.Data()}, actual, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopArrs(fn binaryArithmeticFunc, lhs, rhs, exp arrow.Array) { - assertBinop(b.T(), func(left, right compute.Datum) (compute.Datum, error) { - return fn(b.ctx, b.opts, left, right) - }, lhs, rhs, exp, b.equalOpts, b.scalarEqualOpts) -} - -func (b *BinaryArithmeticSuite[T]) assertBinopExpArr(fn binaryArithmeticFunc, lhs, rhs string, exp arrow.Array) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - - b.assertBinopArrs(fn, left, right, exp) -} - -func (b *BinaryArithmeticSuite[T]) assertBinop(fn binaryArithmeticFunc, lhs, rhs, expected string) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - exp, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(expected), array.WithUseNumber()) - defer exp.Release() - - b.assertBinopArrs(fn, left, right, exp) -} - -func (b *BinaryArithmeticSuite[T]) setOverflowCheck(value bool) { - b.opts.NoCheckOverflow = !value -} - -func (b *BinaryArithmeticSuite[T]) assertBinopErr(fn binaryArithmeticFunc, lhs, rhs, expectedMsg string) { - left, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(lhs), array.WithUseNumber()) - defer left.Release() - right, _, _ := array.FromJSON(b.mem, b.DataType(), strings.NewReader(rhs), array.WithUseNumber()) - defer right.Release() - - assertBinopErr(b.T(), func(left, right compute.Datum) (compute.Datum, error) { - return fn(b.ctx, b.opts, left, right) - }, left, 
right, expectedMsg) -} - -func (b *BinaryArithmeticSuite[T]) TestAdd() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.Add, `[]`, `[]`, `[]`) - b.assertBinop(compute.Add, `[3, 2, 6]`, `[1, 0, 2]`, `[4, 2, 8]`) - // nulls on one side - b.assertBinop(compute.Add, `[null, 1, null]`, `[3, 4, 5]`, `[null, 5, null]`) - b.assertBinop(compute.Add, `[3, 4, 5]`, `[null, 1, null]`, `[null, 5, null]`) - // nulls on both sides - b.assertBinop(compute.Add, `[null, 1, 2]`, `[3, 4, null]`, `[null, 5, null]`) - // all nulls - b.assertBinop(compute.Add, `[null]`, `[null]`, `[null]`) - - // scalar on the left - b.assertBinopScalarValArr(compute.Add, 3, `[1, 2]`, `[4, 5]`) - b.assertBinopScalarValArr(compute.Add, 3, `[null, 2]`, `[null, 5]`) - b.assertBinopScalarArr(compute.Add, b.makeNullScalar(), `[1, 2]`, `[null, null]`) - b.assertBinopScalarArr(compute.Add, b.makeNullScalar(), `[null, 2]`, `[null, null]`) - // scalar on the right - b.assertBinopArrScalarVal(compute.Add, `[1, 2]`, 3, `[4, 5]`) - b.assertBinopArrScalarVal(compute.Add, `[null, 2]`, 3, `[null, 5]`) - b.assertBinopArrScalar(compute.Add, `[1, 2]`, b.makeNullScalar(), `[null, null]`) - b.assertBinopArrScalar(compute.Add, `[null, 2]`, b.makeNullScalar(), `[null, null]`) - - if !arrow.IsFloating(b.DataType().ID()) && overflow { - val := fmt.Sprintf("[%v]", b.max) - b.assertBinopErr(compute.Add, val, val, "overflow") - } - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestSub() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.Subtract, `[]`, `[]`, `[]`) - b.assertBinop(compute.Subtract, `[3, 2, 6]`, `[1, 0, 2]`, `[2, 2, 4]`) - // nulls on one side - b.assertBinop(compute.Subtract, `[null, 4, 
null]`, `[2, 1, 0]`, `[null, 3, null]`) - b.assertBinop(compute.Subtract, `[3, 4, 5]`, `[null, 1, null]`, `[null, 3, null]`) - // nulls on both sides - b.assertBinop(compute.Subtract, `[null, 4, 3]`, `[2, 1, null]`, `[null, 3, null]`) - // all nulls - b.assertBinop(compute.Subtract, `[null]`, `[null]`, `[null]`) - - // scalar on the left - b.assertBinopScalarValArr(compute.Subtract, 3, `[1, 2]`, `[2, 1]`) - b.assertBinopScalarValArr(compute.Subtract, 3, `[null, 2]`, `[null, 1]`) - b.assertBinopScalarArr(compute.Subtract, b.makeNullScalar(), `[1, 2]`, `[null, null]`) - b.assertBinopScalarArr(compute.Subtract, b.makeNullScalar(), `[null, 2]`, `[null, null]`) - // scalar on the right - b.assertBinopArrScalarVal(compute.Subtract, `[4, 5]`, 3, `[1, 2]`) - b.assertBinopArrScalarVal(compute.Subtract, `[null, 5]`, 3, `[null, 2]`) - b.assertBinopArrScalar(compute.Subtract, `[1, 2]`, b.makeNullScalar(), `[null, null]`) - b.assertBinopArrScalar(compute.Subtract, `[null, 2]`, b.makeNullScalar(), `[null, null]`) - - if !arrow.IsFloating(b.DataType().ID()) && overflow { - b.assertBinopErr(compute.Subtract, fmt.Sprintf("[%v]", b.min), fmt.Sprintf("[%v]", b.max), "overflow") - } - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestMultiply() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.Multiply, `[]`, `[]`, `[]`) - b.assertBinop(compute.Multiply, `[3, 2, 6]`, `[1, 0, 2]`, `[3, 0, 12]`) - // nulls on one side - b.assertBinop(compute.Multiply, `[null, 2, null]`, `[4, 5, 6]`, `[null, 10, null]`) - b.assertBinop(compute.Multiply, `[4, 5, 6]`, `[null, 2, null]`, `[null, 10, null]`) - // nulls on both sides - b.assertBinop(compute.Multiply, `[null, 2, 3]`, `[4, 5, null]`, `[null, 10, null]`) - // all nulls - b.assertBinop(compute.Multiply, `[null]`, `[null]`, `[null]`) - - // scalar on left - 
b.assertBinopScalarValArr(compute.Multiply, 3, `[4, 5]`, `[12, 15]`) - b.assertBinopScalarValArr(compute.Multiply, 3, `[null, 5]`, `[null, 15]`) - b.assertBinopScalarArr(compute.Multiply, b.makeNullScalar(), `[1, 2]`, `[null, null]`) - b.assertBinopScalarArr(compute.Multiply, b.makeNullScalar(), `[null, 2]`, `[null, null]`) - // scalar on right - b.assertBinopArrScalarVal(compute.Multiply, `[4, 5]`, 3, `[12, 15]`) - b.assertBinopArrScalarVal(compute.Multiply, `[null, 5]`, 3, `[null, 15]`) - b.assertBinopArrScalar(compute.Multiply, `[1, 2]`, b.makeNullScalar(), `[null, null]`) - b.assertBinopArrScalar(compute.Multiply, `[null, 2]`, b.makeNullScalar(), `[null, null]`) - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestDiv() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("no_overflow_check=%t", overflow), func() { - b.setOverflowCheck(overflow) - - // empty arrays - b.assertBinop(compute.Divide, `[]`, `[]`, `[]`) - // ordinary arrays - b.assertBinop(compute.Divide, `[3, 2, 6]`, `[1, 1, 2]`, `[3, 2, 3]`) - // with nulls - b.assertBinop(compute.Divide, `[null, 10, 30, null, 20]`, `[1, 5, 2, 5, 10]`, `[null, 2, 15, null, 2]`) - if !arrow.IsFloating(b.DataType().ID()) { - // scalar divided by array - b.assertBinopScalarValArr(compute.Divide, 33, `[null, 1, 3, null, 2]`, `[null, 33, 11, null, 16]`) - // array divided by scalar - b.assertBinopArrScalarVal(compute.Divide, `[null, 10, 30, null, 2]`, 3, `[null, 3, 10, null, 0]`) - // scalar divided by scalar - b.assertBinopScalars(compute.Divide, 16, 7, 2) - } else { - b.assertBinop(compute.Divide, `[3.4, 0.64, 1.28]`, `[1, 2, 4]`, `[3.4, 0.32, 0.32]`) - b.assertBinop(compute.Divide, `[null, 1, 3.3, null, 2]`, `[1, 4, 2, 5, 0.1]`, `[null, 0.25, 1.65, null, 20]`) - b.assertBinopScalarValArr(compute.Divide, 10, `[null, 1, 2.5, null, 2, 5]`, `[null, 10, 4, null, 5, 2]`) - b.assertBinopArrScalarVal(compute.Divide, `[null, 1, 2.5, null, 2, 5]`, 10, `[null, 0.1, 
0.25, null, 0.2, 0.5]`) - - b.assertBinop(compute.Divide, `[3.4, "Inf", "-Inf"]`, `[1, 2, 3]`, `[3.4, "Inf", "-Inf"]`) - b.setNansEqual(true) - b.assertBinop(compute.Divide, `[3.4, "NaN", 2.0]`, `[1, 2, 2.0]`, `[3.4, "NaN", 1.0]`) - b.assertBinopScalars(compute.Divide, 21, 3, 7) - } - }) - } - }) -} - -func (b *BinaryArithmeticSuite[T]) TestDivideByZero() { - if !arrow.IsFloating(b.DataType().ID()) { - for _, checkOverflow := range []bool{false, true} { - b.setOverflowCheck(checkOverflow) - b.assertBinopErr(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, "divide by zero") - } - } else { - b.setOverflowCheck(true) - b.assertBinopErr(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, "divide by zero") - b.assertBinopErr(compute.Divide, `[3, 2, 0]`, `[1, 1, 0]`, "divide by zero") - b.assertBinopErr(compute.Divide, `[3, 2, -6]`, `[1, 1, 0]`, "divide by zero") - - b.setOverflowCheck(false) - b.setNansEqual(true) - b.assertBinop(compute.Divide, `[3, 2, 6]`, `[1, 1, 0]`, `[3, 2, "Inf"]`) - b.assertBinop(compute.Divide, `[3, 2, 0]`, `[1, 1, 0]`, `[3, 2, "NaN"]`) - b.assertBinop(compute.Divide, `[3, 2, -6]`, `[1, 1, 0]`, `[3, 2, "-Inf"]`) - } -} - -func (b *BinaryArithmeticSuite[T]) TestPower() { - b.setNansEqual(true) - b.Run(b.DataType().String(), func() { - for _, checkOverflow := range []bool{false, true} { - b.Run(fmt.Sprintf("checkOverflow=%t", checkOverflow), func() { - b.setOverflowCheck(checkOverflow) - - b.assertBinop(compute.Power, `[]`, `[]`, `[]`) - if !arrow.IsFloating(b.DataType().ID()) { - b.assertBinop(compute.Power, `[3, 2, 6, 2]`, `[1, 1, 2, 0]`, `[3, 2, 36, 1]`) - b.assertBinop(compute.Power, `[null, 2, 3, null, 20]`, `[1, 6, 2, 5, 1]`, `[null, 64, 9, null, 20]`) - b.assertBinopScalarValArr(compute.Power, 3, `[null, 3, 4, null, 2]`, `[null, 27, 81, null, 9]`) - b.assertBinopArrScalarVal(compute.Power, `[null, 10, 3, null, 2]`, 2, `[null, 100, 9, null, 4]`) - b.assertBinopScalars(compute.Power, 4, 3, 64) - b.assertBinop(compute.Power, `[0, 1, 0]`, `[0, 0, 42]`, `[1, 1, 
0]`) - - if checkOverflow { - b.assertBinopErr(compute.Power, fmt.Sprintf("[%v]", b.max), `[10]`, "overflow") - } else { - b.assertBinopScalars(compute.Power, b.max, 10, 1) - } - } else { - b.assertBinop(compute.Power, `[3.4, 16, 0.64, 1.2, 0]`, `[1, 0.5, 2, 4, 0]`, `[3.4, 4, 0.4096, 2.0736, 1]`) - b.assertBinop(compute.Power, `[null, 1, 3.3, null, 2]`, `[1, 4, 2, 5, 0.1]`, `[null, 1, 10.89, null, 1.07177346]`) - b.assertBinopScalarValArr(compute.Power, 10, `[null, 1, 2.5, null, 2, 5]`, `[null, 10, 316.227766017, null, 100, 100000]`) - b.assertBinopArrScalarVal(compute.Power, `[null, 1, 2.5, null, 2, 5]`, 10, `[null, 1, 9536.74316406, null, 1024, 9765625]`) - b.assertBinop(compute.Power, `[3.4, "Inf", "-Inf", 1.1, 10000]`, `[1, 2, 3, "Inf", 100000]`, `[3.4, "Inf", "-Inf", "Inf", "Inf"]`) - b.assertBinop(compute.Power, `[3.4, "NaN", 2.0]`, `[1, 2, 2.0]`, `[3.4, "NaN", 4.0]`) - b.assertBinop(compute.Power, `[0.0, 0.0]`, `[-1.0, -3.0]`, `["Inf", "Inf"]`) - } - }) - } - }) -} - -type BinaryFloatingArithmeticSuite[T constraints.Float] struct { - BinaryArithmeticSuite[T] - - smallest T -} - -func (bs *BinaryFloatingArithmeticSuite[T]) TestTrigAtan2() { - bs.setNansEqual(true) - atan2 := func(ctx context.Context, _ compute.ArithmeticOptions, x, y compute.Datum) (compute.Datum, error) { - return compute.Atan2(ctx, x, y) - } - - bs.assertBinop(atan2, `[]`, `[]`, `[]`) - bs.assertBinop(atan2, `[0, 0, null, "NaN"]`, `[null, "NaN", 0, 0]`, `[null, "NaN", null, "NaN"]`) - bs.assertBinop(atan2, `[0, 0, -0.0, 0, -0.0, 0, 1, 0, -1, "Inf", "-Inf", 0, 0]`, - `[0, 0, 0, -0.0, -0.0, 1, 0, -1, 0, 0, 0, "Inf", "-Inf"]`, - fmt.Sprintf("[0, 0, -0.0, %f, %f, 0, %f, %f, %f, %f, %f, 0, %f]", - math.Pi, -math.Pi, math.Pi/2, math.Pi, -math.Pi/2, math.Pi/2, -math.Pi/2, math.Pi)) -} - -func (bs *BinaryFloatingArithmeticSuite[T]) TestLog() { - bs.setNansEqual(true) - for _, overflow := range []bool{false, true} { - bs.setOverflowCheck(overflow) - bs.assertBinop(compute.Logb, `[1, 10, null, "NaN", 
"Inf"]`, `[100, 10, null, 2, 10]`, - `[0, 1, null, "NaN", "Inf"]`) - bs.assertBinopScalars(compute.Logb, bs.smallest, 10, T(math.Log(float64(bs.smallest))/math.Log(10))) - bs.assertBinopScalars(compute.Logb, bs.max, 10, T(math.Log(float64(bs.max))/math.Log(10))) - } - - bs.setOverflowCheck(true) - bs.assertBinop(compute.Logb, `[1, 10, null]`, `[10, 10, null]`, `[0, 1, null]`) - bs.assertBinop(compute.Logb, `[1, 2, null]`, `[2, 2, null]`, `[0, 1, null]`) - bs.assertBinopArrScalarVal(compute.Logb, `[10, 100, 1000, null]`, 10, `[1, 2, 3, null]`) - bs.assertBinopArrScalarVal(compute.Logb, `[1, 2, 4, 8]`, 0.25, `[-0.0, -0.5, -1.0, -1.5]`) - - bs.setOverflowCheck(false) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 10, `["NaN", "NaN", "-Inf", "Inf"]`) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 2, `["NaN", "NaN", "-Inf", "Inf"]`) - bs.assertBinop(compute.Logb, `["-Inf", -1, 0, "Inf"]`, `[2, 10, 0, 0]`, `["NaN", "NaN", "NaN", "NaN"]`) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -1, 0, "Inf"]`, 0, `["NaN", "NaN", "NaN", "NaN"]`) - bs.assertBinopArrScalarVal(compute.Logb, `["-Inf", -2, -1, "Inf"]`, 2, `["NaN", "NaN", "NaN", "Inf"]`) - - bs.setOverflowCheck(true) - bs.assertBinopErr(compute.Logb, `[0]`, `[2]`, "logarithm of zero") - bs.assertBinopErr(compute.Logb, `[2]`, `[0]`, "logarithm of zero") - bs.assertBinopErr(compute.Logb, `[-1]`, `[2]`, "logarithm of negative number") - bs.assertBinopErr(compute.Logb, `["-Inf"]`, `[2]`, "logarithm of negative number") -} - -type BinaryIntegralArithmeticSuite[T arrow.IntType | arrow.UintType] struct { - BinaryArithmeticSuite[T] -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftLeft() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("check_overflow=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.ShiftLeft, `[]`, `[]`, `[]`) - b.assertBinop(compute.ShiftLeft, `[0, 1, 2, 3]`, 
`[2, 3, 4, 5]`, `[0, 8, 32, 96]`) - b.assertBinop(compute.ShiftLeft, `[0, null, 2, 3]`, `[2, 3, 4, 5]`, `[0, null, 32, 96]`) - b.assertBinop(compute.ShiftLeft, `[0, 1, 2, 3]`, `[2, 3, null, 5]`, `[0, 8, null, 96]`) - b.assertBinop(compute.ShiftLeft, `[0, null, 2, 3]`, `[2, 3, null, 5]`, `[0, null, null, 96]`) - b.assertBinop(compute.ShiftLeft, `[null]`, `[null]`, `[null]`) - b.assertBinopScalarValArr(compute.ShiftLeft, 2, `[null, 5]`, `[null, 64]`) - b.assertBinopScalarArr(compute.ShiftLeft, b.makeNullScalar(), `[null, 5]`, `[null, null]`) - b.assertBinopArrScalarVal(compute.ShiftLeft, `[null, 5]`, 3, `[null, 40]`) - b.assertBinopArrScalar(compute.ShiftLeft, `[null, 5]`, b.makeNullScalar(), `[null, null]`) - }) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftRight() { - b.Run(b.DataType().String(), func() { - for _, overflow := range []bool{false, true} { - b.Run(fmt.Sprintf("check_overflow=%t", overflow), func() { - b.setOverflowCheck(overflow) - - b.assertBinop(compute.ShiftRight, `[]`, `[]`, `[]`) - b.assertBinop(compute.ShiftRight, `[0, 1, 4, 8]`, `[1, 1, 1, 4]`, `[0, 0, 2, 0]`) - b.assertBinop(compute.ShiftRight, `[0, null, 4, 8]`, `[1, 1, 1, 4]`, `[0, null, 2, 0]`) - b.assertBinop(compute.ShiftRight, `[0, 1, 4, 8]`, `[1, 1, null, 4]`, `[0, 0, null, 0]`) - b.assertBinop(compute.ShiftRight, `[0, null, 4, 8]`, `[1, 1, null, 4]`, `[0, null, null, 0]`) - b.assertBinop(compute.ShiftRight, `[null]`, `[null]`, `[null]`) - b.assertBinopScalarValArr(compute.ShiftRight, 64, `[null, 2, 6]`, `[null, 16, 1]`) - b.assertBinopScalarArr(compute.ShiftRight, b.makeNullScalar(), `[null, 2, 6]`, `[null, null, null]`) - b.assertBinopArrScalarVal(compute.ShiftRight, `[null, 3, 96]`, 3, `[null, 0, 12]`) - b.assertBinopArrScalar(compute.ShiftRight, `[null, 3, 96]`, b.makeNullScalar(), `[null, null, null]`) - }) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftLeftOverflowError() { - b.Run(b.DataType().String(), func() { - bitWidth := 
b.DataType().(arrow.FixedWidthDataType).BitWidth() - if !arrow.IsUnsignedInteger(b.DataType().ID()) { - bitWidth-- - } - - b.setOverflowCheck(true) - b.assertBinop(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth-1), - fmt.Sprintf("[%d]", T(1)<<(bitWidth-1))) - b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-2), - fmt.Sprintf("[%d]", T(1)<<(bitWidth-1))) - if arrow.IsUnsignedInteger(b.DataType().ID()) { - b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) - b.assertBinop(compute.ShiftLeft, `[4]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) - b.assertBinopErr(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - } else { - // shift a bit into the sign bit - b.assertBinop(compute.ShiftLeft, `[2]`, fmt.Sprintf("[%d]", bitWidth-1), - fmt.Sprintf("[%d]", b.min)) - // shift a bit past the sign bit - b.assertBinop(compute.ShiftLeft, `[4]`, fmt.Sprintf("[%d]", bitWidth-1), `[0]`) - b.assertBinop(compute.ShiftLeft, fmt.Sprintf("[%d]", b.min), `[1]`, `[0]`) - b.assertBinopErr(compute.ShiftLeft, `[1, 2]`, `[1, -1]`, "shift amount must be >= 0 and less than precision of type") - b.assertBinopErr(compute.ShiftLeft, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - - b.setOverflowCheck(false) - b.assertBinop(compute.ShiftLeft, `[1, 1]`, fmt.Sprintf("[-1, %d]", bitWidth), `[1, 1]`) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestShiftRightOverflowError() { - b.Run(b.DataType().String(), func() { - bitWidth := b.DataType().(arrow.FixedWidthDataType).BitWidth() - if !arrow.IsUnsignedInteger(b.DataType().ID()) { - bitWidth-- - } - - b.setOverflowCheck(true) - - b.assertBinop(compute.ShiftRight, fmt.Sprintf("[%d]", b.max), fmt.Sprintf("[%d]", bitWidth-1), `[1]`) - if arrow.IsUnsignedInteger(b.DataType().ID()) { - b.assertBinopErr(compute.ShiftRight, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount 
must be >= 0 and less than precision of type") - } else { - b.assertBinop(compute.ShiftRight, `[-1, -1]`, `[1, 5]`, `[-1, -1]`) - b.assertBinop(compute.ShiftRight, fmt.Sprintf("[%d]", b.min), `[1]`, fmt.Sprintf("[%d]", b.min/2)) - - b.assertBinopErr(compute.ShiftRight, `[1, 2]`, `[1, -1]`, "shift amount must be >= 0 and less than precision of type") - b.assertBinopErr(compute.ShiftRight, `[1]`, fmt.Sprintf("[%d]", bitWidth), "shift amount must be >= 0 and less than precision of type") - - b.setOverflowCheck(false) - b.assertBinop(compute.ShiftRight, `[1, 1]`, fmt.Sprintf("[-1, %d]", bitWidth), `[1, 1]`) - } - }) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestTrig() { - // integer arguments promoted to float64, sanity check here - ty := b.DataType() - b.setNansEqual(true) - atan2 := func(ctx context.Context, _ compute.ArithmeticOptions, x, y compute.Datum) (compute.Datum, error) { - return compute.Atan2(ctx, x, y) - } - - lhs, rhs := b.getArr(ty, `[0, 1]`), b.getArr(ty, `[1, 0]`) - defer lhs.Release() - defer rhs.Release() - exp := b.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf(`[0, %f]`, math.Pi/2)) - defer exp.Release() - - b.assertBinopArrs(atan2, lhs, rhs, exp) -} - -func (b *BinaryIntegralArithmeticSuite[T]) TestLog() { - // integer arguments promoted to double, sanity check here - exp1 := b.getArr(arrow.PrimitiveTypes.Float64, `[0, 1, null]`) - exp2 := b.getArr(arrow.PrimitiveTypes.Float64, `[1, 2, null]`) - defer exp1.Release() - defer exp2.Release() - - b.assertBinopExpArr(compute.Logb, `[1, 10, null]`, `[10, 10, null]`, exp1) - b.assertBinopExpArr(compute.Logb, `[1, 2, null]`, `[2, 2, null]`, exp1) - b.assertBinopArrScalarExpArr(compute.Logb, `[10, 100, null]`, scalar.MakeScalar(T(10)), exp2) -} - -func TestBinaryArithmetic(t *testing.T) { - suite.Run(t, &BinaryIntegralArithmeticSuite[int8]{BinaryArithmeticSuite[int8]{min: math.MinInt8, max: math.MaxInt8}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint8]{BinaryArithmeticSuite[uint8]{min: 0, 
max: math.MaxUint8}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[int16]{BinaryArithmeticSuite[int16]{min: math.MinInt16, max: math.MaxInt16}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint16]{BinaryArithmeticSuite[uint16]{min: 0, max: math.MaxUint16}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[int32]{BinaryArithmeticSuite[int32]{min: math.MinInt32, max: math.MaxInt32}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint32]{BinaryArithmeticSuite[uint32]{min: 0, max: math.MaxUint32}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[int64]{BinaryArithmeticSuite[int64]{min: math.MinInt64, max: math.MaxInt64}}) - suite.Run(t, &BinaryIntegralArithmeticSuite[uint64]{BinaryArithmeticSuite[uint64]{min: 0, max: math.MaxUint64}}) - suite.Run(t, &BinaryFloatingArithmeticSuite[float32]{BinaryArithmeticSuite[float32]{min: -math.MaxFloat32, max: math.MaxFloat32}, math.SmallestNonzeroFloat32}) - suite.Run(t, &BinaryFloatingArithmeticSuite[float64]{BinaryArithmeticSuite[float64]{min: -math.MaxFloat64, max: math.MaxFloat64}, math.SmallestNonzeroFloat64}) - suite.Run(t, new(Float16BinaryFuncTestSuite)) - suite.Run(t, new(DecimalBinaryArithmeticSuite)) - suite.Run(t, new(ScalarBinaryTemporalArithmeticSuite)) -} - -func TestBinaryArithmeticDispatchBest(t *testing.T) { - for _, name := range []string{"add", "sub", "multiply", "divide", "power"} { - for _, suffix := range []string{"", "_unchecked"} { - name += suffix - t.Run(name, func(t *testing.T) { - - tests := []struct { - left, right arrow.DataType - expected arrow.DataType - }{ - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.Null, arrow.PrimitiveTypes.Int32}, - {arrow.Null, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, 
arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Int32}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Uint64, arrow.PrimitiveTypes.Int64}, - {arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint8}, - {arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Uint16, arrow.PrimitiveTypes.Uint16}, - {arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}, - {arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float32}, - {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64}, - {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Float64}, - arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}, - {&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Float64}, - arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Float64}, - } - - for _, tt := range tests { - CheckDispatchBest(t, name, []arrow.DataType{tt.left, tt.right}, []arrow.DataType{tt.expected, tt.expected}) - } - }) - } - } -} - -type DecimalArithmeticSuite struct { - BinaryFuncTestSuite -} - -func (*DecimalArithmeticSuite) positiveScales() []arrow.DataType { - return []arrow.DataType{ - &arrow.Decimal128Type{Precision: 4, Scale: 2}, - &arrow.Decimal256Type{Precision: 4, Scale: 2}, - &arrow.Decimal128Type{Precision: 38, Scale: 2}, - &arrow.Decimal256Type{Precision: 76, Scale: 2}, - } -} - -func (*DecimalArithmeticSuite) negativeScales() []arrow.DataType { - return []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: -2}, - 
&arrow.Decimal256Type{Precision: 2, Scale: -2}, - } -} - -func (ds *DecimalArithmeticSuite) checkDecimalToFloat(fn string, args []compute.Datum) { - // validate that fn(*decimals) is the same as - // fn([cast(x, float64) x for x in decimals]) - - newArgs := make([]compute.Datum, len(args)) - for i, arg := range args { - if arrow.IsDecimal(arg.(compute.ArrayLikeDatum).Type().ID()) { - casted, err := compute.CastDatum(ds.ctx, arg, compute.NewCastOptions(arrow.PrimitiveTypes.Float64, true)) - ds.Require().NoError(err) - defer casted.Release() - newArgs[i] = casted - } else { - newArgs[i] = arg - } - } - - expected, err := compute.CallFunction(ds.ctx, fn, nil, newArgs...) - ds.Require().NoError(err) - defer expected.Release() - actual, err := compute.CallFunction(ds.ctx, fn, nil, args...) - ds.Require().NoError(err) - defer actual.Release() - - assertDatumsEqual(ds.T(), expected, actual, []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) -} - -func (ds *DecimalArithmeticSuite) checkFail(fn string, args []compute.Datum, substr string, opts compute.FunctionOptions) { - _, err := compute.CallFunction(ds.ctx, fn, opts, args...) 
- ds.ErrorIs(err, arrow.ErrInvalid) - ds.ErrorContains(err, substr) -} - -func (ds *DecimalArithmeticSuite) decimalArrayFromJSON(ty arrow.DataType, str string) arrow.Array { - arr, _, err := array.FromJSON(ds.mem, ty, strings.NewReader(str)) - ds.Require().NoError(err) - return arr -} - -type DecimalBinaryArithmeticSuite struct { - DecimalArithmeticSuite -} - -func (ds *DecimalBinaryArithmeticSuite) TestDispatchBest() { - // decimal, floating point - ds.Run("dec/floatingpoint", func() { - for _, fn := range []string{"add", "sub", "multiply", "divide"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1, Scale: 0}, - arrow.PrimitiveTypes.Float32}, []arrow.DataType{ - arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Float64}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Float32, &arrow.Decimal256Type{Precision: 1, Scale: 0}}, - []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float32}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Float64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - }) - } - } - }) - - // decimal, decimal => decimal - // decimal, integer => decimal - ds.Run("dec/dec_int", func() { - for _, fn := range []string{"add", "sub"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 19, Scale: 0}, - &arrow.Decimal128Type{Precision: 1, Scale: 0}}) - 
CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 1, Scale: 0}, - &arrow.Decimal128Type{Precision: 19, Scale: 0}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 3, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 3, Scale: 1}}) - }) - } - } - }) - - { - fn := "multiply" - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - CheckDispatchBest(ds.T(), fn, 
[]arrow.DataType{ - arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 19}, - &arrow.Decimal128Type{Precision: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 1}, - &arrow.Decimal128Type{Precision: 19}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 2, Scale: 1}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 0}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: 1}, - &arrow.Decimal128Type{Precision: 2, Scale: 0}}) - }) - } - } - - { 
- fn := "divide" - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - arrow.PrimitiveTypes.Int64, &arrow.Decimal128Type{Precision: 1, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 23, Scale: 4}, - &arrow.Decimal128Type{Precision: 1, Scale: 0}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 1, Scale: 0}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 21, Scale: 20}, - &arrow.Decimal128Type{Precision: 19, Scale: 0}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 6, Scale: 5}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 6, Scale: 5}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 6, Scale: 5}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal256Type{Precision: 6, Scale: 5}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}}) - - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 0}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 7, Scale: 5}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}}) - CheckDispatchBest(ds.T(), fn, []arrow.DataType{ - 
&arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 0}}, - []arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 4}, - &arrow.Decimal128Type{Precision: 2, Scale: 0}}) - }) - } - } - - for _, name := range []string{"power", "power_unchecked", "atan2", "logb", "logb_unchecked"} { - ds.Run(name, func() { - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal256Type{Precision: 2, Scale: 1}, &arrow.Decimal256Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, arrow.PrimitiveTypes.Int64}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - arrow.PrimitiveTypes.Int32, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - &arrow.Decimal128Type{Precision: 2, Scale: 1}, arrow.PrimitiveTypes.Float64}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(ds.T(), name, []arrow.DataType{ - arrow.PrimitiveTypes.Float32, &arrow.Decimal128Type{Precision: 2, Scale: 1}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Float64}) - }) - } -} - -func (ds *DecimalBinaryArithmeticSuite) TestAddSubtractDec128() { - left, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 30, Scale: 3}, - strings.NewReader(`["1.000", "-123456789012345678901234567.890", "98765432109876543210.987", "-999999999999999999999999999.999"]`)) - defer left.Release() - right, _, _ := 
array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 20, Scale: 9}, - strings.NewReader(`["-1.000000000", "12345678901.234567890", "98765.432101234", "-99999999999.999999999"]`)) - defer right.Release() - added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 37, Scale: 9}, - strings.NewReader(`["0.000000000", "-123456789012345666555555666.655432110", "98765432109876641976.419101234", "-1000000000000000099999999999.998999999"]`)) - defer added.Release() - subtracted, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 37, Scale: 9}, - strings.NewReader(`["2.000000000", "-123456789012345691246913469.124567890", "98765432109876444445.554898766", "-999999999999999899999999999.999000001"]`)) - defer subtracted.Release() - - leftDatum, rightDatum := &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()} - checkScalarBinary(ds.T(), "add", leftDatum, rightDatum, &compute.ArrayDatum{Value: added.Data()}, nil) - checkScalarBinary(ds.T(), "sub", leftDatum, rightDatum, &compute.ArrayDatum{Value: subtracted.Data()}, nil) -} - -func (ds *DecimalBinaryArithmeticSuite) TestAddSubtractDec256() { - left, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 20}, - strings.NewReader(`[ - "-1.00000000000000000001", - "1234567890.12345678900000000000", - "-9876543210.09876543210987654321", - "9999999999.99999999999999999999" - ]`)) - defer left.Release() - right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 10}, - strings.NewReader(`[ - "1.0000000000", - "-1234567890.1234567890", - "6789.5432101234", - "99999999999999999999.9999999999" - ]`)) - defer right.Release() - added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 41, Scale: 20}, - strings.NewReader(`[ - "-0.00000000000000000001", - "0.00000000000000000000", - "-9876536420.55555530870987654321", - "100000000009999999999.99999999989999999999" - ]`)) - defer added.Release() - subtracted, _, _ := 
array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 41, Scale: 20}, - strings.NewReader(`[ - "-2.00000000000000000001", - "2469135780.24691357800000000000", - "-9876549999.64197555550987654321", - "-99999999989999999999.99999999990000000001" - ]`)) - defer subtracted.Release() - - leftDatum, rightDatum := &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()} - checkScalarBinary(ds.T(), "add", leftDatum, rightDatum, &compute.ArrayDatum{Value: added.Data()}, nil) - checkScalarBinary(ds.T(), "sub", leftDatum, rightDatum, &compute.ArrayDatum{Value: subtracted.Data()}, nil) -} - -func (ds *DecimalBinaryArithmeticSuite) TestAddSubScalars() { - ds.Run("scalar_array", func() { - left := scalar.NewDecimal128Scalar(decimal128.New(0, 123456), &arrow.Decimal128Type{Precision: 6, Scale: 1}) - right, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 10, Scale: 3}, - strings.NewReader(`["1.234", "1234.000", "-9876.543", "666.888"]`)) - defer right.Release() - added, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, - strings.NewReader(`["12346.834", "13579.600", "2469.057", "13012.488"]`)) - defer added.Release() - leftSubRight, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, - strings.NewReader(`["12344.366", "11111.600", "22222.143", "11678.712"]`)) - defer leftSubRight.Release() - rightSubLeft, _, _ := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 11, Scale: 3}, - strings.NewReader(`["-12344.366", "-11111.600", "-22222.143", "-11678.712"]`)) - defer rightSubLeft.Release() - - rightDatum := &compute.ArrayDatum{right.Data()} - addedDatum := &compute.ArrayDatum{added.Data()} - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), rightDatum, addedDatum, nil) - checkScalarBinary(ds.T(), "add", rightDatum, compute.NewDatum(left), addedDatum, nil) - checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), rightDatum, &compute.ArrayDatum{leftSubRight.Data()}, nil) - 
checkScalarBinary(ds.T(), "sub", rightDatum, compute.NewDatum(left), &compute.ArrayDatum{rightSubLeft.Data()}, nil) - }) - - ds.Run("scalar_scalar", func() { - left := scalar.NewDecimal256Scalar(decimal256.FromU64(666), &arrow.Decimal256Type{Precision: 3}) - right := scalar.NewDecimal256Scalar(decimal256.FromU64(888), &arrow.Decimal256Type{Precision: 3}) - added := scalar.NewDecimal256Scalar(decimal256.FromU64(1554), &arrow.Decimal256Type{Precision: 4}) - subtracted := scalar.NewDecimal256Scalar(decimal256.FromI64(-222), &arrow.Decimal256Type{Precision: 4}) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(subtracted), nil) - }) - - ds.Run("dec128_dec256", func() { - left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) - right := scalar.NewDecimal256Scalar(decimal256.FromU64(888), &arrow.Decimal256Type{Precision: 3}) - added := scalar.NewDecimal256Scalar(decimal256.FromU64(1554), &arrow.Decimal256Type{Precision: 4}) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "add", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(added), nil) - }) - - ds.Run("decimal_float", func() { - left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) - right := scalar.MakeScalar(float64(888)) - added := scalar.MakeScalar(float64(1554)) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "add", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(added), nil) - }) - - ds.Run("decimal_integer", func() { - left := scalar.NewDecimal128Scalar(decimal128.FromU64(666), &arrow.Decimal128Type{Precision: 3}) - right := 
scalar.MakeScalar(int64(888)) - added := scalar.NewDecimal128Scalar(decimal128.FromU64(1554), &arrow.Decimal128Type{Precision: 20}) - subtracted := scalar.NewDecimal128Scalar(decimal128.FromI64(-222), &arrow.Decimal128Type{Precision: 20}) - checkScalarBinary(ds.T(), "add", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(added), nil) - checkScalarBinary(ds.T(), "sub", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(subtracted), nil) - }) -} - -func (ds *DecimalBinaryArithmeticSuite) TestMultiply() { - ds.Run("array x array, decimal128", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 20, Scale: 10}, - strings.NewReader(`["1234567890.1234567890", "-0.0000000001", "-9999999999.9999999999"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 13, Scale: 3}, - strings.NewReader(`["1234567890.123", "0.001", "-9999999999.999"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 34, Scale: 13}, - strings.NewReader(`["1524157875323319737.98709039504701", "-0.0000000000001", "99999999999989999999.0000000000001"]`)) - ds.Require().NoError(err) - defer expected.Release() - - checkScalarBinary(ds.T(), "multiply_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("array x array decimal256", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 30, Scale: 3}, - strings.NewReader(`["123456789012345678901234567.890", "0.000"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 20, Scale: 9}, - strings.NewReader(`["-12345678901.234567890", "99999999999.999999999"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, 
&arrow.Decimal256Type{Precision: 51, Scale: 12}, - strings.NewReader(`["-1524157875323883675034293577501905199.875019052100", "0.000000000000"]`)) - ds.Require().NoError(err) - defer expected.Release() - checkScalarBinary(ds.T(), "multiply_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("scalar x array", func() { - left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3, Scale: 2}, "3.14") - ds.Require().NoError(err) - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 1, Scale: 0}, - strings.NewReader(`["1", "2", "3", "4", "5"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, - strings.NewReader(`["3.14", "6.28", "9.42", "12.56", "15.70"]`)) - ds.Require().NoError(err) - defer expected.Release() - - leftDatum, rightDatum := &compute.ScalarDatum{left}, &compute.ArrayDatum{right.Data()} - expDatum := &compute.ArrayDatum{expected.Data()} - - checkScalarBinary(ds.T(), "multiply_unchecked", leftDatum, rightDatum, expDatum, nil) - checkScalarBinary(ds.T(), "multiply_unchecked", rightDatum, leftDatum, expDatum, nil) - }) - - ds.Run("scalar x scalar", func() { - left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1}, "1") - ds.Require().NoError(err) - right, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1}, "1") - ds.Require().NoError(err) - expected, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "1") - ds.Require().NoError(err) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal128 x decimal256", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3, Scale: 2}, "6.66") - right, _ := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 3, Scale: 1}, "88.8") - expected, _ := 
scalar.ParseScalar(&arrow.Decimal256Type{Precision: 7, Scale: 3}, "591.408") - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal x float", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "666") - right := scalar.MakeScalar(float64(888)) - expected := scalar.MakeScalar(float64(591408)) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal x integer", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "666") - right := scalar.MakeScalar(int64(888)) - expected, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23}, "591408") - checkScalarBinary(ds.T(), "multiply_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - }) -} - -func (ds *DecimalBinaryArithmeticSuite) TestDivide() { - ds.Run("array / array, decimal128", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 13, Scale: 3}, - strings.NewReader(`["1234567890.123", "0.001"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 3, Scale: 0}, - strings.NewReader(`["-987", "999"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 17, Scale: 7}, - strings.NewReader(`["-1250828.6627386", "0.0000010"]`)) - ds.Require().NoError(err) - defer expected.Release() - - checkScalarBinary(ds.T(), "divide_unchecked", &compute.ArrayDatum{left.Data()}, 
&compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("array / array decimal256", func() { - left, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 20, Scale: 10}, - strings.NewReader(`["1234567890.1234567890", "9999999999.9999999999"]`)) - ds.Require().NoError(err) - defer left.Release() - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 13, Scale: 3}, - strings.NewReader(`["1234567890.123", "0.001"]`)) - ds.Require().NoError(err) - defer right.Release() - expected, _, err := array.FromJSON(ds.mem, &arrow.Decimal256Type{Precision: 34, Scale: 21}, - strings.NewReader(`["1.000000000000369999093", "9999999999999.999999900000000000000"]`)) - ds.Require().NoError(err) - defer expected.Release() - checkScalarBinary(ds.T(), "divide_unchecked", &compute.ArrayDatum{left.Data()}, &compute.ArrayDatum{right.Data()}, &compute.ArrayDatum{expected.Data()}, nil) - }) - - ds.Run("scalar / array", func() { - left, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 1, Scale: 0}, "1") - ds.Require().NoError(err) - right, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 1, Scale: 0}, - strings.NewReader(`["1", "2", "3", "4"]`)) - ds.Require().NoError(err) - defer right.Release() - leftDivRight, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 4}, - strings.NewReader(`["1.0000", "0.5000", "0.3333", "0.2500"]`)) - ds.Require().NoError(err) - defer leftDivRight.Release() - rightDivLeft, _, err := array.FromJSON(ds.mem, &arrow.Decimal128Type{Precision: 5, Scale: 4}, - strings.NewReader(`["1.0000", "2.0000", "3.0000", "4.0000"]`)) - ds.Require().NoError(err) - defer rightDivLeft.Release() - - leftDatum, rightDatum := &compute.ScalarDatum{left}, &compute.ArrayDatum{right.Data()} - - checkScalarBinary(ds.T(), "divide_unchecked", leftDatum, rightDatum, &compute.ArrayDatum{leftDivRight.Data()}, nil) - checkScalarBinary(ds.T(), "divide_unchecked", rightDatum, 
leftDatum, &compute.ArrayDatum{rightDivLeft.Data()}, nil) - }) - - ds.Run("scalar / scalar", func() { - left, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "2.71828") - ds.Require().NoError(err) - right, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "3.14159") - ds.Require().NoError(err) - expected, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "0.8652561") - ds.Require().NoError(err) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(expected), nil) - }) - - ds.Run("decimal128 / decimal256", func() { - left, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 6, Scale: 5}, "2.71828") - ds.Require().NoError(err) - right, err := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 6, Scale: 5}, "3.14159") - ds.Require().NoError(err) - leftDivRight, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "0.8652561") - ds.Require().NoError(err) - rightDivLeft, err := scalar.ParseScalar(&arrow.Decimal256Type{Precision: 13, Scale: 7}, "1.1557271") - ds.Require().NoError(err) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) - }) - - ds.Run("decimal / float", func() { - left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "100") - right := scalar.MakeScalar(float64(50)) - leftDivRight := scalar.MakeScalar(float64(2)) - rightDivLeft := scalar.MakeScalar(float64(0.5)) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) - }) - - ds.Run("decimal / integer", func() { - 
left, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 3}, "100") - right := scalar.MakeScalar(int64(50)) - leftDivRight, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23, Scale: 20}, "2.0000000000000000000") - rightDivLeft, _ := scalar.ParseScalar(&arrow.Decimal128Type{Precision: 23, Scale: 4}, "0.5000") - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(left), compute.NewDatum(right), compute.NewDatum(leftDivRight), nil) - checkScalarBinary(ds.T(), "divide_unchecked", compute.NewDatum(right), compute.NewDatum(left), compute.NewDatum(rightDivLeft), nil) - }) -} - -func (ds *DecimalBinaryArithmeticSuite) TestAtan2() { - // decimal arguments get promoted to float64, sanity check here - fn := "atan2" - for _, ty := range ds.positiveScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["1.00", "10.00", "1.00", "2.00", null]`) - defer larr.Release() - - ldatum := &compute.ArrayDatum{larr.Data()} - - test := ds.getArr(ty, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - test = ds.getArr(&arrow.Decimal128Type{Precision: 4, Scale: 2}, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - compute.NewDatum(scalar.MakeScalar(int64(10)))}) - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - compute.NewDatum(scalar.MakeScalar(float64(10)))}) - - larr = ds.getArr(arrow.PrimitiveTypes.Float64, `[1, 10, 1, 2, null]`) - defer larr.Release() - - sc, _ := scalar.MakeScalarParam("10.00", ty) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - - larr = 
ds.getArr(arrow.PrimitiveTypes.Int64, `[1, 10, 1, 2, null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - } - - for _, ty := range ds.negativeScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["12E2", "42E2", null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{larr.Data()}}) - - rarr := ds.getArr(&arrow.Decimal128Type{Precision: 2, Scale: -2}, `["12E2", "42E2", null]`) - defer rarr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{rarr.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, compute.NewDatum(scalar.MakeScalar(int64(10)))}) - } -} - -func (ds *DecimalBinaryArithmeticSuite) TestLogb() { - // decimal arguments get promoted to float64, sanity check here - for _, fn := range []string{"logb", "logb_unchecked"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["1.00", "10.00", "1.00", "2.00", null]`) - defer larr.Release() - - ldatum := &compute.ArrayDatum{larr.Data()} - - test := ds.getArr(ty, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - test = ds.getArr(&arrow.Decimal128Type{Precision: 4, Scale: 2}, `["10.00", "10.00", "2.00", "2.00", null]`) - defer test.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - &compute.ArrayDatum{test.Data()}}) - - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - 
compute.NewDatum(scalar.MakeScalar(int64(10)))}) - ds.checkDecimalToFloat(fn, []compute.Datum{ldatum, - compute.NewDatum(scalar.MakeScalar(float64(10)))}) - - larr = ds.getArr(arrow.PrimitiveTypes.Float64, `[1, 10, 1, 2, null]`) - defer larr.Release() - - sc, _ := scalar.MakeScalarParam("10.00", ty) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - - larr = ds.getArr(arrow.PrimitiveTypes.Int64, `[1, 10, 1, 2, null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, - compute.NewDatum(sc)}) - } - - for _, ty := range ds.negativeScales() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}, &compute.ArrayDatum{empty.Data()}}) - - larr := ds.getArr(ty, `["12E2", "42E2", null]`) - defer larr.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{larr.Data()}}) - - rarr := ds.getArr(&arrow.Decimal128Type{Precision: 2, Scale: -2}, `["12E2", "42E2", null]`) - defer rarr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, &compute.ArrayDatum{rarr.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{ - &compute.ArrayDatum{larr.Data()}, compute.NewDatum(scalar.MakeScalar(int64(10)))}) - } - }) - } -} - -type DecimalUnaryArithmeticSuite struct { - DecimalArithmeticSuite -} - -func (ds *DecimalUnaryArithmeticSuite) TestAbsoluteValue() { - max128 := decimal128.GetMaxValue(38) - max256 := decimal256.GetMaxValue(76) - ds.Run("decimal", func() { - for _, fn := range []string{"abs_unchecked", "abs"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["1.00", "-42.15", null]`)) - defer 
in.Release() - exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["1.00", "42.15", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - }) - } - - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76}))}, - compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76})), nil) - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["12E2", "-42E2", null]`)) - defer in.Release() - exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["12E2", "42E2", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - }) - } - }) - } - }) -} - -func (ds *DecimalUnaryArithmeticSuite) TestNegate() { - max128 := decimal128.GetMaxValue(38) - max256 := decimal256.GetMaxValue(76) - - for _, fn := range []string{"negate_unchecked", "negate"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0.00", "1.00", "-42.15", null]`)) - defer in.Release() - exp, _, _ := 
array.FromJSON(ds.mem, ty, strings.NewReader(`["0.00", "-1.00", "42.15", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - } - - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76}))}, - compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38})), nil) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 76}))}, - compute.NewDatum(scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 76})), nil) - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`[]`)) - defer empty.Release() - in, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0", "12E2", "-42E2", null]`)) - defer in.Release() - exp, _, _ := array.FromJSON(ds.mem, ty, strings.NewReader(`["0", "-12E2", "42E2", null]`)) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, nil) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, &compute.ArrayDatum{exp.Data()}, nil) - }) - } - }) - } -} - -func (ds 
*DecimalUnaryArithmeticSuite) TestSquareRoot() { - for _, fn := range []string{"sqrt_unchecked", "sqrt"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - arr := ds.decimalArrayFromJSON(ty, `["4.00", "16.00", "36.00", null]`) - defer arr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: arr.Data()}}) - - neg := ds.decimalArrayFromJSON(ty, `["-2.00"]`) - defer neg.Release() - ds.checkFail("sqrt", []compute.Datum{&compute.ArrayDatum{Value: neg.Data()}}, "square root of negative number", nil) - }) - } - - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - arr := ds.decimalArrayFromJSON(ty, `["400", "1600", "3600", null]`) - defer arr.Release() - - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{Value: arr.Data()}}) - - neg := ds.decimalArrayFromJSON(ty, `["-400"]`) - defer neg.Release() - ds.checkFail("sqrt", []compute.Datum{&compute.ArrayDatum{Value: neg.Data()}}, "square root of negative number", nil) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestSign() { - max128 := decimal128.GetMaxValue(38) - max256 := decimal256.GetMaxValue(76) - - for _, ty := range ds.positiveScales() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - emptyOut := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[]`) - defer emptyOut.Release() - in := ds.decimalArrayFromJSON(ty, `["1.00", "0.00", "-42.15", null]`) - defer in.Release() - exp := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) - defer exp.Release() - - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - 
&compute.ArrayDatum{emptyOut.Data()}, nil) - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{exp.Data()}, nil) - } - - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal128Scalar(max128, &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(1))), nil) - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal128Scalar(max128.Negate(), &arrow.Decimal128Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(-1))), nil) - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal256Scalar(max256, &arrow.Decimal256Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(1))), nil) - checkScalar(ds.T(), "sign", []compute.Datum{compute.NewDatum( - scalar.NewDecimal256Scalar(max256.Negate(), &arrow.Decimal256Type{Precision: 38}))}, - compute.NewDatum(scalar.MakeScalar(int64(-1))), nil) - - for _, ty := range ds.negativeScales() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - emptyOut := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[]`) - defer emptyOut.Release() - in := ds.decimalArrayFromJSON(ty, `["12e2", "0.00", "-42E2", null]`) - defer in.Release() - exp := ds.decimalArrayFromJSON(arrow.PrimitiveTypes.Int64, `[1, 0, -1, null]`) - defer exp.Release() - - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{emptyOut.Data()}, nil) - checkScalar(ds.T(), "sign", []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{exp.Data()}, nil) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestTrigAcosAsin() { - for _, fn := range []string{"acos", "acos_unchecked", "asin", "asin_unchecked"} { - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["0.00", 
"-1.00", "1.00", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - }) - } - - for _, fn := range []string{"acos", "asin"} { - ds.Run(fn, func() { - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - arr := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`) - defer arr.Release() - ds.checkDecimalToFloat(fn+"_unchecked", []compute.Datum{&compute.ArrayDatum{arr.Data()}}) - ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{arr.Data()}}, "domain error", nil) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestAtan() { - fn := "atan" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestTrig() { - for _, fn := range []string{"cos", "sin", "tan"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - ds.Run(fn, func() { - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["0.00", "-1.00", "1.00", null]`) - defer vals.Release() - 
ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.decimalArrayFromJSON(ty, `[]`) - defer empty.Release() - vals := ds.decimalArrayFromJSON(ty, `["12E2", "-42E2", null]`) - defer vals.Release() - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}) - ds.checkDecimalToFloat(fn, []compute.Datum{&compute.ArrayDatum{vals.Data()}}) - }) - } - }) - } - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRound() { - options := compute.RoundOptions{NDigits: 2, Mode: compute.RoundDown} - - cases := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `["1.010", "1.010", "1.010", "1.010", "-1.010", "-1.020", "-1.020", "-1.020", null]`}, - {compute.RoundUp, `["1.010", "1.020", "1.020", "1.020", "-1.010", "-1.010", "-1.010", "-1.010", null]`}, - {compute.RoundTowardsZero, `["1.010", "1.010", "1.010", "1.010", "-1.010", "-1.010", "-1.010", "-1.010", null]`}, - {compute.RoundTowardsInfinity, `["1.010", "1.020", "1.020", "1.020", "-1.010", "-1.020", "-1.020", "-1.020", null]`}, - {compute.RoundHalfDown, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`}, - {compute.RoundHalfUp, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`}, - {compute.RoundHalfTowardsZero, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`}, - {compute.RoundHalfTowardsInfinity, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`}, - {compute.RoundHalfToEven, `["1.010", "1.010", "1.020", "1.020", "-1.010", "-1.010", "-1.020", "-1.020", null]`}, - {compute.RoundHalfToOdd, `["1.010", "1.010", "1.010", "1.020", "-1.010", "-1.010", "-1.010", "-1.020", null]`}, - } - - fn := "round" - for _, ty := range 
[]arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 3}, &arrow.Decimal256Type{Precision: 4, Scale: 3}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, `["1.010", "1.012", "1.015", "1.019", "-1.010", "-1.012", "-1.015", "-1.019", null]`) - defer values.Release() - - for _, tt := range cases { - ds.Run(tt.mode.String(), func() { - options.Mode = tt.mode - exp := ds.getArr(ty, tt.exp) - defer exp.Release() - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{values.Data()}}, - &compute.ArrayDatum{exp.Data()}, options) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundTowardsInfinity() { - fn := "round" - options := compute.RoundOptions{NDigits: 0, Mode: compute.RoundTowardsInfinity} - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - vals := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer vals.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - input := []compute.Datum{&compute.ArrayDatum{vals.Data()}} - - options.NDigits = 0 - - exp0 := ds.getArr(ty, `["1.00", "2.00", "2.00", "-42.00", "-43.00", "-43.00", null]`) - defer exp0.Release() - - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{exp0.Data()}, options) - - exp1 := ds.getArr(ty, `["1.00", "2.00", "1.10", "-42.00", "-43.00", "-42.20", null]`) - defer exp1.Release() - - options.NDigits = 1 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{exp1.Data()}, options) - - options.NDigits = 2 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options) - options.NDigits = 4 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{vals.Data()}, options) - - 
options.NDigits = -1 - neg := ds.getArr(ty, `["10.00", "10.00", "10.00", "-50.00", "-50.00", "-50.00", null]`) - defer neg.Release() - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{neg.Data()}, options) - - options.NDigits = -2 - ds.checkFail(fn, input, "rounding to -2 digits will not fit in precision", options) - options.NDigits = -1 - - noprec := ds.getArr(ty, `["99.99"]`) - defer noprec.Release() - ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{noprec.Data()}}, "rounded value 100.00 does not fit in precision", options) - }) - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - options.NDigits = 0 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -1 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -3 - res := ds.getArr(ty, `["10E2", "20E2", "20E2", "-10E2", "-20E2", "-20E2", null]`) - defer res.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{res.Data()}, options) - - options.NDigits = -4 - ds.checkFail(fn, []compute.Datum{input}, "rounding to -4 digits will not fit in precision", options) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundHalfToEven() { - fn := "round" - options := compute.RoundOptions{NDigits: 0, Mode: compute.RoundHalfToEven} - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), 
func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - values := ds.getArr(ty, `["1.00", "5.99", "1.01", "-42.00", "-42.99", "-42.15", "1.50", "2.50", "-5.50", "-2.55", null]`) - defer values.Release() - input := &compute.ArrayDatum{values.Data()} - - exp0 := ds.getArr(ty, `["1.00", "6.00", "1.00", "-42.00", "-43.00", "-42.00", "2.00", "2.00", "-6.00", "-3.00", null]`) - defer exp0.Release() - - exp1 := ds.getArr(ty, `["1.00", "6.00", "1.00", "-42.00", "-43.00", "-42.20", "1.50", "2.50", "-5.50", "-2.60", null]`) - defer exp1.Release() - - expNeg1 := ds.getArr(ty, `["0.00", "10.00", "0.00", "-40.00", "-40.00", "-40.00", "0.00", "0.00", "-10.00", "0.00", null]`) - defer expNeg1.Release() - - options.NDigits = 0 - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp0.Data()}, options) - options.NDigits = 1 - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp1.Data()}, options) - options.NDigits = 2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 4 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -1 - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{expNeg1.Data()}, options) - options.NDigits = -2 - ds.checkFail(fn, []compute.Datum{input}, "rounding to -2 digits will not fit in precision", options) - options.NDigits = -1 - noprec := ds.getArr(ty, `["99.99"]`) - defer noprec.Release() - ds.checkFail(fn, []compute.Datum{&compute.ArrayDatum{noprec.Data()}}, "rounded value 100.00 does not fit in precision", options) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, 
`["5E2", "10E2", "12E2", "15E2", "18E2", "-10E2", "-12E2", "-15E2", "-18E2", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - options.NDigits = 0 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = 100 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -1 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -2 - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - options.NDigits = -3 - res := ds.getArr(ty, `["0", "10E2", "10E2", "20E2", "20E2", "-10E2", "-10E2", "-20E2", "-20E2", null]`) - defer res.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{res.Data()}, options) - - options.NDigits = -4 - ds.checkFail(fn, []compute.Datum{input}, "rounding to -4 digits will not fit in precision", options) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundCeil() { - fn := "ceil" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer in.Release() - out := ds.getArr(ty, `["1.00", "2.00", "2.00", "-42.00", "-42.00", "-42.00", null]`) - defer out.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{out.Data()}, nil) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - sc, _ := scalar.MakeScalarParam("99.99", ty) - ds.checkFail(fn, []compute.Datum{compute.NewDatum(sc)}, "rounded value 100.00 does not fit in precision of decimal", nil) - sc, 
_ = scalar.MakeScalarParam("-99.99", ty) - out, _ := scalar.MakeScalarParam("-99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - ex := ds.getArr(ty, `["12E2", "-42E2", null]`) - defer ex.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, - &compute.ArrayDatum{ex.Data()}, nil) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundFloor() { - fn := "floor" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer in.Release() - out := ds.getArr(ty, `["1.00", "1.00", "1.00", "-42.00", "-43.00", "-43.00", null]`) - defer out.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{out.Data()}, nil) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - sc, _ := scalar.MakeScalarParam("-99.99", ty) - ds.checkFail(fn, []compute.Datum{compute.NewDatum(sc)}, "rounded value -100.00 does not fit in precision of decimal", nil) - sc, _ = scalar.MakeScalarParam("99.99", ty) - out, _ := scalar.MakeScalarParam("99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - 
checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - ex := ds.getArr(ty, `["12E2", "-42E2", null]`) - defer ex.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, - &compute.ArrayDatum{ex.Data()}, nil) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundTrunc() { - fn := "trunc" - for _, ty := range ds.positiveScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - in := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer in.Release() - out := ds.getArr(ty, `["1.00", "1.00", "1.00", "-42.00", "-42.00", "-42.00", null]`) - defer out.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{in.Data()}}, - &compute.ArrayDatum{out.Data()}, nil) - }) - } - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - sc, _ := scalar.MakeScalarParam("99.99", ty) - out, _ := scalar.MakeScalarParam("99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - sc, _ = scalar.MakeScalarParam("-99.99", ty) - out, _ = scalar.MakeScalarParam("-99.00", ty) - checkScalar(ds.T(), fn, []compute.Datum{compute.NewDatum(sc)}, compute.NewDatum(out), nil) - }) - } - for _, ty := range ds.negativeScales() { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, - &compute.ArrayDatum{empty.Data()}, nil) - - ex := ds.getArr(ty, `["12E2", "-42E2", null]`) - defer ex.Release() - - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{ex.Data()}}, - &compute.ArrayDatum{ex.Data()}, nil) - }) - } -} - -func 
(ds *DecimalUnaryArithmeticSuite) TestRoundToMultiple() { - fn := "round_to_multiple" - var options compute.RoundToMultipleOptions - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - if ty.ID() == arrow.DECIMAL128 { - options.Multiple, _ = scalar.MakeScalarParam(decimal128.FromI64(200), ty) - } else { - options.Multiple, _ = scalar.MakeScalarParam(decimal256.FromI64(200), ty) - } - - values := ds.getArr(ty, `["-3.50", "-3.00", "-2.50", "-2.00", "-1.50", "-1.00", "-0.50", "0.00", "0.50", "1.00", "1.50", "2.00", "2.50", "3.00", "3.50", null]`) - defer values.Release() - - input := []compute.Datum{&compute.ArrayDatum{values.Data()}} - - tests := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `["-4.00", "-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", null]`}, - {compute.RoundUp, `["-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "-0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", "4.00", null]`}, - {compute.RoundTowardsZero, `["-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", null]`}, - {compute.RoundTowardsInfinity, `["-4.00", "-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", "4.00", null]`}, - {compute.RoundHalfDown, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`}, - {compute.RoundHalfUp, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`}, - {compute.RoundHalfTowardsZero, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`}, - 
{compute.RoundHalfTowardsInfinity, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`}, - {compute.RoundHalfToEven, `["-4.00", "-4.00", "-2.00", "-2.00", "-2.00", "-0.00", "-0.00", "0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "4.00", "4.00", null]`}, - {compute.RoundHalfToOdd, `["-4.00", "-2.00", "-2.00", "-2.00", "-2.00", "-2.00", "-0.00", "0.00", "0.00", "2.00", "2.00", "2.00", "2.00", "2.00", "4.00", null]`}, - } - - for _, tt := range tests { - ds.Run(tt.mode.String(), func() { - options.Mode = tt.mode - - result := ds.getArr(ty, tt.exp) - defer result.Release() - - checkScalar(ds.T(), fn, input, &compute.ArrayDatum{result.Data()}, options) - }) - } - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultipleTowardsInfinity() { - fn := "round_to_multiple" - options := compute.RoundToMultipleOptions{Mode: compute.RoundTowardsInfinity} - setMultiple := func(ty arrow.DataType, val int64) { - if ty.ID() == arrow.DECIMAL128 { - options.Multiple = scalar.NewDecimal128Scalar(decimal128.FromI64(val), ty) - } else { - options.Multiple = scalar.NewDecimal256Scalar(decimal256.FromI64(val), ty) - } - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - ds.Run(ty.String(), func() { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - values := ds.getArr(ty, `["1.00", "1.99", "1.01", "-42.00", "-42.99", "-42.15", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - setMultiple(ty, 25) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - exp25 := ds.getArr(ty, `["1.00", "2.00", "1.25", "-42.00", "-43.00", "-42.25", null]`) - defer exp25.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp25.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), 
fn, []compute.Datum{input}, input, options) - - setMultiple(&arrow.Decimal128Type{Precision: 2, Scale: 0}, 2) - exp20 := ds.getArr(ty, `["2.00", "2.00", "2.00", "-42.00", "-44.00", "-44.00", null]`) - defer exp20.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp20.Data()}, options) - - setMultiple(ty, 0) - ds.checkFail(fn, []compute.Datum{input}, "rounding multiple must be positive", options) - - options.Multiple = scalar.NewDecimal128Scalar(decimal128.Num{}, &arrow.Decimal128Type{Precision: 4, Scale: 2}) - ds.checkFail(fn, []compute.Datum{input}, "rounding multiple must be positive", options) - - tester := ds.getArr(ty, `["99.99"]`) - defer tester.Release() - - testDatum := &compute.ArrayDatum{tester.Data()} - - setMultiple(ty, -10) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be positive", options) - setMultiple(ty, 100) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounded value 100.00 does not fit in precision", options) - options.Multiple = scalar.NewFloat64Scalar(1) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounded value 100.00 does not fit in precision", options) - options.Multiple = scalar.MakeNullScalar(&arrow.Decimal128Type{Precision: 3}) - ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be non-null and valid", options) - options.Multiple = nil - ds.checkFail(fn, []compute.Datum{testDatum}, "rounding multiple must be non-null and valid", options) - }) - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - ds.Run(ty.String(), func() { - values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - setMultiple(ty, 4) - exp := ds.getArr(ty, `["12E2", "12E2", "20E2", "-12E2", "-12E2", "-20E2", null]`) - defer exp.Release() - - checkScalar(ds.T(), fn, []compute.Datum{input}, 
&compute.ArrayDatum{exp.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - }) - } -} - -func (ds *DecimalUnaryArithmeticSuite) TestRoundToMultipleHalfToOdd() { - fn := "round_to_multiple" - options := compute.RoundToMultipleOptions{Mode: compute.RoundHalfToOdd} - setMultiple := func(ty arrow.DataType, val int64) { - if ty.ID() == arrow.DECIMAL128 { - options.Multiple = scalar.NewDecimal128Scalar(decimal128.FromI64(val), ty) - } else { - options.Multiple = scalar.NewDecimal256Scalar(decimal256.FromI64(val), ty) - } - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 4, Scale: 2}, &arrow.Decimal256Type{Precision: 4, Scale: 2}} { - empty := ds.getArr(ty, `[]`) - defer empty.Release() - - values := ds.getArr(ty, `["-0.38", "-0.37", "-0.25", "-0.13", "-0.12", "0.00", "0.12", "0.13", "0.25", "0.37", "0.38", null]`) - defer values.Release() - - input := &compute.ArrayDatum{values.Data()} - - // there is no exact halfway point, check what happens - setMultiple(ty, 25) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - exp25 := ds.getArr(ty, `["-0.50", "-0.25", "-0.25", "-0.25", "-0.00", "0.00", "0.00", "0.25", "0.25", "0.25", "0.50", null]`) - defer exp25.Release() - - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp25.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - setMultiple(ty, 24) - checkScalar(ds.T(), fn, []compute.Datum{&compute.ArrayDatum{empty.Data()}}, &compute.ArrayDatum{empty.Data()}, options) - - exp24 := ds.getArr(ty, `["-0.48", "-0.48", "-0.24", "-0.24", "-0.24", "0.00", "0.24", "0.24", "0.24", "0.48", "0.48", null]`) - defer exp24.Release() - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp24.Data()}, options) - - setMultiple(&arrow.Decimal128Type{Precision: 3, Scale: 1}, 1) - exp1 := 
ds.getArr(ty, `["-0.40", "-0.40", "-0.30", "-0.10", "-0.10", "0.00", "0.10", "0.10", "0.30", "0.40", "0.40", null]`) - defer exp1.Release() - - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp1.Data()}, options) - } - - for _, ty := range []arrow.DataType{&arrow.Decimal128Type{Precision: 2, Scale: -2}, &arrow.Decimal256Type{Precision: 2, Scale: -2}} { - values := ds.getArr(ty, `["10E2", "12E2", "18E2", "-10E2", "-12E2", "-18E2", null]`) - defer values.Release() - - exp4 := ds.getArr(ty, `["12E2", "12E2", "20E2", "-12E2", "-12E2", "-20E2", null]`) - defer exp4.Release() - - exp5 := ds.getArr(ty, `["10E2", "10E2", "20E2", "-10E2", "-10E2", "-20E2", null]`) - defer exp5.Release() - - input := &compute.ArrayDatum{values.Data()} - setMultiple(ty, 4) - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp4.Data()}, options) - - setMultiple(ty, 5) - checkScalar(ds.T(), fn, []compute.Datum{input}, &compute.ArrayDatum{exp5.Data()}, options) - - setMultiple(ty, 1) - checkScalar(ds.T(), fn, []compute.Datum{input}, input, options) - } -} - -type ScalarBinaryTemporalArithmeticSuite struct { - BinaryFuncTestSuite -} - -var ( - date32JSON = `[0, 11016, -25932, 23148, 18262, 18261, 18260, 14609, 14610, 14612, - 14613, 13149, 13148, 14241, 14242, 15340, null]` - date32JSON2 = `[365, 10650, -25901, 23118, 18263, 18259, 18260, 14609, 14610, 14612, - 14613, 13149, 13148, 14240, 13937, 15400, null]` - date64JSON = `[0, 951782400000, -2240524800000, 1999987200000, 1577836800000, - 1577750400000, 1577664000000, 1262217600000, 1262304000000, 1262476800000, - 1262563200000, 1136073600000, 1135987200000, 1230422400000, 1230508800000, - 1325376000000, null]` - date64JSON2 = `[31536000000, 920160000000, -2237846400000, 1997395200000, - 1577923200000, 1577577600000, 1577664000000, 1262217600000, 1262304000000, - 1262476800000, 1262563200000, 1136073600000, 1135987200000, 1230336000000, - 1204156800000, 1330560000000, null]` - timeJSONs = `[59, 84203, 
3560, 12800, 3905, 7810, 11715, 15620, 19525, 23430, 27335, - 31240, 35145, 0, 0, 3723, null]` - timeJSONs2 = `[59, 84203, 12642, 7182, 68705, 7390, 915, 16820, 19525, 5430, 84959, - 31207, 35145, 0, 0, 3723, null]` - timeJSONms = `[59123, 84203999, 3560001, 12800000, 3905001, 7810002, 11715003, 15620004, - 19525005, 23430006, 27335000, 31240000, 35145000, 0, 0, 3723000, null]` - timeJSONms2 = `[59103, 84203999, 12642001, 7182000, 68705005, 7390000, 915003, 16820004, - 19525005, 5430006, 84959000, 31207000, 35145000, 0, 0, 3723000, null]` - timeJSONus = `[59123456, 84203999999, 3560001001, 12800000000, 3905001000, 7810002000, - 11715003000, 15620004132, 19525005321, 23430006163, 27335000000, - 31240000000, 35145000000, 0, 0, 3723000000, null]` - timeJSONus2 = `[59103476, 84203999999, 12642001001, 7182000000, 68705005000, 7390000000, - 915003000, 16820004432, 19525005021, 5430006163, 84959000000, - 31207000000, 35145000000, 0, 0, 3723000000, null]` - timeJSONns = `[59123456789, 84203999999999, 3560001001001, 12800000000000, 3905001000000, - 7810002000000, 11715003000000, 15620004132000, 19525005321000, - 23430006163000, 27335000000000, 31240000000000, 35145000000000, 0, 0, - 3723000000000, null]` - timeJSONns2 = `[59103476799, 84203999999909, 12642001001001, 7182000000000, 68705005000000, - 7390000000000, 915003000000, 16820004432000, 19525005021000, 5430006163000, - 84959000000000, 31207000000000, 35145000000000, 0, 0, 3723000000000, null]` -) - -func (s *ScalarBinaryTemporalArithmeticSuite) TestTemporalAddSub() { - tests := []struct { - val1 string - val2 string - dt arrow.DataType - exp arrow.DataType - }{ - {date32JSON, date32JSON2, arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Duration_s}, - {date64JSON, date64JSON2, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Duration_ms}, - {timeJSONs, timeJSONs2, arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Duration_s}, - {timeJSONms, timeJSONms2, arrow.FixedWidthTypes.Time32ms, 
arrow.FixedWidthTypes.Duration_ms}, - {timeJSONus, timeJSONus2, arrow.FixedWidthTypes.Time64us, arrow.FixedWidthTypes.Duration_us}, - {timeJSONns, timeJSONns2, arrow.FixedWidthTypes.Time64ns, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - s.Run(tt.dt.String(), func() { - for _, checked := range []bool{true, false} { - s.Run(fmt.Sprintf("checked=%t", checked), func() { - opts := compute.ArithmeticOptions{NoCheckOverflow: !checked} - arr1, _, _ := array.FromJSON(s.mem, tt.dt, strings.NewReader(tt.val1)) - defer arr1.Release() - arr2, _, _ := array.FromJSON(s.mem, tt.dt, strings.NewReader(tt.val2)) - defer arr2.Release() - - datum1 := &compute.ArrayDatum{Value: arr1.Data()} - datum2 := &compute.ArrayDatum{Value: arr2.Data()} - - result, err := compute.Subtract(s.ctx, opts, datum1, datum2) - s.Require().NoError(err) - defer result.Release() - res := result.(*compute.ArrayDatum) - s.Truef(arrow.TypeEqual(tt.exp, res.Type()), - "expected: %s\n got: %s", tt.exp, res.Type()) - - out, err := compute.Add(s.ctx, opts, datum2, result) - s.Require().NoError(err) - defer out.Release() - - // date32 - date32 / date64 - date64 produce durations - // and date + duration == timestamp so we need to cast - // the timestamp back to a date in that case. Otherwise - // we get back time32/time64 in those cases and can - // compare them accurately. 
- if arrow.TypeEqual(arr1.DataType(), out.(*compute.ArrayDatum).Type()) { - assertDatumsEqual(s.T(), datum1, out, nil, nil) - } else { - casted, err := compute.CastDatum(s.ctx, out, compute.SafeCastOptions(arr1.DataType())) - s.Require().NoError(err) - defer casted.Release() - assertDatumsEqual(s.T(), datum1, casted, nil, nil) - } - - }) - } - }) - } -} - -func TestUnaryDispatchBest(t *testing.T) { - for _, fn := range []string{"abs"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - t.Run(fn, func(t *testing.T) { - for _, ty := range numericTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - } - - for _, fn := range []string{"negate_unchecked", "sign"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range numericTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - - for _, fn := range []string{"negate"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range append(signedIntTypes, floatingTypes...) 
{ - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - - // float types (with _unchecked variants) - for _, fn := range []string{"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - t.Run(fn, func(t *testing.T) { - for _, ty := range floatingTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - } - - // float types (without _unchecked variants) - for _, fn := range []string{"atan", "sign", "floor", "ceil", "trunc", "round"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range floatingTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{ty}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{ty}) - }) - } - }) - } - - // integer -> float64 (with _unchecked variant) - for _, fn := range []string{"ln", "log2", "log10", "log1p", "sin", "cos", "tan", "asin", "acos"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - t.Run(fn, func(t *testing.T) { - for _, ty := range integerTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64}) - }) - } - }) - } - } - - // integer -> float64 (without _unchecked 
variants) - for _, fn := range []string{"atan", "floor", "ceil", "trunc", "round"} { - t.Run(fn, func(t *testing.T) { - for _, ty := range integerTypes { - t.Run(ty.String(), func(t *testing.T) { - CheckDispatchBest(t, fn, []arrow.DataType{ty}, []arrow.DataType{arrow.PrimitiveTypes.Float64}) - CheckDispatchBest(t, fn, []arrow.DataType{&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: ty}}, - []arrow.DataType{arrow.PrimitiveTypes.Float64}) - }) - } - }) - } -} - -func TestUnaryArithmeticNull(t *testing.T) { - for _, fn := range []string{"abs", "negate", "acos", "asin", "cos", "ln", "log10", "log1p", "log2", "sin", "tan"} { - for _, suffix := range []string{"", "_unchecked"} { - fn += suffix - assertNullToNull(t, context.TODO(), fn, memory.DefaultAllocator) - } - } - - for _, fn := range []string{"sign", "atan", "bit_wise_not", "floor", "ceil", "trunc", "round"} { - assertNullToNull(t, context.TODO(), fn, memory.DefaultAllocator) - } -} - -type UnaryArithmeticSuite[T arrow.NumericType, O fnOpts] struct { - suite.Suite - - mem *memory.CheckedAllocator - ctx context.Context - - opts O -} - -func (us *UnaryArithmeticSuite[T, O]) SetupTest() { - us.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - us.ctx = compute.WithAllocator(context.TODO(), us.mem) - var def O - us.opts = def -} - -func (us *UnaryArithmeticSuite[T, O]) TearDownTest() { - us.mem.AssertSize(us.T(), 0) -} - -func (*UnaryArithmeticSuite[T, O]) datatype() arrow.DataType { - return arrow.GetDataType[T]() -} - -func (us *UnaryArithmeticSuite[T, O]) makeNullScalar() scalar.Scalar { - return scalar.MakeNullScalar(us.datatype()) -} - -func (us *UnaryArithmeticSuite[T, O]) makeScalar(v T) scalar.Scalar { - return scalar.MakeScalar(v) -} - -func (us *UnaryArithmeticSuite[T, O]) makeArray(v ...T) arrow.Array { - return exec.ArrayFromSlice(us.mem, v) -} - -func (us *UnaryArithmeticSuite[T, O]) getArr(dt arrow.DataType, str string) arrow.Array { - arr, _, err := array.FromJSON(us.mem, 
dt, strings.NewReader(str), array.WithUseNumber()) - us.Require().NoError(err) - return arr -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpValError(fn unaryArithmeticFunc[O], arg T, msg string) { - in := us.makeScalar(arg) - _, err := fn(us.ctx, us.opts, compute.NewDatum(in)) - us.ErrorIs(err, arrow.ErrInvalid) - us.ErrorContains(err, msg) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpNotImplemented(fn unaryArithmeticFunc[O], arg T, msg string) { - in := us.makeScalar(arg) - _, err := fn(us.ctx, us.opts, compute.NewDatum(in)) - us.ErrorIs(err, arrow.ErrNotImplemented) - us.ErrorContains(err, msg) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpVals(fn unaryArithmeticFunc[O], arg, expected T) { - in := us.makeScalar(arg) - exp := us.makeScalar(expected) - - actual, err := fn(us.ctx, us.opts, compute.NewDatum(in)) - us.Require().NoError(err) - assertScalarEquals(us.T(), exp, actual.(*compute.ScalarDatum).Value, scalar.WithNaNsEqual(true)) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpScalars(fn unaryArithmeticFunc[O], arg, exp scalar.Scalar) { - actual, err := fn(us.ctx, us.opts, compute.NewDatum(arg)) - us.Require().NoError(err) - assertScalarEquals(us.T(), exp, actual.(*compute.ScalarDatum).Value, scalar.WithNaNsEqual(true)) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpArrs(fn unaryArithmeticFunc[O], arg, exp arrow.Array) { - datum := &compute.ArrayDatum{arg.Data()} - actual, err := fn(us.ctx, us.opts, datum) - us.Require().NoError(err) - defer actual.Release() - assertDatumsEqual(us.T(), &compute.ArrayDatum{exp.Data()}, actual, []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) - - // also check scalar ops - for i := 0; i < arg.Len(); i++ { - expScalar, err := scalar.GetScalar(exp, i) - us.NoError(err) - argScalar, err := scalar.GetScalar(arg, i) - us.NoError(err) - - actual, err := fn(us.ctx, us.opts, compute.NewDatum(argScalar)) - us.Require().NoError(err) - 
assertDatumsEqual(us.T(), compute.NewDatum(expScalar), compute.NewDatum(actual), []array.EqualOption{array.WithNaNsEqual(true)}, []scalar.EqualOption{scalar.WithNaNsEqual(true)}) - } -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpExpArr(fn unaryArithmeticFunc[O], arg string, exp arrow.Array) { - in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber()) - us.Require().NoError(err) - defer in.Release() - - us.assertUnaryOpArrs(fn, in, exp) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOp(fn unaryArithmeticFunc[O], arg, exp string) { - in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber()) - us.Require().NoError(err) - defer in.Release() - expected, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(exp), array.WithUseNumber()) - us.Require().NoError(err) - defer expected.Release() - - us.assertUnaryOpArrs(fn, in, expected) -} - -func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpErr(fn unaryArithmeticFunc[O], arg string, msg string) { - in, _, err := array.FromJSON(us.mem, us.datatype(), strings.NewReader(arg), array.WithUseNumber()) - us.Require().NoError(err) - defer in.Release() - - _, err = fn(us.ctx, us.opts, &compute.ArrayDatum{in.Data()}) - us.ErrorIs(err, arrow.ErrInvalid) - us.ErrorContains(err, msg) -} - -type UnaryArithmeticIntegral[T arrow.IntType | arrow.UintType] struct { - UnaryArithmeticSuite[T, compute.ArithmeticOptions] -} - -func (us *UnaryArithmeticIntegral[T]) setOverflowCheck(v bool) { - us.opts.NoCheckOverflow = !v -} - -func (us *UnaryArithmeticIntegral[T]) TestTrig() { - // integer arguments promoted to float64, sanity check here - atan := func(ctx context.Context, _ compute.ArithmeticOptions, arg compute.Datum) (compute.Datum, error) { - return compute.Atan(ctx, arg) - } - - input := us.makeArray(0, 1) - defer input.Release() - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - sinOut := 
us.getArr(arrow.PrimitiveTypes.Float64, `[0, 0.8414709848078965]`) - defer sinOut.Release() - cosOut := us.getArr(arrow.PrimitiveTypes.Float64, `[1, 0.5403023058681398]`) - defer cosOut.Release() - tanOut := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 1.5574077246549023]`) - defer tanOut.Release() - asinOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[0, %f]", math.Pi/2)) - defer asinOut.Release() - acosOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[%f, 0]", math.Pi/2)) - defer acosOut.Release() - atanOut := us.getArr(arrow.PrimitiveTypes.Float64, fmt.Sprintf("[0, %f]", math.Pi/4)) - defer atanOut.Release() - - us.assertUnaryOpArrs(compute.Sin, input, sinOut) - us.assertUnaryOpArrs(compute.Cos, input, cosOut) - us.assertUnaryOpArrs(compute.Tan, input, tanOut) - us.assertUnaryOpArrs(compute.Asin, input, asinOut) - us.assertUnaryOpArrs(compute.Acos, input, acosOut) - us.assertUnaryOpArrs(atan, input, atanOut) - } -} - -func (us *UnaryArithmeticIntegral[T]) TestLog() { - // integer arguments promoted to double, sanity check here - ty := us.datatype() - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - exp1 := us.getArr(arrow.PrimitiveTypes.Float64, `[0, null]`) - defer exp1.Release() - exp2 := us.getArr(arrow.PrimitiveTypes.Float64, `[0, 1, null]`) - defer exp2.Release() - - ln := us.getArr(ty, `[1, null]`) - defer ln.Release() - log10 := us.getArr(ty, `[1, 10, null]`) - defer log10.Release() - log2 := us.getArr(ty, `[1, 2, null]`) - defer log2.Release() - log1p := us.getArr(ty, `[0, null]`) - defer log1p.Release() - - us.assertUnaryOpArrs(compute.Ln, ln, exp1) - us.assertUnaryOpArrs(compute.Log10, log10, exp2) - us.assertUnaryOpArrs(compute.Log2, log2, exp2) - us.assertUnaryOpArrs(compute.Log1p, log1p, exp1) - } -} - -type UnaryArithmeticSigned[T arrow.IntType] struct { - UnaryArithmeticIntegral[T] -} - -func (us *UnaryArithmeticSigned[T]) TestAbsoluteValue() { - var ( - dt = us.datatype() - min = 
kernels.MinOf[T]() - max = kernels.MaxOf[T]() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.AbsoluteValue, in, exp) - } - - us.Run(dt.String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - // empty array - fn(`[]`, `[]`) - // scalar/arrays with nulls - fn(`[null]`, `[null]`) - fn(`[1, null, -10]`, `[1, null, 10]`) - us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) - // scalar/arrays with zeros - fn(`[0, -0]`, `[0, 0]`) - us.assertUnaryOpVals(compute.AbsoluteValue, -0, 0) - us.assertUnaryOpVals(compute.AbsoluteValue, 0, 0) - // ordinary scalars/arrays (positive inputs) - fn(`[1, 10, 127]`, `[1, 10, 127]`) - us.assertUnaryOpVals(compute.AbsoluteValue, 1, 1) - // ordinary scalars/arrays (negative inputs) - fn(`[-1, -10, -127]`, `[1, 10, 127]`) - us.assertUnaryOpVals(compute.AbsoluteValue, -1, 1) - // min/max - us.assertUnaryOpVals(compute.AbsoluteValue, max, max) - if checkOverflow { - us.assertUnaryOpValError(compute.AbsoluteValue, min, "overflow") - } else { - us.assertUnaryOpVals(compute.AbsoluteValue, min, min) - } - }) - } - }) -} - -func (us *UnaryArithmeticSigned[T]) TestNegate() { - var ( - dt = us.datatype() - min = kernels.MinOf[T]() - max = kernels.MaxOf[T]() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.Negate, in, exp) - } - - us.Run(dt.String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - fn(`[]`, `[]`) - // scalar/arrays with nulls - fn(`[null]`, `[null]`) - fn(`[1, null, -10]`, `[-1, null, 10]`) - // ordinary scalars/arrays (positive inputs) - fn(`[1, 10, 127]`, `[-1, -10, -127]`) - us.assertUnaryOpVals(compute.Negate, 1, -1) - // ordinary scalars/arrays (negative inputs) - fn(`[-1, -10, -127]`, `[1, 10, 127]`) - 
us.assertUnaryOpVals(compute.Negate, -1, 1) - // min/max - us.assertUnaryOpVals(compute.Negate, min+1, max) - us.assertUnaryOpVals(compute.Negate, max, min+1) - }) - } - }) -} - -type UnaryArithmeticUnsigned[T arrow.UintType] struct { - UnaryArithmeticIntegral[T] -} - -func (us *UnaryArithmeticUnsigned[T]) TestAbsoluteValue() { - var ( - min, max T = 0, kernels.MaxOf[T]() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.AbsoluteValue, in, exp) - } - - us.Run(us.datatype().String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - fn(`[]`, `[]`) - fn(`[null]`, `[null]`) - us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) - fn(`[0, 1, 10, 127]`, `[0, 1, 10, 127]`) - us.assertUnaryOpVals(compute.AbsoluteValue, min, min) - us.assertUnaryOpVals(compute.AbsoluteValue, max, max) - }) - } - }) -} - -func (us *UnaryArithmeticUnsigned[T]) TestNegate() { - var ( - dt = us.datatype() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.Negate, in, exp) - } - - us.Run(dt.String(), func() { - us.setOverflowCheck(true) - us.assertUnaryOpNotImplemented(compute.Negate, 1, "no kernel matching input types") - - us.setOverflowCheck(false) - fn(`[]`, `[]`) - fn(`[null]`, `[null]`) - us.assertUnaryOpVals(compute.Negate, 1, ^T(1)+1) - }) -} - -type UnaryArithmeticFloating[T constraints.Float] struct { - UnaryArithmeticSuite[T, compute.ArithmeticOptions] - - min, max T - smallest T -} - -func (us *UnaryArithmeticFloating[T]) setOverflowCheck(v bool) { - us.opts.NoCheckOverflow = !v -} - -func (us *UnaryArithmeticFloating[T]) TestAbsoluteValue() { - fn := func(in, exp string) { - us.assertUnaryOp(compute.AbsoluteValue, in, exp) - } - - us.Run(us.datatype().String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", 
checkOverflow), func() { - fn(`[]`, `[]`) - fn(`[null]`, `[null]`) - fn(`[1.3, null, -10.80]`, `[1.3, null, 10.80]`) - us.assertUnaryOpScalars(compute.AbsoluteValue, us.makeNullScalar(), us.makeNullScalar()) - fn(`[0.0, -0.0]`, `[0.0, 0.0]`) - us.assertUnaryOpVals(compute.AbsoluteValue, T(math.Copysign(0, -1)), 0) - us.assertUnaryOpVals(compute.AbsoluteValue, 0, 0) - fn(`[1.3, 10.80, 12748.001]`, `[1.3, 10.80, 12748.001]`) - us.assertUnaryOpVals(compute.AbsoluteValue, 1.3, 1.3) - fn(`[-1.3, -10.80, -12748.001]`, `[1.3, 10.80, 12748.001]`) - us.assertUnaryOpVals(compute.AbsoluteValue, -1.3, 1.3) - fn(`["Inf", "-Inf"]`, `["Inf", "Inf"]`) - us.assertUnaryOpVals(compute.AbsoluteValue, us.min, us.max) - us.assertUnaryOpVals(compute.AbsoluteValue, us.max, us.max) - }) - } - }) -} - -func (us *UnaryArithmeticFloating[T]) TestNegate() { - var ( - dt = us.datatype() - ) - - fn := func(in, exp string) { - us.assertUnaryOp(compute.Negate, in, exp) - } - - us.Run(dt.String(), func() { - for _, checkOverflow := range []bool{true, false} { - us.setOverflowCheck(checkOverflow) - us.Run(fmt.Sprintf("check_overflow=%t", checkOverflow), func() { - fn(`[]`, `[]`) - // scalar/arrays with nulls - fn(`[null]`, `[null]`) - fn(`[1.5, null, -10.25]`, `[-1.5, null, 10.25]`) - // ordinary scalars/arrays (positive inputs) - fn(`[0.5, 10.123, 127.321]`, `[-0.5, -10.123, -127.321]`) - us.assertUnaryOpVals(compute.Negate, 1.25, -1.25) - // ordinary scalars/arrays (negative inputs) - fn(`[-0.5, -10.123, -127.321]`, `[0.5, 10.123, 127.321]`) - us.assertUnaryOpVals(compute.Negate, -1.25, 1.25) - // min/max - us.assertUnaryOpVals(compute.Negate, us.min, us.max) - us.assertUnaryOpVals(compute.Negate, us.max, us.min) - }) - } - }) -} - -func (us *UnaryArithmeticFloating[T]) TestTrigSin() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Sin, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Sin, 
`[]`, `[]`) - us.assertUnaryOp(compute.Sin, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, math.Pi/2, math.Pi) - exp := us.makeArray(0, 1, 0) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Sin, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Sin, `["Inf", "-Inf"]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigCos() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Cos, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Cos, `[]`, `[]`) - us.assertUnaryOp(compute.Cos, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, math.Pi/2, math.Pi) - exp := us.makeArray(1, 0, -1) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Cos, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Cos, `["Inf", "-Inf"]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigTan() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Tan, `["Inf", "-Inf"]`, `["NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Tan, `[]`, `[]`) - us.assertUnaryOp(compute.Tan, `[null, "NaN"]`, `[null, "NaN"]`) - // pi/2 isn't representable exactly -> there are no poles - // (i.e. 
tan(pi/2) is merely a large value and not +Inf) - arr := us.makeArray(0, math.Pi) - exp := us.makeArray(0, 0) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Tan, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Tan, `["Inf", "-Inf"]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigAsin() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Asin, `["Inf", "-Inf", -2, 2]`, `["NaN", "NaN", "NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Asin, `[]`, `[]`) - us.assertUnaryOp(compute.Asin, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, 1, -1) - exp := us.makeArray(0, math.Pi/2, -math.Pi/2) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Asin, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Asin, `["Inf", "-Inf", -2, 2]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigAcos() { - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Acos, `["Inf", "-Inf", -2, 2]`, `["NaN", "NaN", "NaN", "NaN"]`) - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.assertUnaryOp(compute.Acos, `[]`, `[]`) - us.assertUnaryOp(compute.Acos, `[null, "NaN"]`, `[null, "NaN"]`) - arr := us.makeArray(0, 1, -1) - exp := us.makeArray(math.Pi/2, 0, math.Pi) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(compute.Acos, arr, exp) - } - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Acos, `["Inf", "-Inf", -2, 2]`, "domain error") -} - -func (us *UnaryArithmeticFloating[T]) TestTrigAtan() { - us.setOverflowCheck(false) - atan := func(ctx context.Context, _ compute.ArithmeticOptions, arg compute.Datum) (compute.Datum, error) { - return compute.Atan(ctx, arg) - } - us.assertUnaryOp(atan, `[]`, `[]`) - us.assertUnaryOp(atan, `[null, "NaN"]`, `[null, "NaN"]`) - - arr := us.makeArray(0, 1, -1, T(math.Inf(1)), 
T(math.Inf(-1))) - exp := us.makeArray(0, math.Pi/4, -math.Pi/4, math.Pi/2, -math.Pi/2) - defer arr.Release() - defer exp.Release() - us.assertUnaryOpArrs(atan, arr, exp) -} - -func (us *UnaryArithmeticFloating[T]) TestLog() { - for _, overflow := range []bool{false, true} { - us.setOverflowCheck(overflow) - us.Run(fmt.Sprintf("checked=%t", overflow), func() { - us.assertUnaryOp(compute.Ln, `[1, 2.718281828459045, null, "NaN", "Inf"]`, - `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Ln, us.smallest, T(math.Log(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Ln, us.max, T(math.Log(float64(us.max)))) - us.assertUnaryOp(compute.Log10, `[1, 10, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Log10, us.smallest, T(math.Log10(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Log10, us.max, T(math.Log10(float64(us.max)))) - us.assertUnaryOp(compute.Log2, `[1, 2, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Log2, us.smallest, T(math.Log2(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Log2, us.max, T(math.Log2(float64(us.max)))) - us.assertUnaryOp(compute.Log1p, `[0, 1.718281828459045, null, "NaN", "Inf"]`, `[0, 1, null, "NaN", "Inf"]`) - us.assertUnaryOpVals(compute.Log1p, us.smallest, T(math.Log1p(float64(us.smallest)))) - us.assertUnaryOpVals(compute.Log1p, us.max, T(math.Log1p(float64(us.max)))) - }) - } - - us.setOverflowCheck(false) - us.assertUnaryOp(compute.Ln, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - us.assertUnaryOp(compute.Log10, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - us.assertUnaryOp(compute.Log2, `["-Inf", -1, 0, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - us.assertUnaryOp(compute.Log1p, `["-Inf", -2, -1, "Inf"]`, `["NaN", "NaN", "-Inf", "Inf"]`) - - us.setOverflowCheck(true) - us.assertUnaryOpErr(compute.Ln, `[0]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Ln, `[-1]`, "logarithm of negative number") - 
us.assertUnaryOpErr(compute.Ln, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Ln, us.min, "logarithm of negative number") - - us.assertUnaryOpErr(compute.Log10, `[0]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Log10, `[-1]`, "logarithm of negative number") - us.assertUnaryOpErr(compute.Log10, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Log10, us.min, "logarithm of negative number") - - us.assertUnaryOpErr(compute.Log2, `[0]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Log2, `[-1]`, "logarithm of negative number") - us.assertUnaryOpErr(compute.Log2, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Log2, us.min, "logarithm of negative number") - - us.assertUnaryOpErr(compute.Log1p, `[-1]`, "logarithm of zero") - us.assertUnaryOpErr(compute.Log1p, `[-2]`, "logarithm of negative number") - us.assertUnaryOpErr(compute.Log1p, `["-Inf"]`, "logarithm of negative number") - us.assertUnaryOpValError(compute.Log1p, us.min, "logarithm of negative number") -} - -func TestUnaryArithmetic(t *testing.T) { - suite.Run(t, new(UnaryArithmeticSigned[int8])) - suite.Run(t, new(UnaryArithmeticSigned[int16])) - suite.Run(t, new(UnaryArithmeticSigned[int32])) - suite.Run(t, new(UnaryArithmeticSigned[int64])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint8])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint16])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint32])) - suite.Run(t, new(UnaryArithmeticUnsigned[uint64])) - suite.Run(t, &UnaryArithmeticFloating[float32]{min: -math.MaxFloat32, max: math.MaxFloat32, smallest: math.SmallestNonzeroFloat32}) - suite.Run(t, &UnaryArithmeticFloating[float64]{min: -math.MaxFloat64, max: math.MaxFloat64, smallest: math.SmallestNonzeroFloat64}) - suite.Run(t, new(DecimalUnaryArithmeticSuite)) -} - -type BitwiseArithmeticSuite[T arrow.IntType | arrow.UintType] struct { - BinaryFuncTestSuite -} - -func (bs *BitwiseArithmeticSuite[T]) datatype() 
arrow.DataType { - return arrow.GetDataType[T]() -} - -// to make it easier to test different widths, tests give bytes which -// get repeated to make an array of the actual type -func (bs *BitwiseArithmeticSuite[T]) expandByteArray(values []byte) arrow.Array { - vals := make([]T, len(values)+1) - sz := kernels.SizeOf[T]() - for i, v := range values { - memory.Set(unsafe.Slice((*byte)(unsafe.Pointer(&vals[i])), sz), v) - } - valid := make([]bool, len(vals)) - for i := range values { - valid[i] = true - } - return exec.ArrayFromSliceWithValid(bs.mem, vals, valid) -} - -func (bs *BitwiseArithmeticSuite[T]) assertBinaryOp(fn string, arg0, arg1, expected []byte) { - in0, in1 := bs.expandByteArray(arg0), bs.expandByteArray(arg1) - out := bs.expandByteArray(expected) - defer func() { - in0.Release() - in1.Release() - out.Release() - }() - - actual, err := compute.CallFunction(bs.ctx, fn, nil, &compute.ArrayDatum{in0.Data()}, &compute.ArrayDatum{in1.Data()}) - bs.Require().NoError(err) - defer actual.Release() - assertDatumsEqual(bs.T(), &compute.ArrayDatum{out.Data()}, actual, nil, nil) - - for i := 0; i < out.Len(); i++ { - a0, err := scalar.GetScalar(in0, i) - bs.Require().NoError(err) - a1, err := scalar.GetScalar(in1, i) - bs.Require().NoError(err) - exp, err := scalar.GetScalar(out, i) - bs.Require().NoError(err) - - actual, err := compute.CallFunction(bs.ctx, fn, nil, compute.NewDatum(a0), compute.NewDatum(a1)) - bs.Require().NoError(err) - assertScalarEquals(bs.T(), exp, actual.(*compute.ScalarDatum).Value) - } -} - -func (bs *BitwiseArithmeticSuite[T]) TestBitWiseAnd() { - bs.Run(bs.datatype().String(), func() { - bs.assertBinaryOp("bit_wise_and", []byte{0x00, 0xFF, 0x00, 0xFF}, - []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0x00, 0x00, 0xFF}) - }) -} - -func (bs *BitwiseArithmeticSuite[T]) TestBitWiseOr() { - bs.Run(bs.datatype().String(), func() { - bs.assertBinaryOp("bit_wise_or", []byte{0x00, 0xFF, 0x00, 0xFF}, - []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 
0xFF, 0xFF, 0xFF}) - }) -} - -func (bs *BitwiseArithmeticSuite[T]) TestBitWiseXor() { - bs.Run(bs.datatype().String(), func() { - bs.assertBinaryOp("bit_wise_xor", []byte{0x00, 0xFF, 0x00, 0xFF}, - []byte{0x00, 0x00, 0xFF, 0xFF}, []byte{0x00, 0xFF, 0xFF, 0x00}) - }) -} - -func TestBitwiseArithmetic(t *testing.T) { - suite.Run(t, new(BitwiseArithmeticSuite[int8])) - suite.Run(t, new(BitwiseArithmeticSuite[uint8])) - suite.Run(t, new(BitwiseArithmeticSuite[int16])) - suite.Run(t, new(BitwiseArithmeticSuite[uint16])) - suite.Run(t, new(BitwiseArithmeticSuite[int32])) - suite.Run(t, new(BitwiseArithmeticSuite[uint32])) - suite.Run(t, new(BitwiseArithmeticSuite[int64])) - suite.Run(t, new(BitwiseArithmeticSuite[uint64])) -} - -var roundModes = []compute.RoundMode{ - compute.RoundDown, - compute.RoundUp, - compute.RoundTowardsZero, - compute.RoundTowardsInfinity, - compute.RoundHalfDown, - compute.RoundHalfUp, - compute.RoundHalfTowardsZero, - compute.RoundHalfTowardsInfinity, - compute.RoundHalfToEven, - compute.RoundHalfToOdd, -} - -type UnaryRoundSuite[T arrow.NumericType] struct { - UnaryArithmeticSuite[T, compute.RoundOptions] -} - -func (us *UnaryRoundSuite[T]) setRoundMode(mode compute.RoundMode) { - us.opts.Mode = mode -} - -func (us *UnaryRoundSuite[T]) setRoundNDigits(v int64) { - us.opts.NDigits = v -} - -type UnaryRoundToMultipleSuite[T arrow.NumericType] struct { - UnaryArithmeticSuite[T, compute.RoundToMultipleOptions] -} - -func (us *UnaryRoundToMultipleSuite[T]) setRoundMode(mode compute.RoundMode) { - us.opts.Mode = mode -} - -func (us *UnaryRoundToMultipleSuite[T]) setRoundMultiple(val float64) { - us.opts.Multiple = scalar.NewFloat64Scalar(val) -} - -type UnaryRoundIntegral[T arrow.IntType | arrow.UintType] struct { - UnaryRoundSuite[T] -} - -type UnaryRoundToMultipleIntegral[T arrow.IntType | arrow.UintType] struct { - UnaryRoundToMultipleSuite[T] -} - -type UnaryRoundSigned[T arrow.IntType] struct { - UnaryRoundIntegral[T] -} - -func (us 
*UnaryRoundSigned[T]) TestRound() { - values := `[0, 1, -13, -50, 115]` - us.setRoundNDigits(0) - - arr := us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - for _, mode := range roundModes { - us.setRoundMode(mode) - us.assertUnaryOpExpArr(compute.Round, values, arr) - } - - // test different round N-digits for nearest rounding mode - ndigExpected := []struct { - n int64 - exp string - }{ - {-2, `[0, 0, -0.0, -100, 100]`}, - {-1, `[0.0, 0.0, -10, -50, 120]`}, - {0, values}, - {1, values}, - {2, values}, - } - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range ndigExpected { - us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { - us.setRoundNDigits(tt.n) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.Round, values, arr) - }) - } -} - -type UnaryRoundToMultipleSigned[T arrow.IntType] struct { - UnaryRoundToMultipleIntegral[T] -} - -func (us *UnaryRoundToMultipleSigned[T]) TestRoundToMultiple() { - values := `[0, 1, -13, -50, 115]` - us.setRoundMultiple(1) - for _, mode := range roundModes { - us.setRoundMode(mode) - arr := us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } - - tests := []struct { - mult float64 - exp string - }{ - {2, `[0.0, 2, -14, -50, 116]`}, - {0.05, `[0.0, 1, -13, -50, 115]`}, - {0.1, values}, - {10, `[0.0, 0.0, -10, -50, 120]`}, - {100, `[0.0, 0.0, -0.0, -100, 100]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range tests { - us.setRoundMultiple(tt.mult) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } -} - -type UnaryRoundUnsigned[T arrow.UintType] struct { - UnaryRoundIntegral[T] -} - -func (us *UnaryRoundUnsigned[T]) TestRound() { - values := `[0, 1, 13, 50, 115]` - us.setRoundNDigits(0) - - arr := 
us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - for _, mode := range roundModes { - us.setRoundMode(mode) - us.assertUnaryOpExpArr(compute.Round, values, arr) - } - - // test different round N-digits for nearest rounding mode - ndigExpected := []struct { - n int64 - exp string - }{ - {-2, `[0, 0, 0, 100, 100]`}, - {-1, `[0.0, 0.0, 10, 50, 120]`}, - {0, values}, - {1, values}, - {2, values}, - } - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range ndigExpected { - us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { - us.setRoundNDigits(tt.n) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.Round, values, arr) - }) - } -} - -type UnaryRoundToMultipleUnsigned[T arrow.UintType] struct { - UnaryRoundToMultipleIntegral[T] -} - -func (us *UnaryRoundToMultipleUnsigned[T]) TestRoundToMultiple() { - values := `[0, 1, 13, 50, 115]` - us.setRoundMultiple(1) - for _, mode := range roundModes { - us.setRoundMode(mode) - arr := us.getArr(arrow.PrimitiveTypes.Float64, values) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } - - tests := []struct { - mult float64 - exp string - }{ - {0.05, `[0, 1, 13, 50, 115]`}, - {0.1, values}, - {2, `[0, 2, 14, 50, 116]`}, - {10, `[0, 0, 10, 50, 120]`}, - {100, `[0, 0, 0, 100, 100]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range tests { - us.setRoundMultiple(tt.mult) - arr := us.getArr(arrow.PrimitiveTypes.Float64, tt.exp) - defer arr.Release() - us.assertUnaryOpExpArr(compute.RoundToMultiple, values, arr) - } -} - -type UnaryRoundFloating[T constraints.Float] struct { - UnaryRoundSuite[T] -} - -func (us *UnaryRoundFloating[T]) TestRound() { - values := `[3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7]` - rmodeExpected := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `[3, 3, 3, 4, -4, -4, -4]`}, - {compute.RoundUp, `[4, 4, 4, 5, -3, -3, -3]`}, - 
{compute.RoundTowardsZero, `[3, 3, 3, 4, -3, -3, -3]`}, - {compute.RoundTowardsInfinity, `[4, 4, 4, 5, -4, -4, -4]`}, - {compute.RoundHalfDown, `[3, 3, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfUp, `[3, 4, 4, 5, -3, -3, -4]`}, - {compute.RoundHalfTowardsZero, `[3, 3, 4, 4, -3, -3, -4]`}, - {compute.RoundHalfToEven, `[3, 4, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfToOdd, `[3, 3, 4, 5, -3, -3, -4]`}, - } - us.setRoundNDigits(0) - for _, tt := range rmodeExpected { - us.Run(tt.mode.String(), func() { - us.setRoundMode(tt.mode) - us.assertUnaryOp(compute.Round, `[]`, `[]`) - us.assertUnaryOp(compute.Round, `[null, 0, "Inf", "-Inf", "NaN"]`, - `[null, 0, "Inf", "-Inf", "NaN"]`) - us.assertUnaryOp(compute.Round, values, tt.exp) - }) - } - - // test different round n-digits for nearest rounding mode - values = `[320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045]` - ndigitsExp := []struct { - n int64 - exp string - }{ - {-2, `[300, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0]`}, - {-1, `[320, 0.0, 0.0, 0.0, -0.0, -40, -0.0]`}, - {0, `[320, 4, 3, 5, -3, -35, -3]`}, - {1, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3]`}, - {2, `[320, 3.5, 3.08, 4.5, -3.21, -35.12, -3.05]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range ndigitsExp { - us.Run(fmt.Sprintf("ndigits=%d", tt.n), func() { - us.setRoundNDigits(tt.n) - us.assertUnaryOp(compute.Round, values, tt.exp) - }) - } -} - -type UnaryRoundToMultipleFloating[T constraints.Float] struct { - UnaryRoundToMultipleSuite[T] -} - -func (us *UnaryRoundToMultipleFloating[T]) TestRoundToMultiple() { - values := `[3.2, 3.5, 3.7, 4.5, -3.2, -3.5, -3.7]` - rmodeExpected := []struct { - mode compute.RoundMode - exp string - }{ - {compute.RoundDown, `[3, 3, 3, 4, -4, -4, -4]`}, - {compute.RoundUp, `[4, 4, 4, 5, -3, -3, -3]`}, - {compute.RoundTowardsZero, `[3, 3, 3, 4, -3, -3, -3]`}, - {compute.RoundTowardsInfinity, `[4, 4, 4, 5, -4, -4, -4]`}, - {compute.RoundHalfDown, `[3, 3, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfUp, `[3, 4, 4, 
5, -3, -3, -4]`}, - {compute.RoundHalfTowardsZero, `[3, 3, 4, 4, -3, -3, -4]`}, - {compute.RoundHalfToEven, `[3, 4, 4, 4, -3, -4, -4]`}, - {compute.RoundHalfToOdd, `[3, 3, 4, 5, -3, -3, -4]`}, - } - us.setRoundMultiple(1) - for _, tt := range rmodeExpected { - us.Run(tt.mode.String(), func() { - us.setRoundMode(tt.mode) - us.assertUnaryOp(compute.RoundToMultiple, `[]`, `[]`) - us.assertUnaryOp(compute.RoundToMultiple, `[null, 0, "Inf", "-Inf", "NaN"]`, - `[null, 0, "Inf", "-Inf", "NaN"]`) - us.assertUnaryOp(compute.RoundToMultiple, values, tt.exp) - }) - } - - // test different round n-digits for nearest rounding mode - values = `[320, 3.5, 3.075, 4.5, -3.212, -35.1234, -3.045]` - multAndExp := []struct { - mult float64 - exp string - }{ - {0.05, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3.05]`}, - {0.1, `[320, 3.5, 3.1, 4.5, -3.2, -35.1, -3]`}, - {2, `[320, 4, 4, 4, -4, -36, -4]`}, - {10, `[320, 0.0, 0.0, 0.0, -0.0, -40, -0.0]`}, - {100, `[300, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0]`}, - } - - us.setRoundMode(compute.RoundHalfTowardsInfinity) - for _, tt := range multAndExp { - us.Run(fmt.Sprintf("multiple=%f", tt.mult), func() { - us.setRoundMultiple(tt.mult) - us.assertUnaryOp(compute.RoundToMultiple, values, tt.exp) - }) - } -} - -func TestRounding(t *testing.T) { - suite.Run(t, new(UnaryRoundSigned[int8])) - suite.Run(t, new(UnaryRoundSigned[int16])) - suite.Run(t, new(UnaryRoundSigned[int32])) - suite.Run(t, new(UnaryRoundSigned[int64])) - suite.Run(t, new(UnaryRoundUnsigned[uint8])) - suite.Run(t, new(UnaryRoundUnsigned[uint16])) - suite.Run(t, new(UnaryRoundUnsigned[uint32])) - suite.Run(t, new(UnaryRoundUnsigned[uint64])) - suite.Run(t, new(UnaryRoundFloating[float32])) - suite.Run(t, new(UnaryRoundFloating[float64])) - - suite.Run(t, new(UnaryRoundToMultipleSigned[int8])) - suite.Run(t, new(UnaryRoundToMultipleSigned[int16])) - suite.Run(t, new(UnaryRoundToMultipleSigned[int32])) - suite.Run(t, new(UnaryRoundToMultipleSigned[int64])) - suite.Run(t, 
new(UnaryRoundToMultipleUnsigned[uint8])) - suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint16])) - suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint32])) - suite.Run(t, new(UnaryRoundToMultipleUnsigned[uint64])) - suite.Run(t, new(UnaryRoundToMultipleFloating[float32])) - suite.Run(t, new(UnaryRoundToMultipleFloating[float64])) -} - -const seed = 0x94378165 - -type binaryOp = func(ctx context.Context, left, right compute.Datum) (compute.Datum, error) - -func Add(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - var opts compute.ArithmeticOptions - return compute.Add(ctx, opts, left, right) -} - -func Subtract(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - var opts compute.ArithmeticOptions - return compute.Subtract(ctx, opts, left, right) -} - -func AddUnchecked(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - opts := compute.ArithmeticOptions{NoCheckOverflow: true} - return compute.Add(ctx, opts, left, right) -} - -func SubtractUnchecked(ctx context.Context, left, right compute.Datum) (compute.Datum, error) { - opts := compute.ArithmeticOptions{NoCheckOverflow: true} - return compute.Subtract(ctx, opts, left, right) -} - -func arrayScalarKernel(b *testing.B, sz int, nullProp float64, op binaryOp, dt arrow.DataType) { - b.Run("array scalar", func(b *testing.B) { - var ( - mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - arraySize = int64(sz / dt.(arrow.FixedWidthDataType).Bytes()) - min int64 = 6 - max = min + 15 - sc, _ = scalar.MakeScalarParam(6, dt) - rhs compute.Datum = &compute.ScalarDatum{Value: sc} - rng = gen.NewRandomArrayGenerator(seed, mem) - ) - - lhs := rng.Numeric(dt.ID(), arraySize, min, max, nullProp) - b.Cleanup(func() { - lhs.Release() - }) - - var ( - res compute.Datum - err error - ctx = context.Background() - left = &compute.ArrayDatum{Value: lhs.Data()} - ) - - b.SetBytes(arraySize) - b.ResetTimer() - for n := 0; n < b.N; n++ { - res, err = op(ctx, 
left, rhs) - b.StopTimer() - if err != nil { - b.Fatal(err) - } - res.Release() - b.StartTimer() - } - }) -} - -func arrayArrayKernel(b *testing.B, sz int, nullProp float64, op binaryOp, dt arrow.DataType) { - b.Run("array array", func(b *testing.B) { - var ( - mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - arraySize = int64(sz / dt.(arrow.FixedWidthDataType).Bytes()) - rmin int64 = 1 - rmax = rmin + 6 // 7 - lmin = rmax + 1 // 8 - lmax = lmin + 6 // 14 - rng = gen.NewRandomArrayGenerator(seed, mem) - ) - - lhs := rng.Numeric(dt.ID(), arraySize, lmin, lmax, nullProp) - rhs := rng.Numeric(dt.ID(), arraySize, rmin, rmax, nullProp) - b.Cleanup(func() { - lhs.Release() - rhs.Release() - }) - var ( - res compute.Datum - err error - ctx = context.Background() - left = &compute.ArrayDatum{Value: lhs.Data()} - right = &compute.ArrayDatum{Value: rhs.Data()} - ) - - b.SetBytes(arraySize) - b.ResetTimer() - for n := 0; n < b.N; n++ { - res, err = op(ctx, left, right) - b.StopTimer() - if err != nil { - b.Fatal(err) - } - res.Release() - b.StartTimer() - } - }) -} - -func BenchmarkScalarArithmetic(b *testing.B) { - args := []struct { - sz int - nullProb float64 - }{ - {CpuCacheSizes[2], 0}, - {CpuCacheSizes[2], 0.5}, - {CpuCacheSizes[2], 1}, - } - - testfns := []struct { - name string - op binaryOp - }{ - {"Add", Add}, - {"AddUnchecked", AddUnchecked}, - {"Subtract", Subtract}, - {"SubtractUnchecked", SubtractUnchecked}, - } - - for _, dt := range numericTypes { - b.Run(dt.String(), func(b *testing.B) { - for _, benchArgs := range args { - b.Run(fmt.Sprintf("sz=%d/nullprob=%.2f", benchArgs.sz, benchArgs.nullProb), func(b *testing.B) { - for _, tfn := range testfns { - b.Run(tfn.name, func(b *testing.B) { - arrayArrayKernel(b, benchArgs.sz, benchArgs.nullProb, tfn.op, dt) - arrayScalarKernel(b, benchArgs.sz, benchArgs.nullProb, tfn.op, dt) - }) - } - }) - } - }) - } -} diff --git a/go/arrow/compute/cast.go b/go/arrow/compute/cast.go deleted file mode 100644 index 
6ef6fdddd16ff..0000000000000 --- a/go/arrow/compute/cast.go +++ /dev/null @@ -1,587 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - "sync" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" -) - -var ( - castTable map[arrow.Type]*castFunction - castInit sync.Once - - castDoc = FunctionDoc{ - Summary: "cast values to another data type", - Description: "Behavior when values wouldn't fit in the target type\ncan be controlled through CastOptions.", - ArgNames: []string{"input"}, - OptionsType: "CastOptions", - OptionsRequired: true, - } - castMetaFunc = NewMetaFunction("cast", Unary(), castDoc, - func(ctx context.Context, fo FunctionOptions, d ...Datum) (Datum, error) { - castOpts := fo.(*CastOptions) - if castOpts == nil || castOpts.ToType == nil { - return nil, fmt.Errorf("%w: cast requires that options be passed with a ToType", arrow.ErrInvalid) - } - - if 
arrow.TypeEqual(d[0].(ArrayLikeDatum).Type(), castOpts.ToType) { - return NewDatum(d[0]), nil - } - - fn, err := getCastFunction(castOpts.ToType) - if err != nil { - return nil, fmt.Errorf("%w from %s", err, d[0].(ArrayLikeDatum).Type()) - } - - return fn.Execute(ctx, fo, d...) - }) -) - -func RegisterScalarCast(reg FunctionRegistry) { - reg.AddFunction(castMetaFunc, false) -} - -type castFunction struct { - ScalarFunction - - inIDs []arrow.Type - out arrow.Type -} - -func newCastFunction(name string, outType arrow.Type) *castFunction { - return &castFunction{ - ScalarFunction: *NewScalarFunction(name, Unary(), EmptyFuncDoc), - out: outType, - inIDs: make([]arrow.Type, 0, 1), - } -} - -func (cf *castFunction) AddTypeCast(in arrow.Type, kernel exec.ScalarKernel) error { - kernel.Init = exec.OptionsInit[kernels.CastState] - if err := cf.AddKernel(kernel); err != nil { - return err - } - cf.inIDs = append(cf.inIDs, in) - return nil -} - -func (cf *castFunction) AddNewTypeCast(inID arrow.Type, inTypes []exec.InputType, out exec.OutputType, - ex exec.ArrayKernelExec, nullHandle exec.NullHandling, memAlloc exec.MemAlloc) error { - - kn := exec.NewScalarKernel(inTypes, out, ex, nil) - kn.NullHandling = nullHandle - kn.MemAlloc = memAlloc - return cf.AddTypeCast(inID, kn) -} - -func (cf *castFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { - if err := cf.checkArity(len(vals)); err != nil { - return nil, err - } - - candidates := make([]*exec.ScalarKernel, 0, 1) - for i := range cf.kernels { - if cf.kernels[i].Signature.MatchesInputs(vals) { - candidates = append(candidates, &cf.kernels[i]) - } - } - - if len(candidates) == 0 { - return nil, fmt.Errorf("%w: unsupported cast from %s to %s using function %s", - arrow.ErrNotImplemented, vals[0], cf.out, cf.name) - } - - if len(candidates) == 1 { - // one match! - return candidates[0], nil - } - - // in this situation we may have both an EXACT type and - // a SAME_TYPE_ID match. 
So we will see if there is an exact - // match among the candidates and if not, we just return the - // first one - for _, k := range candidates { - arg0 := k.Signature.InputTypes[0] - if arg0.Kind == exec.InputExact { - // found one! - return k, nil - } - } - - // just return some kernel that matches since we didn't find an exact - return candidates[0], nil -} - -func unpackDictionary(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - var ( - dictArr = batch.Values[0].Array.MakeArray().(*array.Dictionary) - opts = ctx.State.(kernels.CastState) - dictType = dictArr.DataType().(*arrow.DictionaryType) - toType = opts.ToType - ) - defer dictArr.Release() - - if !arrow.TypeEqual(toType, dictType) && !CanCast(dictType, toType) { - return fmt.Errorf("%w: cast type %s incompatible with dictionary type %s", - arrow.ErrInvalid, toType, dictType) - } - - unpacked, err := TakeArray(ctx.Ctx, dictArr.Dictionary(), dictArr.Indices()) - if err != nil { - return err - } - defer unpacked.Release() - - if !arrow.TypeEqual(dictType, toType) { - unpacked, err = CastArray(ctx.Ctx, unpacked, &opts) - if err != nil { - return err - } - defer unpacked.Release() - } - - out.TakeOwnership(unpacked.Data()) - return nil -} - -func CastFromExtension(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - opts := ctx.State.(kernels.CastState) - - arr := batch.Values[0].Array.MakeArray().(array.ExtensionArray) - defer arr.Release() - - castOpts := CastOptions(opts) - result, err := CastArray(ctx.Ctx, arr.Storage(), &castOpts) - if err != nil { - return err - } - defer result.Release() - - out.TakeOwnership(result.Data()) - return nil -} - -func CastList[SrcOffsetT, DestOffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - var ( - opts = ctx.State.(kernels.CastState) - childType = out.Type.(arrow.NestedType).Fields()[0].Type - input = &batch.Values[0].Array - offsets = exec.GetSpanOffsets[SrcOffsetT](input, 1) 
- isDowncast = kernels.SizeOf[SrcOffsetT]() > kernels.SizeOf[DestOffsetT]() - ) - - out.Buffers[0] = input.Buffers[0] - out.Buffers[1] = input.Buffers[1] - - if input.Offset != 0 && len(input.Buffers[0].Buf) > 0 { - out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(input.Len)) - bitutil.CopyBitmap(input.Buffers[0].Buf, int(input.Offset), int(input.Len), - out.Buffers[0].Buf, 0) - } - - // Handle list offsets - // Several cases possible: - // - The source offset is non-zero, in which case we slice the - // underlying values and shift the list offsets (regardless of - // their respective types) - // - the source offset is zero but the source and destination types - // have different list offset types, in which case we cast the offsets - // - otherwise we simply keep the original offsets - if isDowncast { - if offsets[input.Len] > SrcOffsetT(kernels.MaxOf[DestOffsetT]()) { - return fmt.Errorf("%w: array of type %s too large to convert to %s", - arrow.ErrInvalid, input.Type, out.Type) - } - } - - values := input.Children[0].MakeArray() - defer values.Release() - - if input.Offset != 0 { - out.Buffers[1].WrapBuffer( - ctx.Allocate(out.Type.(arrow.OffsetsDataType). - OffsetTypeTraits().BytesRequired(int(input.Len) + 1))) - - shiftedOffsets := exec.GetSpanOffsets[DestOffsetT](out, 1) - for i := 0; i < int(input.Len)+1; i++ { - shiftedOffsets[i] = DestOffsetT(offsets[i] - offsets[0]) - } - - values = array.NewSlice(values, int64(offsets[0]), int64(offsets[input.Len])) - defer values.Release() - } else if kernels.SizeOf[SrcOffsetT]() != kernels.SizeOf[DestOffsetT]() { - out.Buffers[1].WrapBuffer(ctx.Allocate(out.Type.(arrow.OffsetsDataType). 
- OffsetTypeTraits().BytesRequired(int(input.Len) + 1))) - - kernels.DoStaticCast(exec.GetSpanOffsets[SrcOffsetT](input, 1), - exec.GetSpanOffsets[DestOffsetT](out, 1)) - } - - // handle values - opts.ToType = childType - - castedValues, err := CastArray(ctx.Ctx, values, &opts) - if err != nil { - return err - } - defer castedValues.Release() - - out.Children = make([]exec.ArraySpan, 1) - out.Children[0].SetMembers(castedValues.Data()) - for i, b := range out.Children[0].Buffers { - if b.Owner != nil && b.Owner != values.Data().Buffers()[i] { - b.Owner.Retain() - b.SelfAlloc = true - } - } - return nil -} - -func CastStruct(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - var ( - opts = ctx.State.(kernels.CastState) - inType = batch.Values[0].Array.Type.(*arrow.StructType) - outType = out.Type.(*arrow.StructType) - inFieldCount = inType.NumFields() - outFieldCount = outType.NumFields() - ) - - fieldsToSelect := make([]int, outFieldCount) - for i := range fieldsToSelect { - fieldsToSelect[i] = -1 - } - - outFieldIndex := 0 - for inFieldIndex := 0; inFieldIndex < inFieldCount && outFieldIndex < outFieldCount; inFieldIndex++ { - inField := inType.Field(inFieldIndex) - outField := outType.Field(outFieldIndex) - if inField.Name == outField.Name { - if inField.Nullable && !outField.Nullable { - return fmt.Errorf("%w: cannot cast nullable field to non-nullable field: %s %s", - arrow.ErrType, inType, outType) - } - fieldsToSelect[outFieldIndex] = inFieldIndex - outFieldIndex++ - } - } - - if outFieldIndex < outFieldCount { - return fmt.Errorf("%w: struct fields don't match or are in the wrong order: Input: %s Output: %s", - arrow.ErrType, inType, outType) - } - - input := &batch.Values[0].Array - if len(input.Buffers[0].Buf) > 0 { - out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(input.Len)) - bitutil.CopyBitmap(input.Buffers[0].Buf, int(input.Offset), int(input.Len), - out.Buffers[0].Buf, 0) - } - - out.Children = make([]exec.ArraySpan, 
outFieldCount) - for outFieldIndex, idx := range fieldsToSelect { - values := input.Children[idx].MakeArray() - defer values.Release() - values = array.NewSlice(values, input.Offset, input.Len) - defer values.Release() - - opts.ToType = outType.Field(outFieldIndex).Type - castedValues, err := CastArray(ctx.Ctx, values, &opts) - if err != nil { - return err - } - defer castedValues.Release() - - out.Children[outFieldIndex].TakeOwnership(castedValues.Data()) - } - return nil -} - -func addListCast[SrcOffsetT, DestOffsetT int32 | int64](fn *castFunction, inType arrow.Type) error { - kernel := exec.NewScalarKernel([]exec.InputType{exec.NewIDInput(inType)}, - kernels.OutputTargetType, CastList[SrcOffsetT, DestOffsetT], nil) - kernel.NullHandling = exec.NullComputedNoPrealloc - kernel.MemAlloc = exec.MemNoPrealloc - return fn.AddTypeCast(inType, kernel) -} - -func addStructToStructCast(fn *castFunction) error { - kernel := exec.NewScalarKernel([]exec.InputType{exec.NewIDInput(arrow.STRUCT)}, - kernels.OutputTargetType, CastStruct, nil) - kernel.NullHandling = exec.NullComputedNoPrealloc - return fn.AddTypeCast(arrow.STRUCT, kernel) -} - -func addCastFuncs(fn []*castFunction) { - for _, f := range fn { - f.AddNewTypeCast(arrow.EXTENSION, []exec.InputType{exec.NewIDInput(arrow.EXTENSION)}, - f.kernels[0].Signature.OutType, CastFromExtension, - exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - castTable[f.out] = f - } -} - -func initCastTable() { - castTable = make(map[arrow.Type]*castFunction) - addCastFuncs(getBooleanCasts()) - addCastFuncs(getNumericCasts()) - addCastFuncs(getBinaryLikeCasts()) - addCastFuncs(getTemporalCasts()) - addCastFuncs(getNestedCasts()) - - nullToExt := newCastFunction("cast_extension", arrow.EXTENSION) - nullToExt.AddNewTypeCast(arrow.NULL, []exec.InputType{exec.NewExactInput(arrow.Null)}, - kernels.OutputTargetType, kernels.CastFromNull, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - castTable[arrow.EXTENSION] = nullToExt -} - -func 
getCastFunction(to arrow.DataType) (*castFunction, error) { - castInit.Do(initCastTable) - - fn, ok := castTable[to.ID()] - if ok { - return fn, nil - } - - return nil, fmt.Errorf("%w: unsupported cast to %s", arrow.ErrNotImplemented, to) -} - -func getNestedCasts() []*castFunction { - out := make([]*castFunction, 0) - - addKernels := func(fn *castFunction, kernels []exec.ScalarKernel) { - for _, k := range kernels { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - } - - castLists := newCastFunction("cast_list", arrow.LIST) - addKernels(castLists, kernels.GetCommonCastKernels(arrow.LIST, kernels.OutputTargetType)) - if err := addListCast[int32, int32](castLists, arrow.LIST); err != nil { - panic(err) - } - if err := addListCast[int64, int32](castLists, arrow.LARGE_LIST); err != nil { - panic(err) - } - out = append(out, castLists) - - castLargeLists := newCastFunction("cast_large_list", arrow.LARGE_LIST) - addKernels(castLargeLists, kernels.GetCommonCastKernels(arrow.LARGE_LIST, kernels.OutputTargetType)) - if err := addListCast[int32, int64](castLargeLists, arrow.LIST); err != nil { - panic(err) - } - if err := addListCast[int64, int64](castLargeLists, arrow.LARGE_LIST); err != nil { - panic(err) - } - out = append(out, castLargeLists) - - castFsl := newCastFunction("cast_fixed_size_list", arrow.FIXED_SIZE_LIST) - addKernels(castFsl, kernels.GetCommonCastKernels(arrow.FIXED_SIZE_LIST, kernels.OutputTargetType)) - out = append(out, castFsl) - - castStruct := newCastFunction("cast_struct", arrow.STRUCT) - addKernels(castStruct, kernels.GetCommonCastKernels(arrow.STRUCT, kernels.OutputTargetType)) - if err := addStructToStructCast(castStruct); err != nil { - panic(err) - } - out = append(out, castStruct) - - return out -} - -func getBooleanCasts() []*castFunction { - fn := newCastFunction("cast_boolean", arrow.BOOL) - kns := kernels.GetBooleanCastKernels() - - for _, k := range kns { - if err := 
fn.AddTypeCast(k.Signature.InputTypes[0].Type.ID(), k); err != nil { - panic(err) - } - } - - return []*castFunction{fn} -} - -func getTemporalCasts() []*castFunction { - output := make([]*castFunction, 0) - addFn := func(name string, id arrow.Type, kernels []exec.ScalarKernel) { - fn := newCastFunction(name, id) - for _, k := range kernels { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)}, - kernels[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - output = append(output, fn) - } - - addFn("cast_timestamp", arrow.TIMESTAMP, kernels.GetTimestampCastKernels()) - addFn("cast_date32", arrow.DATE32, kernels.GetDate32CastKernels()) - addFn("cast_date64", arrow.DATE64, kernels.GetDate64CastKernels()) - addFn("cast_time32", arrow.TIME32, kernels.GetTime32CastKernels()) - addFn("cast_time64", arrow.TIME64, kernels.GetTime64CastKernels()) - addFn("cast_duration", arrow.DURATION, kernels.GetDurationCastKernels()) - addFn("cast_month_day_nano_interval", arrow.INTERVAL_MONTH_DAY_NANO, kernels.GetIntervalCastKernels()) - return output -} - -func getNumericCasts() []*castFunction { - out := make([]*castFunction, 0) - - getFn := func(name string, ty arrow.Type, kns []exec.ScalarKernel) *castFunction { - fn := newCastFunction(name, ty) - for _, k := range kns { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - - fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)}, - kns[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - - return fn - } - - out = append(out, getFn("cast_int8", arrow.INT8, kernels.GetCastToInteger[int8](arrow.PrimitiveTypes.Int8))) - out = append(out, getFn("cast_int16", arrow.INT16, kernels.GetCastToInteger[int8](arrow.PrimitiveTypes.Int16))) - - castInt32 
:= getFn("cast_int32", arrow.INT32, kernels.GetCastToInteger[int32](arrow.PrimitiveTypes.Int32)) - castInt32.AddTypeCast(arrow.DATE32, - kernels.GetZeroCastKernel(arrow.DATE32, - exec.NewExactInput(arrow.FixedWidthTypes.Date32), - exec.NewOutputType(arrow.PrimitiveTypes.Int32))) - castInt32.AddTypeCast(arrow.TIME32, - kernels.GetZeroCastKernel(arrow.TIME32, - exec.NewIDInput(arrow.TIME32), exec.NewOutputType(arrow.PrimitiveTypes.Int32))) - out = append(out, castInt32) - - castInt64 := getFn("cast_int64", arrow.INT64, kernels.GetCastToInteger[int64](arrow.PrimitiveTypes.Int64)) - castInt64.AddTypeCast(arrow.DATE64, - kernels.GetZeroCastKernel(arrow.DATE64, - exec.NewIDInput(arrow.DATE64), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - castInt64.AddTypeCast(arrow.TIME64, - kernels.GetZeroCastKernel(arrow.TIME64, - exec.NewIDInput(arrow.TIME64), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - castInt64.AddTypeCast(arrow.DURATION, - kernels.GetZeroCastKernel(arrow.DURATION, - exec.NewIDInput(arrow.DURATION), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - castInt64.AddTypeCast(arrow.TIMESTAMP, - kernels.GetZeroCastKernel(arrow.TIMESTAMP, - exec.NewIDInput(arrow.TIMESTAMP), - exec.NewOutputType(arrow.PrimitiveTypes.Int64))) - out = append(out, castInt64) - - out = append(out, getFn("cast_uint8", arrow.UINT8, kernels.GetCastToInteger[uint8](arrow.PrimitiveTypes.Uint8))) - out = append(out, getFn("cast_uint16", arrow.UINT16, kernels.GetCastToInteger[uint16](arrow.PrimitiveTypes.Uint16))) - out = append(out, getFn("cast_uint32", arrow.UINT32, kernels.GetCastToInteger[uint32](arrow.PrimitiveTypes.Uint32))) - out = append(out, getFn("cast_uint64", arrow.UINT64, kernels.GetCastToInteger[uint64](arrow.PrimitiveTypes.Uint64))) - - out = append(out, getFn("cast_half_float", arrow.FLOAT16, kernels.GetCommonCastKernels(arrow.FLOAT16, exec.NewOutputType(arrow.FixedWidthTypes.Float16)))) - out = append(out, getFn("cast_float", arrow.FLOAT32, 
kernels.GetCastToFloating[float32](arrow.PrimitiveTypes.Float32))) - out = append(out, getFn("cast_double", arrow.FLOAT64, kernels.GetCastToFloating[float64](arrow.PrimitiveTypes.Float64))) - - // cast to decimal128 - out = append(out, getFn("cast_decimal", arrow.DECIMAL128, kernels.GetCastToDecimal128())) - // cast to decimal256 - out = append(out, getFn("cast_decimal256", arrow.DECIMAL256, kernels.GetCastToDecimal256())) - return out -} - -func getBinaryLikeCasts() []*castFunction { - out := make([]*castFunction, 0) - - addFn := func(name string, ty arrow.Type, kns []exec.ScalarKernel) { - fn := newCastFunction(name, ty) - for _, k := range kns { - if err := fn.AddTypeCast(k.Signature.InputTypes[0].MatchID(), k); err != nil { - panic(err) - } - } - - fn.AddNewTypeCast(arrow.DICTIONARY, []exec.InputType{exec.NewIDInput(arrow.DICTIONARY)}, - kns[0].Signature.OutType, unpackDictionary, exec.NullComputedNoPrealloc, exec.MemNoPrealloc) - - out = append(out, fn) - } - - addFn("cast_binary", arrow.BINARY, kernels.GetToBinaryKernels(arrow.BinaryTypes.Binary)) - addFn("cast_large_binary", arrow.LARGE_BINARY, kernels.GetToBinaryKernels(arrow.BinaryTypes.LargeBinary)) - addFn("cast_string", arrow.STRING, kernels.GetToBinaryKernels(arrow.BinaryTypes.String)) - addFn("cast_large_string", arrow.LARGE_STRING, kernels.GetToBinaryKernels(arrow.BinaryTypes.LargeString)) - addFn("cast_fixed_sized_binary", arrow.FIXED_SIZE_BINARY, kernels.GetFsbCastKernels()) - return out -} - -// CastDatum is a convenience function for casting a Datum to another type. -// It is equivalent to calling CallFunction(ctx, "cast", opts, Datum) and -// should work for Scalar, Array or ChunkedArray Datums. -func CastDatum(ctx context.Context, val Datum, opts *CastOptions) (Datum, error) { - return CallFunction(ctx, "cast", opts, val) -} - -// CastArray is a convenience function for casting an Array to another type. 
-// It is equivalent to constructing a Datum for the array and using -// CallFunction(ctx, "cast", ...). -func CastArray(ctx context.Context, val arrow.Array, opts *CastOptions) (arrow.Array, error) { - d := NewDatum(val) - defer d.Release() - - out, err := CastDatum(ctx, d, opts) - if err != nil { - return nil, err - } - - defer out.Release() - return out.(*ArrayDatum).MakeArray(), nil -} - -// CastToType is a convenience function equivalent to calling -// CastArray(ctx, val, compute.SafeCastOptions(toType)) -func CastToType(ctx context.Context, val arrow.Array, toType arrow.DataType) (arrow.Array, error) { - return CastArray(ctx, val, SafeCastOptions(toType)) -} - -// CanCast returns true if there is an implementation for casting an array -// or scalar value from the specified DataType to the other data type. -func CanCast(from, to arrow.DataType) bool { - fn, err := getCastFunction(to) - if err != nil { - return false - } - - for _, id := range fn.inIDs { - if from.ID() == id { - return true - } - } - return false -} diff --git a/go/arrow/compute/cast_test.go b/go/arrow/compute/cast_test.go deleted file mode 100644 index db6098225dda8..0000000000000 --- a/go/arrow/compute/cast_test.go +++ /dev/null @@ -1,2867 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "context" - "fmt" - "math" - "strconv" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/decimal256" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/apache/arrow/go/v18/internal/types" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" -) - -func getScalars(inputs []compute.Datum, idx int) []scalar.Scalar { - out := make([]scalar.Scalar, len(inputs)) - for i, in := range inputs { - if in.Kind() == compute.KindArray { - arr := in.(*compute.ArrayDatum).MakeArray() - defer arr.Release() - out[i], _ = scalar.GetScalar(arr, idx) - } else { - out[i] = in.(*compute.ScalarDatum).Value - } - } - return out -} - -func getDatums[T any](inputs []T) []compute.Datum { - out := make([]compute.Datum, len(inputs)) - for i, in := range inputs { - out[i] = compute.NewDatum(in) - } - return out -} - -func assertArraysEqual(t *testing.T, expected, actual arrow.Array, opts ...array.EqualOption) bool { - return assert.Truef(t, array.ApproxEqual(expected, actual, opts...), "expected: %s\ngot: %s", expected, actual) -} - -func assertDatumsEqual(t *testing.T, expected, actual compute.Datum, opts []array.EqualOption, scalarOpts []scalar.EqualOption) { - require.Equal(t, expected.Kind(), actual.Kind()) - - switch expected.Kind() { - case compute.KindScalar: - want := 
expected.(*compute.ScalarDatum).Value - got := actual.(*compute.ScalarDatum).Value - assert.Truef(t, scalar.ApproxEquals(want, got, scalarOpts...), "expected: %s\ngot: %s", want, got) - case compute.KindArray: - want := expected.(*compute.ArrayDatum).MakeArray() - got := actual.(*compute.ArrayDatum).MakeArray() - assertArraysEqual(t, want, got, opts...) - want.Release() - got.Release() - case compute.KindChunked: - want := expected.(*compute.ChunkedDatum).Value - got := actual.(*compute.ChunkedDatum).Value - assert.Truef(t, array.ChunkedEqual(want, got), "expected: %s\ngot: %s", want, got) - default: - assert.Truef(t, actual.Equals(expected), "expected: %s\ngot: %s", expected, actual) - } -} - -func checkScalarNonRecursive(t *testing.T, funcName string, inputs []compute.Datum, expected compute.Datum, opts compute.FunctionOptions) { - out, err := compute.CallFunction(context.Background(), funcName, opts, inputs...) - assert.NoError(t, err) - defer out.Release() - assertDatumsEqual(t, expected, out, nil, nil) -} - -func checkScalarWithScalars(t *testing.T, funcName string, inputs []scalar.Scalar, expected scalar.Scalar, opts compute.FunctionOptions) { - datums := getDatums(inputs) - defer func() { - for _, s := range inputs { - if r, ok := s.(scalar.Releasable); ok { - r.Release() - } - } - for _, d := range datums { - d.Release() - } - }() - out, err := compute.CallFunction(context.Background(), funcName, opts, datums...) 
- assert.NoError(t, err) - defer out.Release() - if !scalar.Equals(out.(*compute.ScalarDatum).Value, expected) { - var b strings.Builder - b.WriteString(funcName + "(") - for i, in := range inputs { - if i != 0 { - b.WriteByte(',') - } - b.WriteString(in.String()) - } - b.WriteByte(')') - b.WriteString(" = " + out.(*compute.ScalarDatum).Value.String()) - b.WriteString(" != " + expected.String()) - - if !arrow.TypeEqual(out.(*compute.ScalarDatum).Type(), expected.DataType()) { - fmt.Fprintf(&b, " (types differed: %s vs %s)", - out.(*compute.ScalarDatum).Type(), expected.DataType()) - } - t.Fatal(b.String()) - } -} - -func checkScalar(t *testing.T, funcName string, inputs []compute.Datum, expected compute.Datum, opts compute.FunctionOptions) { - checkScalarNonRecursive(t, funcName, inputs, expected, opts) - - if expected.Kind() == compute.KindScalar { - return - } - - exp := expected.(*compute.ArrayDatum).MakeArray() - defer exp.Release() - - // check for at least 1 array, and make sure the others are of equal len - hasArray := false - for _, in := range inputs { - if in.Kind() == compute.KindArray { - assert.EqualValues(t, exp.Len(), in.(*compute.ArrayDatum).Len()) - hasArray = true - } - } - - require.True(t, hasArray) - - // check all the input scalars - for i := 0; i < exp.Len(); i++ { - e, _ := scalar.GetScalar(exp, i) - checkScalarWithScalars(t, funcName, getScalars(inputs, i), e, opts) - if r, ok := e.(scalar.Releasable); ok { - r.Release() - } - } -} - -func assertBufferSame(t *testing.T, left, right arrow.Array, idx int) { - assert.Same(t, left.Data().Buffers()[idx], right.Data().Buffers()[idx]) -} - -func checkScalarUnary(t *testing.T, funcName string, input compute.Datum, exp compute.Datum, opt compute.FunctionOptions) { - checkScalar(t, funcName, []compute.Datum{input}, exp, opt) -} - -func checkCast(t *testing.T, input arrow.Array, exp arrow.Array, opts compute.CastOptions) { - opts.ToType = exp.DataType() - in, out := compute.NewDatum(input), 
compute.NewDatum(exp) - defer in.Release() - defer out.Release() - checkScalarUnary(t, "cast", in, out, &opts) -} - -func checkCastFails(t *testing.T, input arrow.Array, opt compute.CastOptions) { - _, err := compute.CastArray(context.Background(), input, &opt) - assert.ErrorIs(t, err, arrow.ErrInvalid) - - // for scalars, check that at least one of the input fails - // since many of the tests contain a mix of passing and failing values. - // in some cases we will want to check more precisely - nfail := 0 - for i := 0; i < input.Len(); i++ { - sc, _ := scalar.GetScalar(input, i) - if r, ok := sc.(scalar.Releasable); ok { - defer r.Release() - } - d := compute.NewDatum(sc) - defer d.Release() - out, err := compute.CastDatum(context.Background(), d, &opt) - if err != nil { - nfail++ - } else { - out.Release() - } - } - assert.Greater(t, nfail, 0) -} - -func checkCastZeroCopy(t *testing.T, input arrow.Array, toType arrow.DataType, opts *compute.CastOptions) { - opts.ToType = toType - out, err := compute.CastArray(context.Background(), input, opts) - assert.NoError(t, err) - defer out.Release() - - assert.Len(t, out.Data().Buffers(), len(input.Data().Buffers())) - for i := range out.Data().Buffers() { - assertBufferSame(t, out, input, i) - } -} - -var ( - signedIntTypes = []arrow.DataType{ - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Int64, - } - unsignedIntTypes = []arrow.DataType{ - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Uint64, - } - integerTypes = append(signedIntTypes, unsignedIntTypes...) - floatingTypes = []arrow.DataType{ - arrow.PrimitiveTypes.Float32, - arrow.PrimitiveTypes.Float64, - } - numericTypes = append(integerTypes, floatingTypes...) 
- baseBinaryTypes = []arrow.DataType{ - arrow.BinaryTypes.Binary, - arrow.BinaryTypes.LargeBinary, - arrow.BinaryTypes.String, - arrow.BinaryTypes.LargeString, - } - dictIndexTypes = integerTypes -) - -type CastSuite struct { - suite.Suite - - mem *memory.CheckedAllocator -} - -func (c *CastSuite) allocateEmptyBitmap(len int) *memory.Buffer { - buf := memory.NewResizableBuffer(c.mem) - buf.Resize(int(bitutil.BytesForBits(int64(len)))) - return buf -} - -func (c *CastSuite) maskArrayWithNullsAt(input arrow.Array, toMask []int) arrow.Array { - masked := input.Data().(*array.Data).Copy() - defer masked.Release() - if masked.Buffers()[0] != nil { - masked.Buffers()[0].Release() - } - masked.Buffers()[0] = c.allocateEmptyBitmap(input.Len()) - masked.SetNullN(array.UnknownNullCount) - - if original := input.NullBitmapBytes(); len(original) > 0 { - bitutil.CopyBitmap(original, input.Data().Offset(), input.Len(), masked.Buffers()[0].Bytes(), 0) - } else { - bitutil.SetBitsTo(masked.Buffers()[0].Bytes(), 0, int64(input.Len()), true) - } - - for _, i := range toMask { - bitutil.SetBitTo(masked.Buffers()[0].Bytes(), i, false) - } - - return array.MakeFromData(masked) -} - -func (c *CastSuite) invalidUtf8Arr(dt arrow.DataType) arrow.Array { - bldr := array.NewBinaryBuilder(c.mem, dt.(arrow.BinaryDataType)) - defer bldr.Release() - - bldr.AppendValues([][]byte{ - []byte("Hi"), - []byte("olá mundo"), - []byte("你好世界"), - []byte(""), - []byte("\xa0\xa1"), // invalid utf8! 
- }, nil) - - return bldr.NewArray() -} - -type binaryBuilderAppend interface { - array.Builder - AppendValues([][]byte, []bool) -} - -func (c *CastSuite) fixedSizeInvalidUtf8(dt arrow.DataType) arrow.Array { - var bldr binaryBuilderAppend - if dt.ID() == arrow.FIXED_SIZE_BINARY { - c.Require().Equal(3, dt.(*arrow.FixedSizeBinaryType).ByteWidth) - bldr = array.NewFixedSizeBinaryBuilder(c.mem, dt.(*arrow.FixedSizeBinaryType)) - } else { - bldr = array.NewBinaryBuilder(c.mem, dt.(arrow.BinaryDataType)) - } - - defer bldr.Release() - - bldr.AppendValues([][]byte{ - []byte("Hi!"), - []byte("lá"), - []byte("你"), - []byte(" "), - []byte("\xa0\xa1\xa2"), // invalid utf8! - }, nil) - - return bldr.NewArray() -} - -func (c *CastSuite) SetupTest() { - c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) -} - -func (c *CastSuite) TearDownTest() { - c.mem.AssertSize(c.T(), 0) -} - -func (c *CastSuite) TestCanCast() { - expectCanCast := func(from arrow.DataType, toSet []arrow.DataType, expected bool) { - for _, to := range toSet { - c.Equalf(expected, compute.CanCast(from, to), "CanCast from: %s, to: %s, expected: %t", - from, to, expected) - } - } - - canCast := func(from arrow.DataType, toSet []arrow.DataType) { - expectCanCast(from, toSet, true) - } - - cannotCast := func(from arrow.DataType, toSet []arrow.DataType) { - expectCanCast(from, toSet, false) - } - - canCast(arrow.Null, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(arrow.Null, numericTypes) - canCast(arrow.Null, baseBinaryTypes) - canCast(arrow.Null, []arrow.DataType{ - arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Timestamp_s, - }) - cannotCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint16, ValueType: arrow.Null}, []arrow.DataType{arrow.Null}) - - canCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(arrow.FixedWidthTypes.Boolean, numericTypes) - 
canCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - cannotCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: arrow.FixedWidthTypes.Boolean}, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - - cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.Null}) - cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary}) - cannotCast(arrow.FixedWidthTypes.Boolean, []arrow.DataType{ - arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Timestamp_s}) - - for _, from := range numericTypes { - canCast(from, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(from, numericTypes) - canCast(from, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int32, ValueType: from}, []arrow.DataType{from}) - - cannotCast(from, []arrow.DataType{arrow.Null}) - } - - for _, from := range baseBinaryTypes { - canCast(from, []arrow.DataType{arrow.FixedWidthTypes.Boolean}) - canCast(from, numericTypes) - canCast(from, baseBinaryTypes) - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int64, ValueType: from}, []arrow.DataType{from}) - - // any cast which is valid for the dictionary is valid for the dictionary array - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Uint32, ValueType: from}, baseBinaryTypes) - canCast(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int16, ValueType: from}, baseBinaryTypes) - - cannotCast(from, []arrow.DataType{arrow.Null}) - } - - canCast(arrow.BinaryTypes.String, []arrow.DataType{arrow.FixedWidthTypes.Timestamp_ms}) - canCast(arrow.BinaryTypes.LargeString, []arrow.DataType{arrow.FixedWidthTypes.Timestamp_ns}) - // no formatting supported - cannotCast(arrow.FixedWidthTypes.Timestamp_us, 
[]arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary}) - - canCast(&arrow.FixedSizeBinaryType{ByteWidth: 3}, []arrow.DataType{ - arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary, arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString, - &arrow.FixedSizeBinaryType{ByteWidth: 3}}) - - arrow.RegisterExtensionType(types.NewSmallintType()) - defer arrow.UnregisterExtensionType("smallint") - canCast(types.NewSmallintType(), []arrow.DataType{arrow.PrimitiveTypes.Int16}) - canCast(types.NewSmallintType(), numericTypes) // any cast which is valid for storage is supported - canCast(arrow.Null, []arrow.DataType{types.NewSmallintType()}) - - canCast(arrow.FixedWidthTypes.Date32, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Date64, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Timestamp_ns, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Timestamp_us, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Time32ms, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) - canCast(arrow.FixedWidthTypes.Time64ns, []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString}) -} - -func (c *CastSuite) checkCastFails(dt arrow.DataType, input string, opts *compute.CastOptions) { - inArr, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(input), array.WithUseNumber()) - defer inArr.Release() - - checkCastFails(c.T(), inArr, *opts) -} - -func (c *CastSuite) checkCastOpts(dtIn, dtOut arrow.DataType, inJSON, outJSON string, opts compute.CastOptions) { - inArr, _, _ := array.FromJSON(c.mem, dtIn, strings.NewReader(inJSON), array.WithUseNumber()) - outArr, _, _ := array.FromJSON(c.mem, dtOut, strings.NewReader(outJSON), array.WithUseNumber()) - defer inArr.Release() - defer outArr.Release() - - 
checkCast(c.T(), inArr, outArr, opts) -} - -func (c *CastSuite) checkCast(dtIn, dtOut arrow.DataType, inJSON, outJSON string) { - c.checkCastOpts(dtIn, dtOut, inJSON, outJSON, *compute.DefaultCastOptions(true)) -} - -func (c *CastSuite) checkCastArr(in arrow.Array, dtOut arrow.DataType, json string, opts compute.CastOptions) { - outArr, _, _ := array.FromJSON(c.mem, dtOut, strings.NewReader(json), array.WithUseNumber()) - defer outArr.Release() - checkCast(c.T(), in, outArr, opts) -} - -func (c *CastSuite) checkCastExp(dtIn arrow.DataType, inJSON string, exp arrow.Array) { - inArr, _, _ := array.FromJSON(c.mem, dtIn, strings.NewReader(inJSON), array.WithUseNumber()) - defer inArr.Release() - checkCast(c.T(), inArr, exp, *compute.DefaultCastOptions(true)) -} - -func (c *CastSuite) TestNumericToBool() { - for _, dt := range numericTypes { - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `[0, null, 127, 1, 0]`, `[false, null, true, true, false]`) - } - - // check negative numbers - for _, dt := range []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Float64} { - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `[0, null, 127, -1, 0]`, `[false, null, true, true, false]`) - } -} - -func (c *CastSuite) StringToBool() { - for _, dt := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `["False", null, "true", "True", "false"]`, `[false, null, true, true, false]`) - - c.checkCast(dt, arrow.FixedWidthTypes.Boolean, - `["0", null, "1", "1", "0"]`, `[false, null, true, true, false]`) - - opts := compute.NewCastOptions(arrow.FixedWidthTypes.Boolean, true) - c.checkCastFails(dt, `["false "]`, opts) - c.checkCastFails(dt, `["T"]`, opts) - } -} - -func (c *CastSuite) TestToIntUpcast() { - c.checkCast(arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32, - `[0, null, 127, -1, 0]`, `[0, null, 127, -1, 0]`) - - c.checkCast(arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int16, - `[0, 
100, 200, 255, 0]`, `[0, 100, 200, 255, 0]`) -} - -func (c *CastSuite) TestToIntDowncastSafe() { - // int16 to uint8 no overflow/underflow - c.checkCast(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, - `[0, null, 200, 1, 2]`, `[0, null, 200, 1, 2]`) - - // int16 to uint8, overflow - c.checkCastFails(arrow.PrimitiveTypes.Int16, `[0, null, 256, 0, 0]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true)) - // and underflow - c.checkCastFails(arrow.PrimitiveTypes.Int16, `[0, null, -1, 0, 0]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true)) - - // int32 to int16, no overflow/underflow - c.checkCast(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, - `[0, null, 2000, 1, 2]`, `[0, null, 2000, 1, 2]`) - - // int32 to int16, overflow - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 70000, 2]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Int16, true)) - - // and underflow - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, -70000, 2]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Int16, true)) - - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0, null, 2000, -70000, 2]`, - compute.NewCastOptions(arrow.PrimitiveTypes.Uint8, true)) - -} - -func (c *CastSuite) TestIntegerSignedToUnsigned() { - i32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[-2147483648, null, -1, 65535, 2147483647]`)) - defer i32s.Release() - - // same width - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint32, true)) - // wider - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint64, true)) - // narrower - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint16, true)) - - var options compute.CastOptions - options.AllowIntOverflow = true - - u32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint32, - strings.NewReader(`[2147483648, null, 4294967295, 65535, 2147483647]`)) - defer u32s.Release() - checkCast(c.T(), i32s, u32s, 
options) - - u64s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint64, - strings.NewReader(`[18446744071562067968, null, 18446744073709551615, 65535, 2147483647]`), - array.WithUseNumber()) // have to use WithUseNumber so it doesn't lose precision converting to float64 - defer u64s.Release() - checkCast(c.T(), i32s, u64s, options) - - // fail because of overflow, instead of underflow - i32s, _, _ = array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[0, null, 0, 65536, 2147483647]`)) - defer i32s.Release() - checkCastFails(c.T(), i32s, *compute.NewCastOptions(arrow.PrimitiveTypes.Uint16, true)) - - u16s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint16, strings.NewReader(`[0, null, 0, 0, 65535]`)) - defer u16s.Release() - checkCast(c.T(), i32s, u16s, options) -} - -func (c *CastSuite) TestIntegerUnsignedToSigned() { - u32s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Uint32, strings.NewReader(`[4294967295, null, 0, 32768]`)) - defer u32s.Release() - // same width - checkCastFails(c.T(), u32s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int32)) - - // narrower - checkCastFails(c.T(), u32s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int16)) - sl := array.NewSlice(u32s, 1, int64(u32s.Len())) - defer sl.Release() - checkCastFails(c.T(), sl, *compute.SafeCastOptions(arrow.PrimitiveTypes.Int16)) - - var opts compute.CastOptions - opts.AllowIntOverflow = true - c.checkCastArr(u32s, arrow.PrimitiveTypes.Int32, `[-1, null, 0, 32768]`, opts) - c.checkCastArr(u32s, arrow.PrimitiveTypes.Int64, `[4294967295, null, 0, 32768]`, opts) - c.checkCastArr(u32s, arrow.PrimitiveTypes.Int16, `[-1, null, 0, -32768]`, opts) -} - -func (c *CastSuite) TestToIntDowncastUnsafe() { - opts := compute.CastOptions{AllowIntOverflow: true} - c.checkCastOpts(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, - `[0, null, 200, 1, 2]`, `[0, null, 200, 1, 2]`, opts) - - c.checkCastOpts(arrow.PrimitiveTypes.Int16, arrow.PrimitiveTypes.Uint8, - `[0, null, 
256, 1, 2, -1]`, `[0, null, 0, 1, 2, 255]`, opts) - - c.checkCastOpts(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, - `[0, null, 2000, 1, 2, -1]`, `[0, null, 2000, 1, 2, -1]`, opts) - - c.checkCastOpts(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int16, - `[0, null, 2000, 70000, -70000]`, `[0, null, 2000, 4464, -4464]`, opts) -} - -func (c *CastSuite) TestFloatingToInt() { - for _, from := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - for _, to := range []arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64} { - // float to int no truncation - c.checkCast(from, to, `[1.0, null, 0.0, -1.0, 5.0]`, `[1, null, 0, -1, 5]`) - - // float to int truncate error - opts := compute.SafeCastOptions(to) - c.checkCastFails(from, `[1.5, 0.0, null, 0.5, -1.5, 5.5]`, opts) - - // float to int truncate allowed - opts.AllowFloatTruncate = true - c.checkCastOpts(from, to, `[1.5, 0.0, null, 0.5, -1.5, 5.5]`, `[1, 0, null, 0, -1, 5]`, *opts) - } - } -} - -func (c *CastSuite) TestIntToFloating() { - for _, from := range []arrow.DataType{arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32} { - two24 := `[16777216, 16777217]` - c.checkCastFails(from, two24, compute.SafeCastOptions(arrow.PrimitiveTypes.Float32)) - one24 := `[16777216]` - c.checkCast(from, arrow.PrimitiveTypes.Float32, one24, one24) - } - - i64s, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int64, - strings.NewReader(`[-9223372036854775808, -9223372036854775807, 0, 9223372036854775806, 9223372036854775807]`), - array.WithUseNumber()) - defer i64s.Release() - - checkCastFails(c.T(), i64s, *compute.SafeCastOptions(arrow.PrimitiveTypes.Float64)) - masked := c.maskArrayWithNullsAt(i64s, []int{0, 1, 3, 4}) - defer masked.Release() - c.checkCastArr(masked, arrow.PrimitiveTypes.Float64, `[null, null, 0, null, null]`, *compute.DefaultCastOptions(true)) - - c.checkCastFails(arrow.PrimitiveTypes.Uint64, `[9007199254740992, 9007199254740993]`, 
compute.SafeCastOptions(arrow.PrimitiveTypes.Float64)) -} - -func (c *CastSuite) TestDecimal128ToInt() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int64) - - c.Run("no overflow no truncate", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run(fmt.Sprintf("int_overflow=%t", allowIntOverflow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run(fmt.Sprintf("dec_truncate=%t", allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverflow - opts.AllowDecimalTruncate = allowDecTruncate - - noOverflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "-11.0000000000", "22.0000000000", "-121.000000000", null]`)) - - c.checkCastArr(noOverflowNoTrunc, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - noOverflowNoTrunc.Release() - }) - } - }) - } - }) - - c.Run("truncate no overflow", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run("allow overflow"+strconv.FormatBool(allowIntOverflow), func() { - opts.AllowIntOverflow = allowIntOverflow - truncNoOverflow, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.1000000000", "-11.0000004500", "22.0000004500", "-121.1210000000", null]`)) - - opts.AllowDecimalTruncate = true - c.checkCastArr(truncNoOverflow, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - - opts.AllowDecimalTruncate = false - checkCastFails(c.T(), truncNoOverflow, *opts) - truncNoOverflow.Release() - }) - } - }) - - c.Run("overflow no truncate", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - overflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`[ - "12345678901234567890000.0000000000", - 
"99999999999999999999999.0000000000", - null]`), array.WithUseNumber()) - defer overflowNoTrunc.Release() - opts.AllowIntOverflow = true - c.checkCastArr(overflowNoTrunc, arrow.PrimitiveTypes.Int64, - // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64 - `[4807115922877858896, 200376420520689663, null]`, *opts) - - opts.AllowIntOverflow = false - checkCastFails(c.T(), overflowNoTrunc, *opts) - }) - } - }) - - c.Run("overflow and truncate", func() { - for _, allowIntOverFlow := range []bool{false, true} { - c.Run("allow overflow = "+strconv.FormatBool(allowIntOverFlow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate = "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverFlow - opts.AllowDecimalTruncate = allowDecTruncate - - overflowAndTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`[ - "12345678901234567890000.0045345000", - "99999999999999999999999.0000344300", - null]`), array.WithUseNumber()) - defer overflowAndTruncate.Release() - if opts.AllowIntOverflow && opts.AllowDecimalTruncate { - c.checkCastArr(overflowAndTruncate, arrow.PrimitiveTypes.Int64, - // 12345678901234567890000 % 2**64, 99999999999999999999999 % 2**64 - `[4807115922877858896, 200376420520689663, null]`, *opts) - } else { - checkCastFails(c.T(), overflowAndTruncate, *opts) - } - }) - } - }) - } - }) - - c.Run("negative scale", func() { - bldr := array.NewDecimal128Builder(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: -4}) - defer bldr.Release() - - var err error - for _, d := range []decimal128.Num{decimal128.FromU64(1234567890000), decimal128.FromI64(-120000)} { - d, err = d.Rescale(0, -4) - c.Require().NoError(err) - bldr.Append(d) - } - negScale := bldr.NewArray() - defer negScale.Release() - - opts.AllowIntOverflow = true - opts.AllowDecimalTruncate = true - c.checkCastArr(negScale, arrow.PrimitiveTypes.Int64, `[1234567890000, 
-120000]`, *opts) - }) -} - -func (c *CastSuite) TestDecimal256ToInt() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int64) - - c.Run("no overflow no truncate", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run(fmt.Sprintf("int_overflow=%t", allowIntOverflow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run(fmt.Sprintf("dec_truncate=%t", allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverflow - opts.AllowDecimalTruncate = allowDecTruncate - - noOverflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`["02.0000000000", "-11.0000000000", "22.0000000000", "-121.000000000", null]`)) - - c.checkCastArr(noOverflowNoTrunc, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - noOverflowNoTrunc.Release() - }) - } - }) - } - }) - - c.Run("truncate no overflow", func() { - for _, allowIntOverflow := range []bool{false, true} { - c.Run("allow overflow"+strconv.FormatBool(allowIntOverflow), func() { - opts.AllowIntOverflow = allowIntOverflow - truncNoOverflow, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`["02.1000000000", "-11.0000004500", "22.0000004500", "-121.1210000000", null]`)) - - opts.AllowDecimalTruncate = true - c.checkCastArr(truncNoOverflow, arrow.PrimitiveTypes.Int64, `[2, -11, 22, -121, null]`, *opts) - - opts.AllowDecimalTruncate = false - checkCastFails(c.T(), truncNoOverflow, *opts) - truncNoOverflow.Release() - }) - } - }) - - c.Run("overflow no truncate", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - overflowNoTrunc, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`[ - "1234567890123456789000000.0000000000", - "9999999999999999999999999.0000000000", - null]`), 
array.WithUseNumber()) - defer overflowNoTrunc.Release() - opts.AllowIntOverflow = true - c.checkCastArr(overflowNoTrunc, arrow.PrimitiveTypes.Int64, - // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64 - `[1096246371337547584, 1590897978359414783, null]`, *opts) - - opts.AllowIntOverflow = false - checkCastFails(c.T(), overflowNoTrunc, *opts) - }) - } - }) - - c.Run("overflow and truncate", func() { - for _, allowIntOverFlow := range []bool{false, true} { - c.Run("allow overflow = "+strconv.FormatBool(allowIntOverFlow), func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("allow truncate = "+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowIntOverflow = allowIntOverFlow - opts.AllowDecimalTruncate = allowDecTruncate - - overflowAndTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 10}, - strings.NewReader(`[ - "1234567890123456789000000.0045345000", - "9999999999999999999999999.0000344300", - null]`), array.WithUseNumber()) - defer overflowAndTruncate.Release() - if opts.AllowIntOverflow && opts.AllowDecimalTruncate { - c.checkCastArr(overflowAndTruncate, arrow.PrimitiveTypes.Int64, - // 1234567890123456789000000 % 2**64, 9999999999999999999999999 % 2**64 - `[1096246371337547584, 1590897978359414783, null]`, *opts) - } else { - checkCastFails(c.T(), overflowAndTruncate, *opts) - } - }) - } - }) - } - }) - - c.Run("negative scale", func() { - bldr := array.NewDecimal256Builder(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: -4}) - defer bldr.Release() - - var err error - for _, d := range []decimal256.Num{decimal256.FromU64(1234567890000), decimal256.FromI64(-120000)} { - d, err = d.Rescale(0, -4) - c.Require().NoError(err) - bldr.Append(d) - } - negScale := bldr.NewArray() - defer negScale.Release() - - opts.AllowIntOverflow = true - opts.AllowDecimalTruncate = true - c.checkCastArr(negScale, arrow.PrimitiveTypes.Int64, `[1234567890000, -120000]`, *opts) - }) -} - -func (c 
*CastSuite) TestIntegerToDecimal() { - for _, decType := range []arrow.DataType{&arrow.Decimal128Type{Precision: 22, Scale: 2}, &arrow.Decimal256Type{Precision: 22, Scale: 2}} { - c.Run(decType.String(), func() { - for _, intType := range integerTypes { - c.Run(intType.String(), func() { - c.checkCast(intType, decType, `[0, 7, null, 100, 99]`, `["0.00", "7.00", null, "100.00", "99.00"]`) - }) - } - }) - } - - c.Run("extreme value", func() { - for _, dt := range []arrow.DataType{&arrow.Decimal128Type{Precision: 19, Scale: 0}, &arrow.Decimal256Type{Precision: 19, Scale: 0}} { - c.Run(dt.String(), func() { - c.checkCast(arrow.PrimitiveTypes.Int64, dt, - `[-9223372036854775808, 9223372036854775807]`, `["-9223372036854775808", "9223372036854775807"]`) - }) - } - for _, dt := range []arrow.DataType{&arrow.Decimal128Type{Precision: 20, Scale: 0}, &arrow.Decimal256Type{Precision: 20, Scale: 0}} { - c.Run(dt.String(), func() { - c.checkCast(arrow.PrimitiveTypes.Uint64, dt, - `[0, 18446744073709551615]`, `["0", "18446744073709551615"]`) - }) - } - }) - - c.Run("insufficient output precision", func() { - var opts compute.CastOptions - opts.ToType = &arrow.Decimal128Type{Precision: 5, Scale: 3} - c.checkCastFails(arrow.PrimitiveTypes.Int8, `[0]`, &opts) - - opts.ToType = &arrow.Decimal256Type{Precision: 76, Scale: 67} - c.checkCastFails(arrow.PrimitiveTypes.Int32, `[0]`, &opts) - }) -} - -func (c *CastSuite) TestDecimal128ToDecimal128() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 10}, - strings.NewReader(`["02.", "30.", "22.", "-121.", 
null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), noTruncate, expected, opts) - checkCast(c.T(), expected, noTruncate, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer d52.Release() - defer d42.Release() - - checkCast(c.T(), d52, d42, opts) - checkCast(c.T(), d42, d52, opts) - }) - } - }) - - c.Run("rescale leads to trunc", func() { - dP38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - dP28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - dP38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - dP38S10.Release() - dP28S0.Release() - dP38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), dP38S10, dP28S0, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = dP28S0.DataType() - checkCastFails(c.T(), dP38S10, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d42.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal128Type{Precision: 3, Scale: 2}, - 
&arrow.Decimal128Type{Precision: 4, Scale: 3}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d42, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d42, opts) - } - }) -} - -func (c *CastSuite) TestDecimal256ToDecimal256() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 10}, - strings.NewReader(`["02.", "30.", "22.", "-121.", null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), noTruncate, expected, opts) - checkCast(c.T(), expected, noTruncate, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 5, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer d52.Release() - defer d42.Release() - - checkCast(c.T(), d52, d42, opts) - checkCast(c.T(), d42, d52, opts) - }) - } - }) - - c.Run("rescale leads to trunc", func() { - dP38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - dP28S0, _, _ := array.FromJSON(c.mem, 
&arrow.Decimal256Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - dP38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - dP38S10.Release() - dP28S0.Release() - dP38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), dP38S10, dP28S0, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = dP28S0.DataType() - checkCastFails(c.T(), dP38S10, opts) - checkCast(c.T(), dP28S0, dP38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d42.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal256Type{Precision: 3, Scale: 2}, - &arrow.Decimal256Type{Precision: 4, Scale: 3}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d42, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d42, opts) - } - }) -} - -func (c *CastSuite) TestDecimal128ToDecimal256() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 10}, - strings.NewReader(`["02.", "30.", "22.", "-121.", null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), 
noTruncate, expected, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - d52, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 5, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d402, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 40, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer d52.Release() - defer d42.Release() - defer d402.Release() - - checkCast(c.T(), d52, d42, opts) - checkCast(c.T(), d52, d402, opts) - }) - } - }) - - c.Run("rescale leads to trunc", func() { - d128P38S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - d128P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d256P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d256P38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - d128P38S10.Release() - d128P28S0.Release() - d256P28S0.Release() - d256P38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), d128P38S10, d256P28S0, opts) - checkCast(c.T(), d128P28S0, d256P38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = d256P28S0.DataType() - checkCastFails(c.T(), d128P38S10, opts) - checkCast(c.T(), d128P28S0, d256P38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d128P4S2, _, _ := 
array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d128P4S2.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal256Type{Precision: 3, Scale: 2}, - &arrow.Decimal256Type{Precision: 4, Scale: 3}, - &arrow.Decimal256Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d128P4S2, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d128P4S2, opts) - } - }) -} - -func (c *CastSuite) TestDecimal256ToDecimal128() { - var opts compute.CastOptions - - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - noTruncate, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 10}, - strings.NewReader(`["02.0000000000", "30.0000000000", "22.0000000000", "-121.0000000000", null]`)) - expected, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["02.", "30.", "22.", "-121.", null]`)) - - defer noTruncate.Release() - defer expected.Release() - - checkCast(c.T(), noTruncate, expected, opts) - checkCast(c.T(), expected, noTruncate, opts) - }) - } - - c.Run("same scale diff precision", func() { - for _, allowDecTruncate := range []bool{false, true} { - c.Run("decTruncate="+strconv.FormatBool(allowDecTruncate), func() { - opts.AllowDecimalTruncate = allowDecTruncate - - dP42S2, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34", "0.56"]`)) - - defer dP42S2.Release() - defer d42.Release() - - checkCast(c.T(), dP42S2, d42, opts) - checkCast(c.T(), d42, dP42S2, opts) - }) - } - }) - - c.Run("rescale 
leads to trunc", func() { - d256P52S10, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 52, Scale: 10}, - strings.NewReader(`["-02.1234567890", "30.1234567890", null]`)) - d256P42S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 42, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d128P28S0, _, _ := array.FromJSON(c.mem, &arrow.Decimal128Type{Precision: 28, Scale: 0}, - strings.NewReader(`["-02.", "30.", null]`)) - d128P38S10RoundTripped, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 38, Scale: 10}, - strings.NewReader(`["-02.0000000000", "30.0000000000", null]`)) - defer func() { - d256P52S10.Release() - d256P42S0.Release() - d128P28S0.Release() - d128P38S10RoundTripped.Release() - }() - - opts.AllowDecimalTruncate = true - checkCast(c.T(), d256P52S10, d128P28S0, opts) - checkCast(c.T(), d256P42S0, d128P38S10RoundTripped, opts) - - opts.AllowDecimalTruncate = false - opts.ToType = d128P28S0.DataType() - checkCastFails(c.T(), d256P52S10, opts) - checkCast(c.T(), d256P42S0, d128P38S10RoundTripped, opts) - }) - - c.Run("precision loss without rescale = trunc", func() { - d42, _, _ := array.FromJSON(c.mem, &arrow.Decimal256Type{Precision: 4, Scale: 2}, - strings.NewReader(`["12.34"]`)) - defer d42.Release() - for _, dt := range []arrow.DataType{ - &arrow.Decimal128Type{Precision: 3, Scale: 2}, - &arrow.Decimal128Type{Precision: 4, Scale: 3}, - &arrow.Decimal128Type{Precision: 2, Scale: 1}} { - - opts.AllowDecimalTruncate = true - opts.ToType = dt - out, err := compute.CastArray(context.Background(), d42, &opts) - out.Release() - c.NoError(err) - - opts.AllowDecimalTruncate = false - opts.ToType = dt - checkCastFails(c.T(), d42, opts) - } - }) -} - -func (c *CastSuite) TestFloatingToDecimal() { - for _, fltType := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - c.Run("from "+fltType.String(), func() { - for _, decType := range 
[]arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 2}, &arrow.Decimal256Type{Precision: 5, Scale: 2}} { - c.Run("to "+decType.String(), func() { - c.checkCast(fltType, decType, - `[0.0, null, 123.45, 123.456, 999.994]`, `["0.00", null, "123.45", "123.46", "999.99"]`) - - c.Run("overflow", func() { - opts := compute.CastOptions{ToType: decType} - c.checkCastFails(fltType, `[999.996]`, &opts) - - opts.AllowDecimalTruncate = true - c.checkCastOpts(fltType, decType, `[0.0, null, 999.996, 123.45, 999.994]`, - `["0.00", null, "0.00", "123.45", "999.99"]`, opts) - }) - }) - } - }) - } - - dec128 := func(prec, scale int32) arrow.DataType { - return &arrow.Decimal128Type{Precision: prec, Scale: scale} - } - dec256 := func(prec, scale int32) arrow.DataType { - return &arrow.Decimal256Type{Precision: prec, Scale: scale} - } - - type decFunc func(int32, int32) arrow.DataType - - for _, decType := range []decFunc{dec128, dec256} { - // 2**64 + 2**41 (exactly representable as a float) - c.checkCast(arrow.PrimitiveTypes.Float32, decType(20, 0), - `[1.8446746e+19, -1.8446746e+19]`, - `[18446746272732807168, -18446746272732807168]`) - - c.checkCast(arrow.PrimitiveTypes.Float64, decType(20, 0), - `[1.8446744073709556e+19, -1.8446744073709556e+19]`, - `[18446744073709555712, -18446744073709555712]`) - - c.checkCast(arrow.PrimitiveTypes.Float32, decType(20, 4), - `[1.8446746e+15, -1.8446746e+15]`, - `[1844674627273280.7168, -1844674627273280.7168]`) - - c.checkCast(arrow.PrimitiveTypes.Float64, decType(20, 4), - `[1.8446744073709556e+15, -1.8446744073709556e+15]`, - `[1844674407370955.5712, -1844674407370955.5712]`) - } -} - -func (c *CastSuite) TestDecimalToFloating() { - for _, flt := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - c.Run(flt.String(), func() { - for _, dec := range []arrow.DataType{&arrow.Decimal128Type{Precision: 5, Scale: 2}, &arrow.Decimal256Type{Precision: 5, Scale: 2}} { - c.Run(dec.String(), func() { - 
c.checkCast(dec, flt, `["0.00", null, "123.45", "999.99"]`, - `[0.0, null, 123.45, 999.99]`) - }) - } - }) - } -} - -func (c *CastSuite) TestDateToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(arrow.FixedWidthTypes.Date32, stype, - `[0, null]`, `["1970-01-01", null]`) - c.checkCast(arrow.FixedWidthTypes.Date64, stype, - `[86400000, null]`, `["1970-01-02", null]`) - } -} - -func (c *CastSuite) TestTimeToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(arrow.FixedWidthTypes.Time32s, stype, `[1, 62]`, `["00:00:01", "00:01:02"]`) - c.checkCast(arrow.FixedWidthTypes.Time64ns, stype, `[0, 1]`, `["00:00:00.000000000", "00:00:00.000000001"]`) - } -} - -func (c *CastSuite) TestTimestampToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(&arrow.TimestampType{Unit: arrow.Second}, stype, - `[-30610224000, -5364662400]`, `["1000-01-01 00:00:00", "1800-01-01 00:00:00"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond}, stype, - `[-30610224000000, -5364662400000]`, `["1000-01-01 00:00:00.000", "1800-01-01 00:00:00.000"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond}, stype, - `[-30610224000000000, -5364662400000000]`, `["1000-01-01 00:00:00.000000", "1800-01-01 00:00:00.000000"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond}, stype, - `[-596933876543210988, 349837323456789012]`, `["1951-02-01 01:02:03.456789012", "1981-02-01 01:02:03.456789012"]`) - } -} - -func (c *CastSuite) TestTimestampWithZoneToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, stype, - `[-30610224000, -5364662400]`, `["1000-01-01 00:00:00Z", "1800-01-01 00:00:00Z"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Second, 
TimeZone: "America/Phoenix"}, stype, - `[-34226955, 1456767743]`, `["1968-11-30 13:30:45-0700", "2016-02-29 10:42:23-0700"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "America/Phoenix"}, stype, - `[-34226955877, 1456767743456]`, `["1968-11-30 13:30:44.123-0700", "2016-02-29 10:42:23.456-0700"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "America/Phoenix"}, stype, - `[-34226955877000, 1456767743456789]`, `["1968-11-30 13:30:44.123000-0700", "2016-02-29 10:42:23.456789-0700"]`) - - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "America/Phoenix"}, stype, - `[-34226955876543211, 1456767743456789246]`, `["1968-11-30 13:30:44.123456789-0700", "2016-02-29 10:42:23.456789246-0700"]`) - } -} - -func (c *CastSuite) assertBinaryZeroCopy(lhs, rhs arrow.Array) { - // null bitmap and data buffers are always zero-copied - assertBufferSame(c.T(), lhs, rhs, 0) - assertBufferSame(c.T(), lhs, rhs, 2) - - lOffsetByteWidth := lhs.DataType().Layout().Buffers[1].ByteWidth - rOffsetByteWidth := rhs.DataType().Layout().Buffers[1].ByteWidth - if lOffsetByteWidth == rOffsetByteWidth { - assertBufferSame(c.T(), lhs, rhs, 1) - return - } - - offsets := make([]arrow.Array, 0, 2) - for _, arr := range []arrow.Array{lhs, rhs} { - length := arr.Len() - buffer := arr.Data().Buffers()[1] - - byteWidth := arr.DataType().Layout().Buffers[1].ByteWidth - switch byteWidth { - case 4: - data := array.NewData(arrow.PrimitiveTypes.Int32, length, []*memory.Buffer{nil, buffer}, nil, 0, 0) - defer data.Release() - i32 := array.NewInt32Data(data) - i64, err := compute.CastArray(context.Background(), i32, compute.SafeCastOptions(arrow.PrimitiveTypes.Int64)) - c.Require().NoError(err) - i32.Release() - defer i64.Release() - offsets = append(offsets, i64) - default: - data := array.NewData(arrow.PrimitiveTypes.Int64, length, []*memory.Buffer{nil, buffer}, nil, 0, 0) - defer data.Release() - i64 := array.NewInt64Data(data) - defer 
i64.Release() - offsets = append(offsets, i64) - } - } - c.Truef(array.Equal(offsets[0], offsets[1]), "lhs: %s\nrhs: %s", offsets[0], offsets[1]) -} - -func (c *CastSuite) TestBinaryToString() { - for _, btype := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { - c.Run(btype.String(), func() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - // empty -> empty always works - c.checkCast(btype, stype, `[]`, `[]`) - - invalidUtf8 := c.invalidUtf8Arr(btype) - defer invalidUtf8.Release() - - invalidutf8Str := c.invalidUtf8Arr(stype) - defer invalidutf8Str.Release() - - // invalid utf8 masked by a null bit is not an error - masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) - expMasked := c.maskArrayWithNullsAt(invalidutf8Str, []int{4}) - defer masked.Release() - defer expMasked.Release() - - checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(stype)) - - opts := compute.SafeCastOptions(stype) - checkCastFails(c.T(), invalidUtf8, *opts) - - // override utf8 check - opts.AllowInvalidUtf8 = true - strs, err := compute.CastArray(context.Background(), invalidUtf8, opts) - c.NoError(err) - defer strs.Release() - c.assertBinaryZeroCopy(invalidUtf8, strs) - }) - } - }) - } - - c.Run("fixed size binary", func() { - fromType := &arrow.FixedSizeBinaryType{ByteWidth: 3} - invalidUtf8Arr := c.fixedSizeInvalidUtf8(fromType) - defer invalidUtf8Arr.Release() - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - c.checkCast(fromType, stype, `[]`, `[]`) - - // invalid utf-8 masked by a null bit is not an error - strInvalidUtf8 := c.fixedSizeInvalidUtf8(stype) - defer strInvalidUtf8.Release() - - masked := c.maskArrayWithNullsAt(invalidUtf8Arr, []int{4}) - expMasked := c.maskArrayWithNullsAt(strInvalidUtf8, []int{4}) - defer masked.Release() - defer expMasked.Release() - - 
checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(stype)) - - opts := compute.SafeCastOptions(stype) - checkCastFails(c.T(), invalidUtf8Arr, *opts) - - // override utf8 check - opts.AllowInvalidUtf8 = true - strs, err := compute.CastArray(context.Background(), invalidUtf8Arr, opts) - c.NoError(err) - defer strs.Release() - - // null buffer is not always the same if input is sliced - assertBufferSame(c.T(), invalidUtf8Arr, strs, 0) - - c.Same(invalidUtf8Arr.Data().Buffers()[1], strs.Data().Buffers()[2]) - }) - } - }) -} - -func (c *CastSuite) TestBinaryOrStringToBinary() { - for _, fromType := range baseBinaryTypes { - c.Run(fromType.String(), func() { - for _, toType := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { - c.Run(toType.String(), func() { - // empty -> empty always works - c.checkCast(fromType, toType, `[]`, `[]`) - - invalidUtf8 := c.invalidUtf8Arr(fromType) - defer invalidUtf8.Release() - - // invalid utf-8 is not an error for binary - out, err := compute.CastToType(context.Background(), invalidUtf8, toType) - c.NoError(err) - defer out.Release() - c.assertBinaryZeroCopy(invalidUtf8, out) - - // invalid utf-8 masked by a null is also not an erro - invalidutf8Bin := c.invalidUtf8Arr(toType) - defer invalidutf8Bin.Release() - - // invalid utf8 masked by a null bit is not an error - masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) - expMasked := c.maskArrayWithNullsAt(invalidutf8Bin, []int{4}) - defer masked.Release() - defer expMasked.Release() - - checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(toType)) - }) - } - }) - } - - c.Run("fixed size binary", func() { - fromType := &arrow.FixedSizeBinaryType{ByteWidth: 3} - invalidUtf8Arr := c.fixedSizeInvalidUtf8(fromType) - defer invalidUtf8Arr.Release() - - checkCast(c.T(), invalidUtf8Arr, invalidUtf8Arr, *compute.DefaultCastOptions(true)) - checkCastFails(c.T(), invalidUtf8Arr, *compute.SafeCastOptions(&arrow.FixedSizeBinaryType{ByteWidth: 
5})) - for _, toType := range []arrow.DataType{arrow.BinaryTypes.Binary, arrow.BinaryTypes.LargeBinary} { - c.Run(toType.String(), func() { - c.checkCast(fromType, toType, `[]`, `[]`) - - out, err := compute.CastToType(context.Background(), invalidUtf8Arr, toType) - c.NoError(err) - defer out.Release() - assertBufferSame(c.T(), invalidUtf8Arr, out, 0) - - c.Same(invalidUtf8Arr.Data().Buffers()[1], out.Data().Buffers()[2]) - }) - } - }) -} - -func (c *CastSuite) TestStringToString() { - for _, fromType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run("from "+fromType.String(), func() { - for _, toType := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run("to "+toType.String(), func() { - c.checkCast(fromType, toType, `[]`, `[]`) - - invalidUtf8 := c.invalidUtf8Arr(fromType) - defer invalidUtf8.Release() - - invalidutf8Str := c.invalidUtf8Arr(toType) - defer invalidutf8Str.Release() - - // invalid utf8 masked by a null bit is not an error - masked := c.maskArrayWithNullsAt(invalidUtf8, []int{4}) - expMasked := c.maskArrayWithNullsAt(invalidutf8Str, []int{4}) - defer masked.Release() - defer expMasked.Release() - - checkCast(c.T(), masked, expMasked, *compute.SafeCastOptions(toType)) - - opts := compute.SafeCastOptions(toType) - // override utf8 check - opts.AllowInvalidUtf8 = true - // utf-8 is not checked by cast when the origin (utf-8) guarantees utf-8 - strs, err := compute.CastArray(context.Background(), invalidUtf8, opts) - c.NoError(err) - defer strs.Release() - c.assertBinaryZeroCopy(invalidUtf8, strs) - }) - } - }) - } -} - -func (c *CastSuite) TestStringToInt() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - for _, dt := range signedIntTypes { - c.checkCast(stype, dt, - `["0", null, "127", "-1", "0", "0x0", "0x7F"]`, - `[0, null, 127, -1, 0, 0, 127]`) - } - - c.checkCast(stype, arrow.PrimitiveTypes.Int32, - 
`["2147483647", null, "-2147483648", "0", "0X0", "0x7FFFFFFF", "-0X1", "-0x10000000"]`, - `[2147483647, null, -2147483648, 0, 0, 2147483647, -1, -268435456]`) - - c.checkCast(stype, arrow.PrimitiveTypes.Int64, - `["9223372036854775807", null, "-9223372036854775808", "0", "0x0", "0x7FFFFFFFFFFFFFFf", "-0x0FFFFFFFFFFFFFFF"]`, - `[9223372036854775807, null, -9223372036854775808, 0, 0, 9223372036854775807, -1152921504606846975]`) - - for _, dt := range unsignedIntTypes { - c.checkCast(stype, dt, `["0", null, "127", "255", "0", "0x0", "0xff", "0X7f"]`, - `[0, null, 127, 255, 0, 0, 255, 127]`) - } - - c.checkCast(stype, arrow.PrimitiveTypes.Uint32, - `["2147483647", null, "4294967295", "0", "0x0", "0x7FFFFFFf", "0xFFFFFFFF"]`, - `[2147483647, null, 4294967295, 0, 0, 2147483647, 4294967295]`) - - c.checkCast(stype, arrow.PrimitiveTypes.Uint64, - `["9223372036854775807", null, "18446744073709551615", "0", "0x0", "0x7FFFFFFFFFFFFFFf", "0xfFFFFFFFFFFFFFFf"]`, - `[9223372036854775807, null, 18446744073709551615, 0, 0, 9223372036854775807, 18446744073709551615]`) - - for _, notInt8 := range []string{"z", "12 z", "128", "-129", "0.5", "0x", "0xfff", "-0xf0"} { - c.checkCastFails(stype, `["`+notInt8+`"]`, compute.SafeCastOptions(arrow.PrimitiveTypes.Int8)) - } - - for _, notUint8 := range []string{"256", "-1", "0.5", "0x", "0x3wa", "0x123"} { - c.checkCastFails(stype, `["`+notUint8+`"]`, compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8)) - } - } -} - -func (c *CastSuite) TestStringToFloating() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - for _, dt := range []arrow.DataType{arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Float64} { - c.checkCast(stype, dt, `["0.1", null, "127.3", "1e3", "200.4", "0.5"]`, - `[0.1, null, 127.3, 1000, 200.4, 0.5]`) - - for _, notFloat := range []string{"z"} { - c.checkCastFails(stype, `["`+notFloat+`"]`, compute.SafeCastOptions(dt)) - } - } - } -} - -func (c *CastSuite) 
TestUnsupportedInputType() { - // casting to a supported target type, but with an unsupported - // input for that target type. - arr, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) - defer arr.Release() - - toType := arrow.ListOf(arrow.BinaryTypes.String) - _, err := compute.CastToType(context.Background(), arr, toType) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "function 'cast_list' has no kernel matching input types (int32)") - - // test calling through the generic kernel API - datum := compute.NewDatum(arr) - defer datum.Release() - _, err = compute.CallFunction(context.Background(), "cast", compute.SafeCastOptions(toType), datum) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "function 'cast_list' has no kernel matching input types (int32)") -} - -func (c *CastSuite) TestUnsupportedTargetType() { - arr, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) - defer arr.Release() - - toType := arrow.DenseUnionOf([]arrow.Field{{Name: "a", Type: arrow.PrimitiveTypes.Int32}}, []arrow.UnionTypeCode{0}) - _, err := compute.CastToType(context.Background(), arr, toType) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "unsupported cast to dense_union from int32") - - // test calling through the generic kernel API - datum := compute.NewDatum(arr) - defer datum.Release() - _, err = compute.CallFunction(context.Background(), "cast", compute.SafeCastOptions(toType), datum) - c.ErrorIs(err, arrow.ErrNotImplemented) - c.ErrorContains(err, "unsupported cast to dense_union from int32") -} - -func (c *CastSuite) checkCastSelfZeroCopy(dt arrow.DataType, json string) { - arr, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(json)) - defer arr.Release() - - checkCastZeroCopy(c.T(), arr, dt, compute.NewCastOptions(dt, true)) -} - -func (c *CastSuite) checkCastZeroCopy(from arrow.DataType, json string, to arrow.DataType) { - arr, _, _ := 
array.FromJSON(c.mem, from, strings.NewReader(json)) - defer arr.Release() - checkCastZeroCopy(c.T(), arr, to, compute.NewCastOptions(to, true)) -} - -func (c *CastSuite) TestTimestampToTimestamp() { - tests := []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Timestamp_ms}, - {arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Timestamp_us}, - {arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Timestamp_ns}, - } - - var opts compute.CastOptions - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - c.checkCast(tt.coarse, tt.fine, `[0, null, 200, 1, 2]`, `[0, null, 200000, 1000, 2000]`) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, `[0, null, 200456, 1123, 2456]`, &opts) - - // with truncation allowed, divide/truncate - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, `[0, null, 200456, 1123, 2456]`, `[0, null, 200, 1, 2]`, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Timestamp_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - c.checkCast(tt.coarse, tt.fine, `[0, null, 200, 1, 2]`, `[0, null, 200000000000, 1000000000, 2000000000]`) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, `[0, null, 200456000000, 1123000000, 2456000000]`, &opts) - - // with truncation allowed, divide/truncate - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, `[0, null, 200456000000, 1123000000, 2456000000]`, `[0, null, 200, 1, 2]`, opts) - }) - } -} - -func (c *CastSuite) TestTimestampZeroCopy() { - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Timestamp_s /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Timestamp_s, `[0, null, 2000, 1000, 0]`, dt) - } - - 
c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Timestamp_s) -} - -func (c *CastSuite) TestTimestampToTimestampMultiplyOverflow() { - opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Timestamp_ns} - // 1000-01-01, 1800-01-01, 2000-01-01, 2300-01-01, 3000-01-01 - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_s, `[-30610224000, -5364662400, 946684800, 10413792000, 32503680000]`, &opts) -} - -var ( - timestampJSON = `["1970-01-01T00:00:59.123456789","2000-02-29T23:23:23.999999999", - "1899-01-01T00:59:20.001001001","2033-05-18T03:33:20.000000000", - "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", - "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132", - "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", - "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` - timestampSecondsJSON = `["1970-01-01T00:00:59","2000-02-29T23:23:23", - "1899-01-01T00:59:20","2033-05-18T03:33:20", - "2020-01-01T01:05:05", "2019-12-31T02:10:10", - "2019-12-30T03:15:15", "2009-12-31T04:20:20", - "2010-01-01T05:25:25", "2010-01-03T06:30:30", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", - "2005-12-31T09:45:45", "2008-12-28", "2008-12-29", - "2012-01-01 01:02:03", null]` - timestampExtremeJSON = `["1677-09-20T00:00:59.123456", "2262-04-13T23:23:23.999999"]` -) - -func (c *CastSuite) TestTimestampToDate() { - stamps, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Timestamp_ns, strings.NewReader(timestampJSON)) - defer stamps.Release() - date32, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Date32, - strings.NewReader(`[ - 0, 11016, -25932, 23148, - 18262, 18261, 18260, 14609, - 14610, 14612, 14613, 13149, - 13148, 14241, 14242, 15340, null - ]`)) - defer date32.Release() - date64, _, _ := array.FromJSON(c.mem, arrow.FixedWidthTypes.Date64, - strings.NewReader(`[ - 0, 951782400000, -2240524800000, 1999987200000, - 1577836800000, 
1577750400000, 1577664000000, 1262217600000, - 1262304000000, 1262476800000, 1262563200000, 1136073600000, - 1135987200000, 1230422400000, 1230508800000, 1325376000000, null]`), array.WithUseNumber()) - defer date64.Release() - - checkCast(c.T(), stamps, date32, *compute.DefaultCastOptions(true)) - checkCast(c.T(), stamps, date64, *compute.DefaultCastOptions(true)) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Date32, - timestampExtremeJSON, `[-106753, 106753]`) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Date64, - timestampExtremeJSON, `[-9223459200000, 9223459200000]`) - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Microsecond, arrow.Millisecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u} - c.checkCastExp(dt, timestampSecondsJSON, date32) - c.checkCastExp(dt, timestampSecondsJSON, date64) - } -} - -func (c *CastSuite) TestZonedTimestampToDate() { - c.Run("Pacific/Marquesas", func() { - dt := &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Pacific/Marquesas"} - c.checkCast(dt, arrow.FixedWidthTypes.Date32, - timestampJSON, `[-1, 11016, -25933, 23147, - 18261, 18260, 18259, 14608, - 14609, 14611, 14612, 13148, - 13148, 14240, 14241, 15339, null]`) - c.checkCast(dt, arrow.FixedWidthTypes.Date64, timestampJSON, - `[-86400000, 951782400000, -2240611200000, 1999900800000, - 1577750400000, 1577664000000, 1577577600000, 1262131200000, - 1262217600000, 1262390400000, 1262476800000, 1135987200000, - 1135987200000, 1230336000000, 1230422400000, 1325289600000, null]`) - }) - - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u, TimeZone: "Australia/Broken_Hill"} - c.checkCast(dt, arrow.FixedWidthTypes.Date32, timestampSecondsJSON, `[ - 0, 11017, -25932, 23148, - 18262, 18261, 18260, 14609, - 14610, 14612, 14613, 13149, - 13148, 14241, 14242, 15340, null]`) - c.checkCast(dt, 
arrow.FixedWidthTypes.Date64, timestampSecondsJSON, `[ - 0, 951868800000, -2240524800000, 1999987200000, 1577836800000, - 1577750400000, 1577664000000, 1262217600000, 1262304000000, - 1262476800000, 1262563200000, 1136073600000, 1135987200000, - 1230422400000, 1230508800000, 1325376000000, null]`) - } - - // invalid timezones - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u, TimeZone: "Mars/Mariner_Valley"} - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Date32, false)) - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Date64, false)) - } -} - -func (c *CastSuite) TestTimestampToTime() { - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64ns, - timestampJSON, `[ - 59123456789, 84203999999999, 3560001001001, 12800000000000, - 3905001000000, 7810002000000, 11715003000000, 15620004132000, - 19525005321000, 23430006163000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]`) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time64us, true)) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64us, - timestampExtremeJSON, `[59123456, 84203999999]`) - - timesSec := `[59, 84203, 3560, 12800, - 3905, 7810, 11715, 15620, - 19525, 23430, 27335, 31240, - 35145, 0, 0, 3723, null]` - timesMs := `[59000, 84203000, 3560000, 12800000, - 3905000, 7810000, 11715000, 15620000, - 19525000, 23430000, 27335000, 31240000, - 35145000, 0, 0, 3723000, null]` - timesUs := `[59000000, 84203000000, 3560000000, 12800000000, - 3905000000, 7810000000, 11715000000, 15620000000, - 19525000000, 23430000000, 27335000000, 31240000000, - 35145000000, 0, 0, 3723000000, null]` - timesNs := `[59000000000, 84203000000000, 3560000000000, 12800000000000, - 3905000000000, 7810000000000, 
11715000000000, 15620000000000, - 19525000000000, 23430000000000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]` - - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64us, - timestampSecondsJSON, timesUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64ns, - timestampSecondsJSON, timesNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64us, - timestampSecondsJSON, timesUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64ns, - timestampSecondsJSON, timesNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32ms, - timestampSecondsJSON, timesMs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32s, - timestampSecondsJSON, timesSec) - - trunc := compute.CastOptions{AllowTimeTruncate: true} - - timestampsUS := `["1970-01-01T00:00:59.123456","2000-02-29T23:23:23.999999", - "1899-01-01T00:59:20.001001","2033-05-18T03:33:20.000000", - "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", - "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132", - "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", - "2008-12-28", 
"2008-12-29", "2012-01-01 01:02:03", null]` - timestampsMS := `["1970-01-01T00:00:59.123","2000-02-29T23:23:23.999", - "1899-01-01T00:59:20.001","2033-05-18T03:33:20.000", - "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002", - "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004", - "2010-01-01T05:25:25.005", "2010-01-03T06:30:30.006", - "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45", - "2008-12-28", "2008-12-29", "2012-01-01 01:02:03", null]` - - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time64us, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time32ms, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ns, timestampJSON, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_us, timestampsUS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32ms, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_us, timestampsUS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) - c.checkCastFails(arrow.FixedWidthTypes.Timestamp_ms, timestampsMS, compute.NewCastOptions(arrow.FixedWidthTypes.Time32s, true)) - - timesNsUs := `[59123456, 84203999999, 3560001001, 12800000000, - 3905001000, 7810002000, 11715003000, 15620004132, - 19525005321, 23430006163, 27335000000, 31240000000, - 35145000000, 0, 0, 3723000000, null]` - timesNsMs := `[59123, 84203999, 3560001, 12800000, - 3905001, 7810002, 11715003, 15620004, - 19525005, 23430006, 27335000, 31240000, - 35145000, 0, 0, 3723000, null]` - timesUsNs := `[59123456000, 84203999999000, 3560001001000, 12800000000000, - 3905001000000, 7810002000000, 11715003000000, 15620004132000, - 19525005321000, 23430006163000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]` - timesMsNs := `[59123000000, 84203999000000, 3560001000000, 12800000000000, - 3905001000000, 
7810002000000, 11715003000000, 15620004000000, - 19525005000000, 23430006000000, 27335000000000, 31240000000000, - 35145000000000, 0, 0, 3723000000000, null]` - timesMsUs := `[59123000, 84203999000, 3560001000, 12800000000, - 3905001000, 7810002000, 11715003000, 15620004000, - 19525005000, 23430006000, 27335000000, 31240000000, - 35145000000, 0, 0, 3723000000, null]` - - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time64us, timestampJSON, timesNsUs, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32ms, timestampJSON, timesNsMs, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ns, arrow.FixedWidthTypes.Time32s, timestampJSON, timesSec, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32ms, timestampsUS, timesNsMs, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time32s, timestampsUS, timesSec, trunc) - c.checkCastOpts(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time32s, timestampsMS, timesSec, trunc) - - // upscaling tests - c.checkCast(arrow.FixedWidthTypes.Timestamp_us, arrow.FixedWidthTypes.Time64ns, timestampsUS, timesUsNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time64ns, timestampsMS, timesMsNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_ms, arrow.FixedWidthTypes.Time64us, timestampsMS, timesMsUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) - c.checkCast(arrow.FixedWidthTypes.Timestamp_s, arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) - - // invalid timezones - for _, u := range []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - dt := &arrow.TimestampType{Unit: u, TimeZone: "Mars/Mariner_Valley"} - switch u { - case arrow.Second, 
arrow.Millisecond: - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(&arrow.Time32Type{Unit: u}, false)) - default: - c.checkCastFails(dt, timestampSecondsJSON, compute.NewCastOptions(&arrow.Time64Type{Unit: u}, false)) - } - } -} - -func (c *CastSuite) TestZonedTimestampToTime() { - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Pacific/Marquesas"}, - arrow.FixedWidthTypes.Time64ns, timestampJSON, `[52259123456789, 50003999999999, 56480001001001, 65000000000000, - 56105001000000, 60010002000000, 63915003000000, 67820004132000, - 71725005321000, 75630006163000, 79535000000000, 83440000000000, - 945000000000, 52200000000000, 52200000000000, 55923000000000, null]`) - - timesSec := `[ - 34259, 35603, 35960, 47000, - 41705, 45610, 49515, 53420, - 57325, 61230, 65135, 69040, - 72945, 37800, 37800, 41523, null - ]` - timesMs := `[ - 34259000, 35603000, 35960000, 47000000, - 41705000, 45610000, 49515000, 53420000, - 57325000, 61230000, 65135000, 69040000, - 72945000, 37800000, 37800000, 41523000, null - ]` - timesUs := `[ - 34259000000, 35603000000, 35960000000, 47000000000, - 41705000000, 45610000000, 49515000000, 53420000000, - 57325000000, 61230000000, 65135000000, 69040000000, - 72945000000, 37800000000, 37800000000, 41523000000, null - ]` - timesNs := `[ - 34259000000000, 35603000000000, 35960000000000, 47000000000000, - 41705000000000, 45610000000000, 49515000000000, 53420000000000, - 57325000000000, 61230000000000, 65135000000000, 69040000000000, - 72945000000000, 37800000000000, 37800000000000, 41523000000000, null - ]` - - c.checkCast(&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time32s, timestampSecondsJSON, timesSec) - c.checkCast(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time32ms, timestampSecondsJSON, timesMs) - c.checkCast(&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: 
"Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time64us, timestampSecondsJSON, timesUs) - c.checkCast(&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Australia/Broken_Hill"}, - arrow.FixedWidthTypes.Time64ns, timestampSecondsJSON, timesNs) -} - -func (c *CastSuite) TestTimeToTime() { - var opts compute.CastOptions - - tests := []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time32ms}, - {arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64us}, - {arrow.FixedWidthTypes.Time64us, arrow.FixedWidthTypes.Time64ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000, 1000, 2000]` - willBeTruncated := `[0, null, 200456, 1123, 2456]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time64us}, - {arrow.FixedWidthTypes.Time32ms, arrow.FixedWidthTypes.Time64ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000, 1000000, 2000000]` - willBeTruncated := `[0, null, 200456000, 1123000, 2456000]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Time32s, arrow.FixedWidthTypes.Time64ns}, - } - - for _, tt := range tests 
{ - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000000, 1000000000, 2000000000]` - willBeTruncated := `[0, null, 200456000000, 1123000000, 2456000000]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } -} - -func (c *CastSuite) TestTimeZeroCopy() { - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Time32s /*, arrow.PrimitiveTypes.Int32*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Time32s, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Time32s) - - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Time64us /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Time64us, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Time64us) -} - -func (c *CastSuite) TestDateToDate() { - day32 := `[0, null, 100, 1, 10]` - day64 := `[0, null, 8640000000, 86400000, 864000000]` - - // multiply promotion - c.checkCast(arrow.FixedWidthTypes.Date32, arrow.FixedWidthTypes.Date64, day32, day64) - // no truncation - c.checkCast(arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Date32, day64, day32) - - day64WillBeTruncated := `[0, null, 8640000123, 86400456, 864000789]` - - opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Date32} - c.checkCastFails(arrow.FixedWidthTypes.Date64, day64WillBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(arrow.FixedWidthTypes.Date64, arrow.FixedWidthTypes.Date32, - day64WillBeTruncated, day32, opts) -} - -func (c *CastSuite) TestDateZeroCopy() { - for _, dt := range 
[]arrow.DataType{arrow.FixedWidthTypes.Date32 /*, arrow.PrimitiveTypes.Int32*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Date32, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int32, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Date32) - - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Date64 /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Date64, `[0, null, 172800000, 86400000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 172800000, 86400000, 0]`, arrow.FixedWidthTypes.Date64) -} - -func (c *CastSuite) TestDurationToDuration() { - var opts compute.CastOptions - - tests := []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_ms}, - {arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.Duration_us}, - {arrow.FixedWidthTypes.Duration_us, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000, 1000, 2000]` - willBeTruncated := `[0, null, 200456, 1123, 2456]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_us}, - {arrow.FixedWidthTypes.Duration_ms, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000, 1000000, 2000000]` - willBeTruncated := `[0, null, 200456000, 1123000, 2456000]` - - c.checkCast(tt.coarse, tt.fine, coarse, 
promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } - - tests = []struct { - coarse, fine arrow.DataType - }{ - {arrow.FixedWidthTypes.Duration_s, arrow.FixedWidthTypes.Duration_ns}, - } - - for _, tt := range tests { - c.Run("coarse "+tt.coarse.String()+" fine "+tt.fine.String(), func() { - coarse := `[0, null, 200, 1, 2]` - promoted := `[0, null, 200000000000, 1000000000, 2000000000]` - willBeTruncated := `[0, null, 200456000000, 1123000000, 2456000000]` - - c.checkCast(tt.coarse, tt.fine, coarse, promoted) - - opts.AllowTimeTruncate = false - opts.ToType = tt.coarse - c.checkCastFails(tt.fine, willBeTruncated, &opts) - - opts.AllowTimeTruncate = true - c.checkCastOpts(tt.fine, tt.coarse, willBeTruncated, coarse, opts) - }) - } -} - -func (c *CastSuite) TestDurationZeroCopy() { - for _, dt := range []arrow.DataType{arrow.FixedWidthTypes.Duration_s /*, arrow.PrimitiveTypes.Int64*/} { - c.checkCastZeroCopy(arrow.FixedWidthTypes.Duration_s, `[0, null, 2000, 1000, 0]`, dt) - } - c.checkCastZeroCopy(arrow.PrimitiveTypes.Int64, `[0, null, 2000, 1000, 0]`, arrow.FixedWidthTypes.Duration_s) -} - -func (c *CastSuite) TestDurationToDurationMultiplyOverflow() { - opts := compute.CastOptions{ToType: arrow.FixedWidthTypes.Duration_ns} - c.checkCastFails(arrow.FixedWidthTypes.Duration_s, `[10000000000, 1, 2, 3, 10000000000]`, &opts) -} - -func (c *CastSuite) TestStringToTimestamp() { - for _, dt := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.checkCast(dt, &arrow.TimestampType{Unit: arrow.Second}, `["1970-01-01", null, "2000-02-29"]`, `[0, null, 951782400]`) - c.checkCast(dt, &arrow.TimestampType{Unit: arrow.Microsecond}, `["1970-01-01", null, "2000-02-29"]`, `[0, null, 951782400000000]`) - - for _, u := range []arrow.TimeUnit{arrow.Second, 
arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond} { - for _, notTS := range []string{"", "xxx"} { - opts := compute.NewCastOptions(&arrow.TimestampType{Unit: u}, true) - c.checkCastFails(dt, `["`+notTS+`"]`, opts) - } - } - - zoned, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(`["2020-02-29T00:00:00Z", "2020-03-02T10:11:12+0102"]`)) - defer zoned.Release() - mixed, _, _ := array.FromJSON(c.mem, dt, strings.NewReader(`["2020-03-02T10:11:12+0102", "2020-02-29T00:00:00"]`)) - defer mixed.Release() - - c.checkCastArr(zoned, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "UTC"}, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) - - // timestamp with zone offset should not parse as naive - checkCastFails(c.T(), zoned, *compute.NewCastOptions(&arrow.TimestampType{Unit: arrow.Second}, true)) - - // mixed zoned/unzoned should not parse as naive - checkCastFails(c.T(), mixed, *compute.NewCastOptions(&arrow.TimestampType{Unit: arrow.Second}, true)) - - // timestamp with zone offset can parse as any time zone (since they're unambiguous) - c.checkCastArr(zoned, arrow.FixedWidthTypes.Timestamp_s, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) - c.checkCastArr(zoned, &arrow.TimestampType{Unit: arrow.Second, TimeZone: "America/Phoenix"}, `[1582934400, 1583140152]`, *compute.DefaultCastOptions(true)) - } -} - -func (c *CastSuite) TestIntToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - c.checkCast(arrow.PrimitiveTypes.Int8, stype, - `[0, 1, 127, -128, null]`, `["0", "1", "127", "-128", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint8, stype, - `[0, 1, 255, null]`, `["0", "1", "255", null]`) - - c.checkCast(arrow.PrimitiveTypes.Int16, stype, - `[0, 1, 32767, -32768, null]`, `["0", "1", "32767", "-32768", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint16, stype, - `[0, 1, 65535, null]`, `["0", "1", "65535", null]`) - - 
c.checkCast(arrow.PrimitiveTypes.Int32, stype, - `[0, 1, 2147483647, -2147483648, null]`, - `["0", "1", "2147483647", "-2147483648", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint32, stype, - `[0, 1, 4294967295, null]`, `["0", "1", "4294967295", null]`) - - c.checkCast(arrow.PrimitiveTypes.Int64, stype, - `[0, 1, 9223372036854775807, -9223372036854775808, null]`, - `["0", "1", "9223372036854775807", "-9223372036854775808", null]`) - - c.checkCast(arrow.PrimitiveTypes.Uint64, stype, - `[0, 1, 18446744073709551615, null]`, `["0", "1", "18446744073709551615", null]`) - }) - } -} - -func (c *CastSuite) TestFloatingToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - bldr := array.NewFloat32Builder(c.mem) - defer bldr.Release() - bldr.AppendValues([]float32{ - 0, float32(math.Copysign(0, -1)), 1.5, float32(math.Inf(-1)), - float32(math.Inf(0)), float32(math.NaN())}, nil) - bldr.AppendNull() - arr := bldr.NewArray() - defer arr.Release() - - bldr64 := array.NewFloat64Builder(c.mem) - defer bldr64.Release() - bldr64.AppendValues([]float64{ - 0, math.Copysign(0, -1), 1.5, math.Inf(-1), math.Inf(0), math.NaN()}, nil) - bldr64.AppendNull() - arr64 := bldr64.NewArray() - defer arr64.Release() - - c.checkCastArr(arr, stype, `["0", "-0", "1.5", "-Inf", "+Inf", "NaN", null]`, *compute.DefaultCastOptions(true)) - - c.checkCastArr(arr64, stype, `["0", "-0", "1.5", "-Inf", "+Inf", "NaN", null]`, *compute.DefaultCastOptions(true)) - }) - } -} - -func (c *CastSuite) TestBooleanToString() { - for _, stype := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - c.Run(stype.String(), func() { - c.checkCast(arrow.FixedWidthTypes.Boolean, stype, - `[true, true, false, null]`, `["true", "true", "false", null]`) - }) - } -} - -func (c *CastSuite) TestIdentityCasts() { - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Boolean, `[false, true, null, false]`) - - 
c.checkCastSelfZeroCopy(arrow.Null, `[null, null, null]`) - for _, typ := range numericTypes { - c.checkCastSelfZeroCopy(typ, `[1, 2, null, 4]`) - } - - // ["foo", "bar"] base64 encoded for binary - c.checkCastSelfZeroCopy(arrow.BinaryTypes.Binary, `["Zm9v", "YmFy"]`) - c.checkCastSelfZeroCopy(arrow.BinaryTypes.String, `["foo", "bar"]`) - c.checkCastSelfZeroCopy(&arrow.FixedSizeBinaryType{ByteWidth: 3}, `["Zm9v", "YmFy"]`) - - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Time32ms, `[1, 2, 3, 4]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Time64us, `[1, 2, 3, 4]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Date32, `[1, 2, 3, 4]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Date64, `[86400000, 0]`) - c.checkCastSelfZeroCopy(arrow.FixedWidthTypes.Timestamp_s, `[1, 2, 3, 4]`) - - c.checkCastSelfZeroCopy(&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.PrimitiveTypes.Int8}, - `[1, 2, 3, 1, null, 3]`) -} - -func (c *CastSuite) TestListToPrimitive() { - arr, _, _ := array.FromJSON(c.mem, arrow.ListOf(arrow.PrimitiveTypes.Int8), strings.NewReader(`[[1, 2], [3, 4]]`)) - defer arr.Release() - - _, err := compute.CastToType(context.Background(), arr, arrow.PrimitiveTypes.Uint8) - c.ErrorIs(err, arrow.ErrNotImplemented) -} - -type makeList func(arrow.DataType) arrow.DataType - -var listFactories = []makeList{ - func(dt arrow.DataType) arrow.DataType { return arrow.ListOf(dt) }, - func(dt arrow.DataType) arrow.DataType { return arrow.LargeListOf(dt) }, -} - -func (c *CastSuite) checkListToList(valTypes []arrow.DataType, jsonData string) { - for _, makeSrc := range listFactories { - for _, makeDest := range listFactories { - for _, srcValueType := range valTypes { - for _, dstValueType := range valTypes { - srcType := makeSrc(srcValueType) - dstType := makeDest(dstValueType) - c.Run(fmt.Sprintf("from %s to %s", srcType, dstType), func() { - c.checkCast(srcType, dstType, jsonData, jsonData) - }) - } - } - } - } -} - -func (c *CastSuite) 
TestListToList() { - c.checkListToList([]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64}, - `[[0], [1], null, [2, 3, 4], [5, 6], null, [], [7], [8, 9]]`) -} - -func (c *CastSuite) TestListToListNoNulls() { - c.checkListToList([]arrow.DataType{arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float32, arrow.PrimitiveTypes.Int64}, - `[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]`) -} - -func (c *CastSuite) TestListToListOptionsPassthru() { - for _, makeSrc := range listFactories { - for _, makeDest := range listFactories { - opts := compute.SafeCastOptions(makeDest(arrow.PrimitiveTypes.Int16)) - c.checkCastFails(makeSrc(arrow.PrimitiveTypes.Int32), `[[87654321]]`, opts) - - opts.AllowIntOverflow = true - c.checkCastOpts(makeSrc(arrow.PrimitiveTypes.Int32), makeDest(arrow.PrimitiveTypes.Int16), - `[[87654321]]`, `[[32689]]`, *opts) - } - } -} - -func (c *CastSuite) checkStructToStruct(types []arrow.DataType) { - for _, srcType := range types { - c.Run(srcType.String(), func() { - for _, destType := range types { - c.Run(destType.String(), func() { - fieldNames := []string{"a", "b"} - a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 3, 4, null]`)) - b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[null, 7, 8, 9, 0]`)) - a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 3, 4, null]`)) - b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[null, 7, 8, 9, 0]`)) - src, _ := array.NewStructArray([]arrow.Array{a1, b1}, fieldNames) - dest, _ := array.NewStructArray([]arrow.Array{a2, b2}, fieldNames) - defer func() { - a1.Release() - b1.Release() - a2.Release() - b2.Release() - src.Release() - dest.Release() - }() - - checkCast(c.T(), src, dest, *compute.DefaultCastOptions(true)) - c.Run("with nulls", func() { - nullBitmap := memory.NewBufferBytes([]byte{10}) - srcNullData := src.Data().(*array.Data).Copy() - srcNullData.Buffers()[0] = nullBitmap - 
srcNullData.SetNullN(3) - defer srcNullData.Release() - destNullData := dest.Data().(*array.Data).Copy() - destNullData.Buffers()[0] = nullBitmap - destNullData.SetNullN(3) - defer destNullData.Release() - - srcNulls := array.NewStructData(srcNullData) - destNulls := array.NewStructData(destNullData) - defer srcNulls.Release() - defer destNulls.Release() - - checkCast(c.T(), srcNulls, destNulls, *compute.DefaultCastOptions(true)) - }) - }) - } - }) - } -} - -func (c *CastSuite) checkStructToStructSubset(types []arrow.DataType) { - for _, srcType := range types { - c.Run(srcType.String(), func() { - for _, destType := range types { - c.Run(destType.String(), func() { - fieldNames := []string{"a", "b", "c", "d", "e"} - - a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 2, 5]`)) - defer a1.Release() - b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[3, 4, 7]`)) - defer b1.Release() - c1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[9, 11, 44]`)) - defer c1.Release() - d1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[6, 51, 49]`)) - defer d1.Release() - e1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[19, 17, 74]`)) - defer e1.Release() - - a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 5]`)) - defer a2.Release() - b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[3, 4, 7]`)) - defer b2.Release() - c2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[9, 11, 44]`)) - defer c2.Release() - d2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[6, 51, 49]`)) - defer d2.Release() - e2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[19, 17, 74]`)) - defer e2.Release() - - src, _ := array.NewStructArray([]arrow.Array{a1, b1, c1, d1, e1}, fieldNames) - defer src.Release() - dest1, _ := array.NewStructArray([]arrow.Array{a2}, []string{"a"}) - defer dest1.Release() - - opts := *compute.DefaultCastOptions(true) - checkCast(c.T(), src, 
dest1, opts) - - dest2, _ := array.NewStructArray([]arrow.Array{b2, c2}, []string{"b", "c"}) - defer dest2.Release() - checkCast(c.T(), src, dest2, opts) - - dest3, _ := array.NewStructArray([]arrow.Array{c2, d2, e2}, []string{"c", "d", "e"}) - defer dest3.Release() - checkCast(c.T(), src, dest3, opts) - - dest4, _ := array.NewStructArray([]arrow.Array{a2, b2, c2, e2}, []string{"a", "b", "c", "e"}) - defer dest4.Release() - checkCast(c.T(), src, dest4, opts) - - dest5, _ := array.NewStructArray([]arrow.Array{a2, b2, c2, d2, e2}, []string{"a", "b", "c", "d", "e"}) - defer dest5.Release() - checkCast(c.T(), src, dest5, opts) - - // field does not exist - dest6 := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "f", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options6 := compute.SafeCastOptions(dest6) - _, err := compute.CastArray(context.TODO(), src, options6) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - - // fields in wrong order - dest7 := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options7 := compute.SafeCastOptions(dest7) - _, err = compute.CastArray(context.TODO(), src, options7) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - }) - } - }) - } -} - -func (c *CastSuite) checkStructToStructSubsetWithNulls(types []arrow.DataType) { - for _, srcType := range types { - c.Run(srcType.String(), func() { - for _, destType := range types { - c.Run(destType.String(), func() { - fieldNames := []string{"a", "b", "c", "d", "e"} - - a1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[1, 
2, 5]`)) - defer a1.Release() - b1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[3, null, 7]`)) - defer b1.Release() - c1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[9, 11, 44]`)) - defer c1.Release() - d1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[6, 51, null]`)) - defer d1.Release() - e1, _, _ := array.FromJSON(c.mem, srcType, strings.NewReader(`[null, 17, 74]`)) - defer e1.Release() - - a2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[1, 2, 5]`)) - defer a2.Release() - b2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[3, null, 7]`)) - defer b2.Release() - c2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[9, 11, 44]`)) - defer c2.Release() - d2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[6, 51, null]`)) - defer d2.Release() - e2, _, _ := array.FromJSON(c.mem, destType, strings.NewReader(`[null, 17, 74]`)) - defer e2.Release() - - // 0, 1, 0 - nullBitmap := memory.NewBufferBytes([]byte{2}) - srcNull, _ := array.NewStructArrayWithNulls([]arrow.Array{a1, b1, c1, d1, e1}, fieldNames, nullBitmap, 2, 0) - defer srcNull.Release() - - dest1Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2}, []string{"a"}, nullBitmap, -1, 0) - defer dest1Null.Release() - opts := compute.DefaultCastOptions(true) - checkCast(c.T(), srcNull, dest1Null, *opts) - - dest2Null, _ := array.NewStructArrayWithNulls([]arrow.Array{b2, c2}, []string{"b", "c"}, nullBitmap, -1, 0) - defer dest2Null.Release() - checkCast(c.T(), srcNull, dest2Null, *opts) - - dest3Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2, d2, e2}, []string{"a", "d", "e"}, nullBitmap, -1, 0) - defer dest3Null.Release() - checkCast(c.T(), srcNull, dest3Null, *opts) - - dest4Null, _ := array.NewStructArrayWithNulls([]arrow.Array{a2, b2, c2, e2}, []string{"a", "b", "c", "e"}, nullBitmap, -1, 0) - defer dest4Null.Release() - checkCast(c.T(), srcNull, dest4Null, *opts) - - dest5Null, _ := 
array.NewStructArrayWithNulls([]arrow.Array{a2, b2, c2, d2, e2}, []string{"a", "b", "c", "d", "e"}, nullBitmap, -1, 0) - defer dest5Null.Release() - checkCast(c.T(), srcNull, dest5Null, *opts) - - // field does not exist - dest6Null := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "f", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options6Null := compute.SafeCastOptions(dest6Null) - _, err := compute.CastArray(context.TODO(), srcNull, options6Null) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - - // fields in wrong order - dest7Null := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int16, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - ) - options7Null := compute.SafeCastOptions(dest7Null) - _, err = compute.CastArray(context.TODO(), srcNull, options7Null) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") - }) - } - }) - } -} - -func (c *CastSuite) TestStructToSameSizedAndNamedStruct() { - c.checkStructToStruct(numericTypes) -} - -func (c *CastSuite) TestStructToStructSubset() { - c.checkStructToStructSubset(numericTypes) -} - -func (c *CastSuite) TestStructToStructSubsetWithNulls() { - c.checkStructToStructSubsetWithNulls(numericTypes) -} - -func (c *CastSuite) TestStructToSameSizedButDifferentNamedStruct() { - fieldNames := []string{"a", "b"} - a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1, 2]`)) - defer a.Release() - b, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 4]`)) - defer b.Release() - - src, _ := array.NewStructArray([]arrow.Array{a, b}, fieldNames) - defer src.Release() - 
- dest := arrow.StructOf( - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "d", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - ) - opts := compute.SafeCastOptions(dest) - _, err := compute.CastArray(context.TODO(), src, opts) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") -} - -func (c *CastSuite) TestStructToBiggerStruct() { - fieldNames := []string{"a", "b"} - a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1, 2]`)) - defer a.Release() - b, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[3, 4]`)) - defer b.Release() - - src, _ := array.NewStructArray([]arrow.Array{a, b}, fieldNames) - defer src.Release() - - dest := arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - arrow.Field{Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - ) - opts := compute.SafeCastOptions(dest) - _, err := compute.CastArray(context.TODO(), src, opts) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "struct fields don't match or are in the wrong order") -} - -func (c *CastSuite) TestStructToDifferentNullabilityStruct() { - c.Run("non-nullable to nullable", func() { - fieldsSrcNonNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int8}, - {Name: "b", Type: arrow.PrimitiveTypes.Int8}, - {Name: "c", Type: arrow.PrimitiveTypes.Int8}, - } - srcNonNull, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNonNullable...), - strings.NewReader(`[ - {"a": 11, "b": 32, "c": 95}, - {"a": 23, "b": 46, "c": 11}, - {"a": 56, "b": 37, "c": 44} - ]`)) - c.Require().NoError(err) - defer srcNonNull.Release() - - fieldsDest1Nullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - 
{Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - } - destNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsDest1Nullable...), - strings.NewReader(`[ - {"a": 11, "b": 32, "c": 95}, - {"a": 23, "b": 46, "c": 11}, - {"a": 56, "b": 37, "c": 44} - ]`)) - c.Require().NoError(err) - defer destNullable.Release() - - checkCast(c.T(), srcNonNull, destNullable, *compute.DefaultCastOptions(true)) - - fieldsDest2Nullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - } - - data := array.NewData(arrow.StructOf(fieldsDest2Nullable...), destNullable.Len(), destNullable.Data().Buffers(), - []arrow.ArrayData{destNullable.Data().Children()[0], destNullable.Data().Children()[2]}, - destNullable.NullN(), 0) - defer data.Release() - dest2Nullable := array.NewStructData(data) - defer dest2Nullable.Release() - checkCast(c.T(), srcNonNull, dest2Nullable, *compute.DefaultCastOptions(true)) - - fieldsDest3Nullable := []arrow.Field{ - {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - } - - data = array.NewData(arrow.StructOf(fieldsDest3Nullable...), destNullable.Len(), destNullable.Data().Buffers(), - []arrow.ArrayData{destNullable.Data().Children()[1]}, destNullable.NullN(), 0) - defer data.Release() - dest3Nullable := array.NewStructData(data) - defer dest3Nullable.Release() - checkCast(c.T(), srcNonNull, dest3Nullable, *compute.DefaultCastOptions(true)) - }) - c.Run("nullable to non-nullable", func() { - fieldsSrcNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "b", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "c", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - } - srcNullable, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNullable...), - strings.NewReader(`[ - {"a": 1, "b": 3, "c": 9}, - {"a": null, "b": 4, "c": 11}, - {"a": 5, "b": null, "c": 44} - ]`)) - 
c.Require().NoError(err) - defer srcNullable.Release() - - fieldsDest1NonNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "b", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - } - dest1NonNullable := arrow.StructOf(fieldsDest1NonNullable...) - options1NoNullable := compute.SafeCastOptions(dest1NonNullable) - _, err = compute.CastArray(context.TODO(), srcNullable, options1NoNullable) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "cannot cast nullable field to non-nullable field") - - fieldsDest2NonNullable := []arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - } - dest2NonNullable := arrow.StructOf(fieldsDest2NonNullable...) - options2NoNullable := compute.SafeCastOptions(dest2NonNullable) - _, err = compute.CastArray(context.TODO(), srcNullable, options2NoNullable) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "cannot cast nullable field to non-nullable field") - - fieldsDest3NonNullable := []arrow.Field{ - {Name: "c", Type: arrow.PrimitiveTypes.Int64, Nullable: false}, - } - dest3NonNullable := arrow.StructOf(fieldsDest3NonNullable...) 
- options3NoNullable := compute.SafeCastOptions(dest3NonNullable) - _, err = compute.CastArray(context.TODO(), srcNullable, options3NoNullable) - c.ErrorIs(err, arrow.ErrType) - c.ErrorContains(err, "cannot cast nullable field to non-nullable field") - }) -} - -func (c *CastSuite) smallIntArrayFromJSON(data string) arrow.Array { - arr, _, _ := array.FromJSON(c.mem, types.NewSmallintType(), strings.NewReader(data)) - return arr -} - -func (c *CastSuite) TestExtensionTypeToIntDowncast() { - smallint := types.NewSmallintType() - arrow.RegisterExtensionType(smallint) - defer arrow.UnregisterExtensionType("smallint") - - c.Run("smallint(int16) to int16", func() { - arr := c.smallIntArrayFromJSON(`[0, 100, 200, 1, 2]`) - defer arr.Release() - - checkCastZeroCopy(c.T(), arr, arrow.PrimitiveTypes.Int16, compute.DefaultCastOptions(true)) - - c.checkCast(smallint, arrow.PrimitiveTypes.Uint8, - `[0, 100, 200, 1, 2]`, `[0, 100, 200, 1, 2]`) - }) - - c.Run("smallint(int16) to uint8 with overflow", func() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8) - c.checkCastFails(smallint, `[0, null, 256, 1, 3]`, opts) - - opts.AllowIntOverflow = true - c.checkCastOpts(smallint, arrow.PrimitiveTypes.Uint8, - `[0, null, 256, 1, 3]`, `[0, null, 0, 1, 3]`, *opts) - }) - - c.Run("smallint(int16) to uint8 with underflow", func() { - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Uint8) - c.checkCastFails(smallint, `[0, null, -1, 1, 3]`, opts) - - opts.AllowIntOverflow = true - c.checkCastOpts(smallint, arrow.PrimitiveTypes.Uint8, - `[0, null, -1, 1, 3]`, `[0, null, 255, 1, 3]`, *opts) - }) -} - -func (c *CastSuite) TestNoOutBitmapIfIsAllValid() { - a, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) - defer a.Release() - - opts := compute.SafeCastOptions(arrow.PrimitiveTypes.Int32) - result, err := compute.CastArray(context.Background(), a, opts) - c.NoError(err) - c.NotNil(a.Data().Buffers()[0]) - c.Nil(result.Data().Buffers()[0]) -} - 
-func (c *CastSuite) TestFromDictionary() { - ctx := compute.WithAllocator(context.Background(), c.mem) - - dictionaries := []arrow.Array{} - - for _, ty := range numericTypes { - a, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`[23, 12, 45, 12, null]`)) - defer a.Release() - dictionaries = append(dictionaries, a) - } - - for _, ty := range []arrow.DataType{arrow.BinaryTypes.String, arrow.BinaryTypes.LargeString} { - a, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`["foo", "bar", "baz", "foo", null]`)) - defer a.Release() - dictionaries = append(dictionaries, a) - } - - for _, d := range dictionaries { - for _, ty := range dictIndexTypes { - indices, _, _ := array.FromJSON(c.mem, ty, strings.NewReader(`[4, 0, 1, 2, 0, 4, null, 2]`)) - - expected, err := compute.Take(ctx, compute.TakeOptions{}, &compute.ArrayDatum{d.Data()}, &compute.ArrayDatum{indices.Data()}) - c.Require().NoError(err) - exp := expected.(*compute.ArrayDatum).MakeArray() - - dictArr := array.NewDictionaryArray(&arrow.DictionaryType{IndexType: ty, ValueType: d.DataType()}, indices, d) - checkCast(c.T(), dictArr, exp, *compute.SafeCastOptions(d.DataType())) - - indices.Release() - expected.Release() - exp.Release() - dictArr.Release() - return - } - } -} - -func TestCasts(t *testing.T) { - suite.Run(t, new(CastSuite)) -} - -const rngseed = 0x94378165 - -func benchmarkNumericCast(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) { - rng := gen.NewRandomArrayGenerator(rngseed, memory.DefaultAllocator) - arr := rng.Numeric(fromType.ID(), size, min, max, nullprob) - var ( - err error - out compute.Datum - ctx = context.Background() - input = compute.NewDatum(arr.Data()) - ) - - b.Cleanup(func() { - arr.Release() - input.Release() - }) - - opts.ToType = toType - b.ResetTimer() - b.SetBytes(size * int64(fromType.(arrow.FixedWidthDataType).Bytes())) - for i := 0; i < b.N; i++ { - out, err = compute.CastDatum(ctx, input, &opts) - if 
err != nil { - b.Fatal(err) - } - out.Release() - } -} - -func benchmarkFloatingToIntegerCast(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) { - rng := gen.NewRandomArrayGenerator(rngseed, memory.DefaultAllocator) - arr := rng.Numeric(toType.ID(), size, min, max, nullprob) - asFloat, err := compute.CastToType(context.Background(), arr, fromType) - if err != nil { - b.Fatal(err) - } - arr.Release() - - var ( - out compute.Datum - ctx = context.Background() - input = compute.NewDatum(asFloat.Data()) - ) - - b.Cleanup(func() { - asFloat.Release() - input.Release() - }) - - opts.ToType = toType - b.ResetTimer() - b.SetBytes(size * int64(fromType.(arrow.FixedWidthDataType).Bytes())) - for i := 0; i < b.N; i++ { - out, err = compute.CastDatum(ctx, input, &opts) - if err != nil { - b.Fatal(err) - } - out.Release() - } -} - -func BenchmarkCasting(b *testing.B) { - type benchfn func(b *testing.B, fromType, toType arrow.DataType, opts compute.CastOptions, size, min, max int64, nullprob float64) - - tests := []struct { - from, to arrow.DataType - min, max int64 - safe bool - fn benchfn - }{ - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, math.MinInt32, math.MaxInt32, true, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, math.MinInt32, math.MaxInt32, false, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Uint32, arrow.PrimitiveTypes.Int32, 0, math.MaxInt32, true, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float64, 0, 1000, true, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Float64, 0, 1000, false, benchmarkNumericCast}, - {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, -1000, 1000, true, benchmarkFloatingToIntegerCast}, - {arrow.PrimitiveTypes.Float64, arrow.PrimitiveTypes.Int32, -1000, 1000, false, benchmarkFloatingToIntegerCast}, - } - - for _, tt := range tests { - for _, sz := range 
[]int64{int64(CpuCacheSizes[1]) /* L2 Cache Size */} { - for _, nullProb := range []float64{0, 0.1, 0.5, 0.9, 1} { - arraySize := sz / int64(tt.from.(arrow.FixedWidthDataType).Bytes()) - opts := compute.DefaultCastOptions(tt.safe) - b.Run(fmt.Sprintf("sz=%d/nullprob=%.2f/from=%s/to=%s/safe=%t", arraySize, nullProb, tt.from, tt.to, tt.safe), func(b *testing.B) { - tt.fn(b, tt.from, tt.to, *opts, arraySize, tt.min, tt.max, nullProb) - }) - } - } - } -} diff --git a/go/arrow/compute/datum.go b/go/arrow/compute/datum.go deleted file mode 100644 index 9619fe09610de..0000000000000 --- a/go/arrow/compute/datum.go +++ /dev/null @@ -1,305 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -//go:generate go run golang.org/x/tools/cmd/stringer -type=DatumKind -linecomment - -// DatumKind is an enum used for denoting which kind of type a datum is encapsulating -type DatumKind int - -const ( - KindNone DatumKind = iota // none - KindScalar // scalar - KindArray // array - KindChunked // chunked_array - KindRecord // record_batch - KindTable // table -) - -const UnknownLength int64 = -1 - -// DatumIsValue returns true if the datum passed is a Scalar, Array -// or ChunkedArray type (e.g. it contains a specific value not a -// group of values) -func DatumIsValue(d Datum) bool { - switch d.Kind() { - case KindScalar, KindArray, KindChunked: - return true - } - return false -} - -// Datum is a variant interface for wrapping the various Arrow data structures -// for now the various Datum types just hold a Value which is the type they -// are wrapping, but it might make sense in the future for those types -// to actually be aliases or embed their types instead. Not sure yet. -type Datum interface { - fmt.Stringer - Kind() DatumKind - Len() int64 - Equals(Datum) bool - Release() - - data() any -} - -// ArrayLikeDatum is an interface for treating a Datum similarly to an Array, -// so that it is easy to differentiate between Record/Table/Collection and Scalar, -// Array/ChunkedArray for ease of use. Chunks will return an empty slice for Scalar, -// a slice with 1 element for Array, and the slice of chunks for a chunked array. -type ArrayLikeDatum interface { - Datum - NullN() int64 - Type() arrow.DataType - Chunks() []arrow.Array -} - -// TableLikeDatum is an interface type for specifying either a RecordBatch or a -// Table as both contain a schema as opposed to just a single data type. 
-type TableLikeDatum interface { - Datum - Schema() *arrow.Schema -} - -// EmptyDatum is the null case, a Datum with nothing in it. -type EmptyDatum struct{} - -func (EmptyDatum) String() string { return "nullptr" } -func (EmptyDatum) Kind() DatumKind { return KindNone } -func (EmptyDatum) Len() int64 { return UnknownLength } -func (EmptyDatum) Release() {} -func (EmptyDatum) Equals(other Datum) bool { - _, ok := other.(EmptyDatum) - return ok -} -func (EmptyDatum) data() any { return nil } - -// ScalarDatum contains a scalar value -type ScalarDatum struct { - Value scalar.Scalar -} - -func (ScalarDatum) Kind() DatumKind { return KindScalar } -func (ScalarDatum) Len() int64 { return 1 } -func (ScalarDatum) Chunks() []arrow.Array { return nil } -func (d *ScalarDatum) Type() arrow.DataType { return d.Value.DataType() } -func (d *ScalarDatum) String() string { return d.Value.String() } -func (d *ScalarDatum) ToScalar() (scalar.Scalar, error) { - return d.Value, nil -} -func (d *ScalarDatum) data() any { return d.Value } -func (d *ScalarDatum) NullN() int64 { - if d.Value.IsValid() { - return 0 - } - return 1 -} - -type releasable interface { - Release() -} - -func (d *ScalarDatum) Release() { - if v, ok := d.Value.(releasable); ok { - v.Release() - } -} - -func (d *ScalarDatum) Equals(other Datum) bool { - if rhs, ok := other.(*ScalarDatum); ok { - return scalar.Equals(d.Value, rhs.Value) - } - return false -} - -// ArrayDatum references an array.Data object which can be used to create -// array instances from if needed. 
-type ArrayDatum struct { - Value arrow.ArrayData -} - -func (ArrayDatum) Kind() DatumKind { return KindArray } -func (d *ArrayDatum) Type() arrow.DataType { return d.Value.DataType() } -func (d *ArrayDatum) Len() int64 { return int64(d.Value.Len()) } -func (d *ArrayDatum) NullN() int64 { return int64(d.Value.NullN()) } -func (d *ArrayDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } -func (d *ArrayDatum) MakeArray() arrow.Array { return array.MakeFromData(d.Value) } -func (d *ArrayDatum) Chunks() []arrow.Array { return []arrow.Array{d.MakeArray()} } -func (d *ArrayDatum) ToScalar() (scalar.Scalar, error) { - return scalar.NewListScalarData(d.Value), nil -} -func (d *ArrayDatum) Release() { - d.Value.Release() - d.Value = nil -} -func (d *ArrayDatum) data() any { return d.Value } -func (d *ArrayDatum) Equals(other Datum) bool { - rhs, ok := other.(*ArrayDatum) - if !ok { - return false - } - - left := d.MakeArray() - defer left.Release() - right := rhs.MakeArray() - defer right.Release() - - return array.Equal(left, right) -} - -// ChunkedDatum contains a chunked array for use with expressions and compute. 
-type ChunkedDatum struct { - Value *arrow.Chunked -} - -func (ChunkedDatum) Kind() DatumKind { return KindChunked } -func (d *ChunkedDatum) Type() arrow.DataType { return d.Value.DataType() } -func (d *ChunkedDatum) Len() int64 { return int64(d.Value.Len()) } -func (d *ChunkedDatum) NullN() int64 { return int64(d.Value.NullN()) } -func (d *ChunkedDatum) String() string { return fmt.Sprintf("Array:{%s}", d.Value.DataType()) } -func (d *ChunkedDatum) Chunks() []arrow.Array { return d.Value.Chunks() } -func (d *ChunkedDatum) data() any { return d.Value } -func (d *ChunkedDatum) Release() { - d.Value.Release() - d.Value = nil -} - -func (d *ChunkedDatum) Equals(other Datum) bool { - if rhs, ok := other.(*ChunkedDatum); ok { - return array.ChunkedEqual(d.Value, rhs.Value) - } - return false -} - -// RecordDatum contains an array.Record for passing a full record to an expression -// or to compute. -type RecordDatum struct { - Value arrow.Record -} - -func (RecordDatum) Kind() DatumKind { return KindRecord } -func (RecordDatum) String() string { return "RecordBatch" } -func (r *RecordDatum) Len() int64 { return r.Value.NumRows() } -func (r *RecordDatum) Schema() *arrow.Schema { return r.Value.Schema() } -func (r *RecordDatum) data() any { return r.Value } -func (r *RecordDatum) Release() { - r.Value.Release() - r.Value = nil -} - -func (r *RecordDatum) Equals(other Datum) bool { - if rhs, ok := other.(*RecordDatum); ok { - return array.RecordEqual(r.Value, rhs.Value) - } - return false -} - -// TableDatum contains a table so that multiple record batches can be worked with -// together as a single table for being passed to compute and expression handling. 
-type TableDatum struct { - Value arrow.Table -} - -func (TableDatum) Kind() DatumKind { return KindTable } -func (TableDatum) String() string { return "Table" } -func (d *TableDatum) Len() int64 { return d.Value.NumRows() } -func (d *TableDatum) Schema() *arrow.Schema { return d.Value.Schema() } -func (d *TableDatum) data() any { return d.Value } -func (d *TableDatum) Release() { - d.Value.Release() - d.Value = nil -} - -func (d *TableDatum) Equals(other Datum) bool { - if rhs, ok := other.(*TableDatum); ok { - return array.TableEqual(d.Value, rhs.Value) - } - return false -} - -// NewDatum will construct the appropriate Datum type based on what is passed in -// as the argument. -// -// An arrow.Array gets an ArrayDatum -// An array.Chunked gets a ChunkedDatum -// An array.Record gets a RecordDatum -// an array.Table gets a TableDatum -// a scalar.Scalar gets a ScalarDatum -// -// Anything else is passed to scalar.MakeScalar and receives a scalar -// datum of that appropriate type. -func NewDatum(value interface{}) Datum { - switch v := value.(type) { - case Datum: - return NewDatum(v.data()) - case arrow.Array: - v.Data().Retain() - return &ArrayDatum{v.Data()} - case scalar.Releasable: - v.Retain() - return NewDatumWithoutOwning(v) - case scalar.Scalar: - return &ScalarDatum{v} - default: - return &ScalarDatum{scalar.MakeScalar(value)} - } -} - -// NewDatumWithoutOwning is like NewDatum only it does not call Retain on -// the passed in value (if applicable). This means that if the resulting -// Datum should not have Release called on it and the original value needs -// to outlive the Datum. -// -// Only use this if you know what you're doing. 
For the most part this is -// just a convenience function.+- - -func NewDatumWithoutOwning(value interface{}) Datum { - switch v := value.(type) { - case arrow.Array: - return &ArrayDatum{v.Data()} - case arrow.ArrayData: - return &ArrayDatum{v} - case *arrow.Chunked: - return &ChunkedDatum{v} - case arrow.Record: - return &RecordDatum{v} - case arrow.Table: - return &TableDatum{v} - case scalar.Scalar: - return &ScalarDatum{v} - default: - return &ScalarDatum{scalar.MakeScalar(value)} - } -} - -var ( - _ ArrayLikeDatum = (*ScalarDatum)(nil) - _ ArrayLikeDatum = (*ArrayDatum)(nil) - _ ArrayLikeDatum = (*ChunkedDatum)(nil) - _ TableLikeDatum = (*RecordDatum)(nil) - _ TableLikeDatum = (*TableDatum)(nil) -) diff --git a/go/arrow/compute/datumkind_string.go b/go/arrow/compute/datumkind_string.go deleted file mode 100644 index 3603e5e495414..0000000000000 --- a/go/arrow/compute/datumkind_string.go +++ /dev/null @@ -1,30 +0,0 @@ -// Code generated by "stringer -type=DatumKind -linecomment"; DO NOT EDIT. - -//go:build go1.18 - -package compute - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[KindNone-0] - _ = x[KindScalar-1] - _ = x[KindArray-2] - _ = x[KindChunked-3] - _ = x[KindRecord-4] - _ = x[KindTable-5] -} - -const _DatumKind_name = "nonescalararraychunked_arrayrecord_batchtable" - -var _DatumKind_index = [...]uint8{0, 4, 10, 15, 28, 40, 45} - -func (i DatumKind) String() string { - if i < 0 || i >= DatumKind(len(_DatumKind_index)-1) { - return "DatumKind(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _DatumKind_name[_DatumKind_index[i]:_DatumKind_index[i+1]] -} diff --git a/go/arrow/compute/doc.go b/go/arrow/compute/doc.go deleted file mode 100644 index 7c763cb18d0ff..0000000000000 --- a/go/arrow/compute/doc.go +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package compute is a native-go implementation of an Acero-like -// arrow compute engine. It requires go1.18+ -// -// While consumers of Arrow that are able to use CGO could utilize the -// C Data API (using the cdata package) and could link against the -// acero library directly, there are consumers who cannot use CGO. This -// is an attempt to provide for those users, and in general create a -// native-go arrow compute engine. 
-// -// The overwhelming majority of things in this package require go1.18 as -// it utilizes generics. The files in this package and its sub-packages -// are all excluded from being built by go versions lower than 1.18 so -// that the larger Arrow module itself is still compatible with go1.17. -// -// Everything in this package should be considered Experimental for now. -package compute - -//go:generate stringer -type=FuncKind -linecomment diff --git a/go/arrow/compute/example_test.go b/go/arrow/compute/example_test.go deleted file mode 100644 index d427fb622d24a..0000000000000 --- a/go/arrow/compute/example_test.go +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "context" - "fmt" - "log" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/memory" -) - -// This example demonstrates how to register a custom scalar function. 
-func Example_customFunction() { - pool := memory.NewGoAllocator() - - ctx := context.Background() - execCtx := compute.DefaultExecCtx() - ctx = compute.SetExecCtx(ctx, execCtx) - - add42 := compute.NewScalarFunction("add_42", compute.Arity{ - NArgs: 1, - }, compute.FunctionDoc{ - Summary: "Returns the input values plus 42", - ArgNames: []string{"input"}, - }) - - if err := add42.AddNewKernel( - []exec.InputType{ - // We accept a single argument (array) of Int8 type. - { - Kind: exec.InputExact, - Type: arrow.PrimitiveTypes.Int8, - }, - }, - // We'll return a single Int8 array. - exec.NewOutputType(arrow.PrimitiveTypes.Int8), - func(ctx *exec.KernelCtx, span *exec.ExecSpan, result *exec.ExecResult) error { - // The second buffer contains the values. Both for the input and the output arrays. - for i, x := range span.Values[0].Array.Buffers[1].Buf { - result.Buffers[1].Buf[i] = x + 42 - } - return nil - }, - nil, - ); err != nil { - log.Fatal(err) - } - execCtx.Registry.AddFunction(add42, true) - - inputArrayBuilder := array.NewInt8Builder(pool) - for i := 0; i < 16; i++ { - inputArrayBuilder.Append(int8(i)) - } - inputArray := inputArrayBuilder.NewArray() - - outputArrayDatum, err := compute.CallFunction( - compute.SetExecCtx(context.Background(), execCtx), - "add_42", - nil, - &compute.ArrayDatum{Value: inputArray.Data()}, - ) - if err != nil { - log.Fatal(err) - } - - fmt.Println(array.NewInt8Data(outputArrayDatum.(*compute.ArrayDatum).Value).Int8Values()) - - // Output: - // [42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57] -} diff --git a/go/arrow/compute/exec.go b/go/arrow/compute/exec.go deleted file mode 100644 index 1142297c1c396..0000000000000 --- a/go/arrow/compute/exec.go +++ /dev/null @@ -1,199 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal/debug" -) - -func haveChunkedArray(values []Datum) bool { - for _, v := range values { - if v.Kind() == KindChunked { - return true - } - } - return false -} - -// ExecSpanFromBatch constructs and returns a new ExecSpan from the values -// inside of the ExecBatch which could be scalar or arrays. -// -// This is mostly used for tests but is also a convenience method for other -// cases. -func ExecSpanFromBatch(batch *ExecBatch) *exec.ExecSpan { - out := &exec.ExecSpan{Len: batch.Len, Values: make([]exec.ExecValue, len(batch.Values))} - for i, v := range batch.Values { - outVal := &out.Values[i] - if v.Kind() == KindScalar { - outVal.Scalar = v.(*ScalarDatum).Value - } else { - outVal.Array.SetMembers(v.(*ArrayDatum).Value) - outVal.Scalar = nil - } - } - return out -} - -// this is the primary driver of execution -func execInternal(ctx context.Context, fn Function, opts FunctionOptions, passedLen int64, args ...Datum) (result Datum, err error) { - if opts == nil { - if err = checkOptions(fn, opts); err != nil { - return - } - opts = fn.DefaultOptions() - } - - // we only allow Array, ChunkedArray, and Scalars for now. 
- // RecordBatch and Table datums are disallowed. - if err = checkAllIsValue(args); err != nil { - return - } - - inTypes := make([]arrow.DataType, len(args)) - for i, a := range args { - inTypes[i] = a.(ArrayLikeDatum).Type() - } - - var ( - k exec.Kernel - executor KernelExecutor - ) - - switch fn.Kind() { - case FuncScalar: - executor = scalarExecPool.Get().(*scalarExecutor) - defer func() { - executor.Clear() - scalarExecPool.Put(executor.(*scalarExecutor)) - }() - case FuncVector: - executor = vectorExecPool.Get().(*vectorExecutor) - defer func() { - executor.Clear() - vectorExecPool.Put(executor.(*vectorExecutor)) - }() - default: - return nil, fmt.Errorf("%w: direct execution of %s", arrow.ErrNotImplemented, fn.Kind()) - } - - if k, err = fn.DispatchBest(inTypes...); err != nil { - return - } - - var newArgs []Datum - // cast arguments if necessary - for i, arg := range args { - if !arrow.TypeEqual(inTypes[i], arg.(ArrayLikeDatum).Type()) { - if newArgs == nil { - newArgs = make([]Datum, len(args)) - copy(newArgs, args) - } - newArgs[i], err = CastDatum(ctx, arg, SafeCastOptions(inTypes[i])) - if err != nil { - return nil, err - } - defer newArgs[i].Release() - } - } - if newArgs != nil { - args = newArgs - } - - kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k} - init := k.GetInitFn() - kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: inTypes, Options: opts} - if init != nil { - kctx.State, err = init(kctx, kinitArgs) - if err != nil { - return - } - } - - if err = executor.Init(kctx, kinitArgs); err != nil { - return - } - - input := ExecBatch{Values: args, Len: 0} - if input.NumValues() == 0 { - if passedLen != -1 { - input.Len = passedLen - } - } else { - inferred, allSame := inferBatchLength(input.Values) - input.Len = inferred - switch fn.Kind() { - case FuncScalar: - if passedLen != -1 && passedLen != inferred { - return nil, fmt.Errorf("%w: passed batch length for execution did not match actual length for scalar fn execution", - arrow.ErrInvalid) - } - 
case FuncVector: - vkernel := k.(*exec.VectorKernel) - if !(allSame || !vkernel.CanExecuteChunkWise) { - return nil, fmt.Errorf("%w: vector kernel arguments must all be the same length", arrow.ErrInvalid) - } - } - } - - ectx := GetExecCtx(ctx) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - ch := make(chan Datum, ectx.ExecChannelSize) - go func() { - defer close(ch) - if err = executor.Execute(ctx, &input, ch); err != nil { - cancel() - } - }() - - result = executor.WrapResults(ctx, ch, haveChunkedArray(input.Values)) - if err == nil { - debug.Assert(executor.CheckResultType(result) == nil, "invalid result type") - } - - if ctx.Err() == context.Canceled && result != nil { - result.Release() - } - - return -} - -// CallFunction is a one-shot invoker for all types of functions. -// -// It will perform kernel-dispatch, argument checking, iteration of -// ChunkedArray inputs and wrapping of outputs. -// -// To affect the execution options, you must call SetExecCtx and pass -// the resulting context in here. -func CallFunction(ctx context.Context, funcName string, opts FunctionOptions, args ...Datum) (Datum, error) { - ectx := GetExecCtx(ctx) - fn, ok := ectx.Registry.GetFunction(funcName) - if !ok { - return nil, fmt.Errorf("%w: function '%s' not found", arrow.ErrKey, funcName) - } - - return fn.Execute(ctx, opts, args...) -} diff --git a/go/arrow/compute/exec/hash_util.go b/go/arrow/compute/exec/hash_util.go deleted file mode 100644 index 0c8f7df5a3237..0000000000000 --- a/go/arrow/compute/exec/hash_util.go +++ /dev/null @@ -1,24 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package exec - -// ADAPTED FROM HASH UTILITIES FOR BOOST - -func HashCombine(seed, value uint64) uint64 { - seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2) - return seed -} diff --git a/go/arrow/compute/exec/kernel.go b/go/arrow/compute/exec/kernel.go deleted file mode 100644 index 600e52c681686..0000000000000 --- a/go/arrow/compute/exec/kernel.go +++ /dev/null @@ -1,695 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec - -import ( - "context" - "fmt" - "hash/maphash" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/exp/slices" -) - -var hashSeed = maphash.MakeSeed() - -type ctxAllocKey struct{} - -// WithAllocator returns a new context with the provided allocator -// embedded into the context. -func WithAllocator(ctx context.Context, mem memory.Allocator) context.Context { - return context.WithValue(ctx, ctxAllocKey{}, mem) -} - -// GetAllocator retrieves the allocator from the context, or returns -// memory.DefaultAllocator if there was no allocator in the provided -// context. -func GetAllocator(ctx context.Context) memory.Allocator { - mem, ok := ctx.Value(ctxAllocKey{}).(memory.Allocator) - if !ok { - return memory.DefaultAllocator - } - return mem -} - -// Kernel defines the minimum interface required for the basic execution -// kernel. It will grow as the implementation requires. -type Kernel interface { - GetInitFn() KernelInitFn - GetSig() *KernelSignature -} - -// NonAggKernel builds on the base Kernel interface for -// non aggregate execution kernels. Specifically this will -// represent Scalar and Vector kernels. -type NonAggKernel interface { - Kernel - Exec(*KernelCtx, *ExecSpan, *ExecResult) error - GetNullHandling() NullHandling - GetMemAlloc() MemAlloc - CanFillSlices() bool -} - -// KernelCtx is a small struct holding the context for a kernel execution -// consisting of a pointer to the kernel, initialized state (if needed) -// and the context for this execution. 
-type KernelCtx struct { - Ctx context.Context - Kernel Kernel - State KernelState -} - -func (k *KernelCtx) Allocate(bufsize int) *memory.Buffer { - buf := memory.NewResizableBuffer(GetAllocator(k.Ctx)) - buf.Resize(bufsize) - return buf -} - -func (k *KernelCtx) AllocateBitmap(nbits int64) *memory.Buffer { - nbytes := bitutil.BytesForBits(nbits) - return k.Allocate(int(nbytes)) -} - -// TypeMatcher define an interface for matching Input or Output types -// for execution kernels. There are multiple implementations of this -// interface provided by this package. -type TypeMatcher interface { - fmt.Stringer - Matches(typ arrow.DataType) bool - Equals(other TypeMatcher) bool -} - -type sameTypeIDMatcher struct { - accepted arrow.Type -} - -func (s sameTypeIDMatcher) Matches(typ arrow.DataType) bool { return s.accepted == typ.ID() } -func (s sameTypeIDMatcher) Equals(other TypeMatcher) bool { - if s == other { - return true - } - - o, ok := other.(*sameTypeIDMatcher) - if !ok { - return false - } - - return s.accepted == o.accepted -} - -func (s sameTypeIDMatcher) String() string { - return "Type::" + s.accepted.String() -} - -// SameTypeID returns a type matcher which will match -// any DataType that uses the same arrow.Type ID as the one -// passed in here. 
-func SameTypeID(id arrow.Type) TypeMatcher { return &sameTypeIDMatcher{id} } - -type timeUnitMatcher struct { - id arrow.Type - unit arrow.TimeUnit -} - -func (s timeUnitMatcher) Matches(typ arrow.DataType) bool { - if typ.ID() != s.id { - return false - } - return s.unit == typ.(arrow.TemporalWithUnit).TimeUnit() -} - -func (s timeUnitMatcher) String() string { - return strings.ToLower(s.id.String()) + "(" + s.unit.String() + ")" -} - -func (s *timeUnitMatcher) Equals(other TypeMatcher) bool { - if s == other { - return true - } - - o, ok := other.(*timeUnitMatcher) - if !ok { - return false - } - return o.id == s.id && o.unit == s.unit -} - -// TimestampTypeUnit returns a TypeMatcher that will match only -// a Timestamp datatype with the specified TimeUnit. -func TimestampTypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.TIMESTAMP, unit} -} - -// Time32TypeUnit returns a TypeMatcher that will match only -// a Time32 datatype with the specified TimeUnit. -func Time32TypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.TIME32, unit} -} - -// Time64TypeUnit returns a TypeMatcher that will match only -// a Time64 datatype with the specified TimeUnit. -func Time64TypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.TIME64, unit} -} - -// DurationTypeUnit returns a TypeMatcher that will match only -// a Duration datatype with the specified TimeUnit. 
-func DurationTypeUnit(unit arrow.TimeUnit) TypeMatcher { - return &timeUnitMatcher{arrow.DURATION, unit} -} - -type integerMatcher struct{} - -func (integerMatcher) String() string { return "integer" } -func (integerMatcher) Matches(typ arrow.DataType) bool { return arrow.IsInteger(typ.ID()) } -func (integerMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(integerMatcher) - return ok -} - -type binaryLikeMatcher struct{} - -func (binaryLikeMatcher) String() string { return "binary-like" } -func (binaryLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsBinaryLike(typ.ID()) } -func (binaryLikeMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(binaryLikeMatcher) - return ok -} - -type largeBinaryLikeMatcher struct{} - -func (largeBinaryLikeMatcher) String() string { return "large-binary-like" } -func (largeBinaryLikeMatcher) Matches(typ arrow.DataType) bool { - return arrow.IsLargeBinaryLike(typ.ID()) -} -func (largeBinaryLikeMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(largeBinaryLikeMatcher) - return ok -} - -type fsbLikeMatcher struct{} - -func (fsbLikeMatcher) String() string { return "fixed-size-binary-like" } -func (fsbLikeMatcher) Matches(typ arrow.DataType) bool { return arrow.IsFixedSizeBinary(typ.ID()) } -func (fsbLikeMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(fsbLikeMatcher) - return ok -} - -// Integer returns a TypeMatcher which will match any integral type like int8 or uint16 -func Integer() TypeMatcher { return integerMatcher{} } - -// BinaryLike returns a TypeMatcher that will match Binary or String -func BinaryLike() TypeMatcher { return binaryLikeMatcher{} } - -// LargeBinaryLike returns a TypeMatcher which will match LargeBinary or LargeString -func LargeBinaryLike() TypeMatcher { return largeBinaryLikeMatcher{} } - -// FixedSizeBinaryLike returns a TypeMatcher that will match FixedSizeBinary -// or Decimal128/256 -func FixedSizeBinaryLike() TypeMatcher { return fsbLikeMatcher{} } - 
-type primitiveMatcher struct{} - -func (primitiveMatcher) String() string { return "primitive" } -func (primitiveMatcher) Matches(typ arrow.DataType) bool { return arrow.IsPrimitive(typ.ID()) } -func (primitiveMatcher) Equals(other TypeMatcher) bool { - _, ok := other.(primitiveMatcher) - return ok -} - -// Primitive returns a TypeMatcher that will match any type that arrow.IsPrimitive -// returns true for. -func Primitive() TypeMatcher { return primitiveMatcher{} } - -type reeMatcher struct { - runEndsMatcher TypeMatcher - encodedMatcher TypeMatcher -} - -func (r reeMatcher) Matches(typ arrow.DataType) bool { - if typ.ID() != arrow.RUN_END_ENCODED { - return false - } - - dt := typ.(*arrow.RunEndEncodedType) - return r.runEndsMatcher.Matches(dt.RunEnds()) && r.encodedMatcher.Matches(dt.Encoded()) -} - -func (r reeMatcher) Equals(other TypeMatcher) bool { - o, ok := other.(reeMatcher) - if !ok { - return false - } - return r.runEndsMatcher.Equals(o.runEndsMatcher) && r.encodedMatcher.Equals(o.encodedMatcher) -} - -func (r reeMatcher) String() string { - return "run_end_encoded(run_ends=" + r.runEndsMatcher.String() + ", values=" + r.encodedMatcher.String() + ")" -} - -// RunEndEncoded returns a matcher which matches a RunEndEncoded -// type whose encoded type is matched by the passed in matcher. -func RunEndEncoded(runEndsMatcher, encodedMatcher TypeMatcher) TypeMatcher { - return reeMatcher{ - runEndsMatcher: runEndsMatcher, - encodedMatcher: encodedMatcher} -} - -// InputKind is an enum representing the type of Input matching -// that will be done. Either accepting any type, an exact specific type -// or using a TypeMatcher. -type InputKind int8 - -const ( - InputAny InputKind = iota - InputExact - InputUseMatcher -) - -// InputType is used for type checking arguments passed to a kernel -// and stored within a KernelSignature. The type-checking rule can -// be supplied either with an exact DataType instance or a custom -// TypeMatcher. 
-type InputType struct { - Kind InputKind - Type arrow.DataType - Matcher TypeMatcher -} - -func NewExactInput(dt arrow.DataType) InputType { return InputType{Kind: InputExact, Type: dt} } -func NewMatchedInput(match TypeMatcher) InputType { - return InputType{Kind: InputUseMatcher, Matcher: match} -} -func NewIDInput(id arrow.Type) InputType { return NewMatchedInput(SameTypeID(id)) } - -func (it InputType) MatchID() arrow.Type { - switch it.Kind { - case InputExact: - return it.Type.ID() - case InputUseMatcher: - if idMatch, ok := it.Matcher.(*sameTypeIDMatcher); ok { - return idMatch.accepted - } - } - debug.Assert(false, "MatchID called on non-id matching InputType") - return -1 -} - -func (it InputType) String() string { - switch it.Kind { - case InputAny: - return "any" - case InputUseMatcher: - return it.Matcher.String() - case InputExact: - return it.Type.String() - } - return "" -} - -func (it *InputType) Equals(other *InputType) bool { - if it == other { - return true - } - - if it.Kind != other.Kind { - return false - } - - switch it.Kind { - case InputAny: - return true - case InputExact: - return arrow.TypeEqual(it.Type, other.Type) - case InputUseMatcher: - return it.Matcher.Equals(other.Matcher) - default: - return false - } -} - -func (it InputType) Hash() uint64 { - var h maphash.Hash - - h.SetSeed(hashSeed) - result := HashCombine(h.Sum64(), uint64(it.Kind)) - switch it.Kind { - case InputExact: - result = HashCombine(result, arrow.HashType(hashSeed, it.Type)) - } - return result -} - -func (it InputType) Matches(dt arrow.DataType) bool { - switch it.Kind { - case InputExact: - return arrow.TypeEqual(it.Type, dt) - case InputUseMatcher: - return it.Matcher.Matches(dt) - case InputAny: - return true - default: - debug.Assert(false, "invalid InputKind") - return true - } -} - -// ResolveKind defines the way that a particular OutputType resolves -// its type. 
Either it has a fixed type to resolve to or it contains -// a Resolver which will compute the resolved type based on -// the input types. -type ResolveKind int8 - -const ( - ResolveFixed ResolveKind = iota - ResolveComputed -) - -// TypeResolver is simply a function that takes a KernelCtx and a list of input types -// and returns the resolved type or an error. -type TypeResolver = func(*KernelCtx, []arrow.DataType) (arrow.DataType, error) - -type OutputType struct { - Kind ResolveKind - Type arrow.DataType - Resolver TypeResolver -} - -func NewOutputType(dt arrow.DataType) OutputType { - return OutputType{Kind: ResolveFixed, Type: dt} -} - -func NewComputedOutputType(resolver TypeResolver) OutputType { - return OutputType{Kind: ResolveComputed, Resolver: resolver} -} - -func (o OutputType) String() string { - if o.Kind == ResolveFixed { - return o.Type.String() - } - return "computed" -} - -func (o OutputType) Resolve(ctx *KernelCtx, types []arrow.DataType) (arrow.DataType, error) { - switch o.Kind { - case ResolveFixed: - return o.Type, nil - } - - return o.Resolver(ctx, types) -} - -// NullHandling is an enum representing how a particular Kernel -// wants the executor to handle nulls. -type NullHandling int8 - -const ( - // Compute the output validity bitmap by intersection the validity - // bitmaps of the arguments using bitwise-and operations. This means - // that values in the output are valid/non-null only if the corresponding - // values in all input arguments were valid/non-null. Kernels generally - // do not have to touch the bitmap afterwards, but a kernel's exec function - // is permitted to alter the bitmap after the null intersection is computed - // if necessary. - NullIntersection NullHandling = iota - // Kernel expects a pre-allocated buffer to write the result bitmap - // into. 
- NullComputedPrealloc - // Kernel will allocate and set the validity bitmap of the output - NullComputedNoPrealloc - // kernel output is never null and a validity bitmap doesn't need to - // be allocated - NullNoOutput -) - -// MemAlloc is the preference for preallocating memory of fixed-width -// type outputs during kernel execution. -type MemAlloc int8 - -const ( - // For data types that support pre-allocation (fixed-width), the - // kernel expects to be provided a pre-allocated buffer to write into. - // Non-fixed-width types must always allocate their own buffers. - // The allocation is made for the same length as the execution batch, - // so vector kernels yielding differently sized outputs should not - // use this. - // - // It is valid for the data to not be preallocated but the validity - // bitmap is (or is computed using intersection). - // - // For variable-size output types like Binary or String, or for nested - // types, this option has no effect. - MemPrealloc MemAlloc = iota - // The kernel is responsible for allocating its own data buffer - // for fixed-width output types. - MemNoPrealloc -) - -type KernelState any - -// KernelInitArgs are the arguments required to initialize an Kernel's -// state using the input types and any options. -type KernelInitArgs struct { - Kernel Kernel - Inputs []arrow.DataType - // Options are opaque and specific to the Kernel being initialized, - // may be nil if the kernel doesn't require options. - Options any -} - -// KernelInitFn is any function that receives a KernelCtx and initialization -// arguments and returns the initialized state or an error. -type KernelInitFn = func(*KernelCtx, KernelInitArgs) (KernelState, error) - -// KernelSignature holds the input and output types for a kernel. -// -// Variable argument functions with a minimum of N arguments should pass -// up to N input types to be used to validate for invocation. 
The first -// N-1 types will be matched against the first N-1 arguments and the last -// type will be matched against the remaining arguments. -type KernelSignature struct { - InputTypes []InputType - OutType OutputType - IsVarArgs bool - - // store the hashcode after it is computed so we don't - // need to recompute it - hashCode uint64 -} - -func (k KernelSignature) String() string { - var b strings.Builder - if k.IsVarArgs { - b.WriteString("varargs[") - } else { - b.WriteByte('(') - } - - for i, t := range k.InputTypes { - if i != 0 { - b.WriteString(", ") - } - b.WriteString(t.String()) - } - if k.IsVarArgs { - b.WriteString("*]") - } else { - b.WriteByte(')') - } - - b.WriteString(" -> ") - b.WriteString(k.OutType.String()) - return b.String() -} - -func (k KernelSignature) Equals(other KernelSignature) bool { - if k.IsVarArgs != other.IsVarArgs { - return false - } - - return slices.EqualFunc(k.InputTypes, other.InputTypes, func(e1, e2 InputType) bool { - return e1.Equals(&e2) - }) -} - -func (k *KernelSignature) Hash() uint64 { - if k.hashCode != 0 { - return k.hashCode - } - - var h maphash.Hash - h.SetSeed(hashSeed) - result := h.Sum64() - for _, typ := range k.InputTypes { - result = HashCombine(result, typ.Hash()) - } - k.hashCode = result - return result -} - -func (k KernelSignature) MatchesInputs(types []arrow.DataType) bool { - switch k.IsVarArgs { - case true: - // check that it has enough to match at least the non-vararg types - if len(types) < (len(k.InputTypes) - 1) { - return false - } - - for i, t := range types { - if !k.InputTypes[Min(i, len(k.InputTypes)-1)].Matches(t) { - return false - } - } - case false: - if len(types) != len(k.InputTypes) { - return false - } - for i, t := range types { - if !k.InputTypes[i].Matches(t) { - return false - } - } - } - return true -} - -// ArrayKernelExec is an alias definition for a kernel's execution function. -// -// This is used for both stateless and stateful kernels. 
If a kernel -// depends on some execution state, it can be accessed from the KernelCtx -// object, which also contains the context.Context object which can be -// used for shortcircuiting by checking context.Done / context.Err. -// This allows kernels to control handling timeouts or cancellation of -// computation. -type ArrayKernelExec = func(*KernelCtx, *ExecSpan, *ExecResult) error - -type kernel struct { - Init KernelInitFn - Signature *KernelSignature - Data KernelState - Parallelizable bool -} - -func (k kernel) GetInitFn() KernelInitFn { return k.Init } -func (k kernel) GetSig() *KernelSignature { return k.Signature } - -// A ScalarKernel is the kernel implementation for a Scalar Function. -// In addition to the members found in the base Kernel, it contains -// the null handling and memory pre-allocation preferences. -type ScalarKernel struct { - kernel - - ExecFn ArrayKernelExec - CanWriteIntoSlices bool - NullHandling NullHandling - MemAlloc MemAlloc -} - -// NewScalarKernel constructs a new kernel for scalar execution, constructing -// a KernelSignature with the provided input types and output type, and using -// the passed in execution implementation and initialization function. -func NewScalarKernel(in []InputType, out OutputType, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { - return NewScalarKernelWithSig(&KernelSignature{ - InputTypes: in, - OutType: out, - }, exec, init) -} - -// NewScalarKernelWithSig is a convenience when you already have a signature -// to use for constructing a kernel. It's equivalent to passing the components -// of the signature (input and output types) to NewScalarKernel. 
-func NewScalarKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) ScalarKernel { - return ScalarKernel{ - kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, - ExecFn: exec, - CanWriteIntoSlices: true, - NullHandling: NullIntersection, - MemAlloc: MemPrealloc, - } -} - -func (s *ScalarKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { - return s.ExecFn(ctx, sp, out) -} - -func (s ScalarKernel) GetNullHandling() NullHandling { return s.NullHandling } -func (s ScalarKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } -func (s ScalarKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } - -// ChunkedExec is the signature for executing a stateful vector kernel -// against a ChunkedArray input. It is optional -type ChunkedExec func(*KernelCtx, []*arrow.Chunked, *ExecResult) ([]*ExecResult, error) - -// FinalizeFunc is an optional finalizer function for any postprocessing -// that may need to be done on data before returning it -type FinalizeFunc func(*KernelCtx, []*ArraySpan) ([]*ArraySpan, error) - -// VectorKernel is a structure for implementations of vector functions. -// It can optionally contain a finalizer function, the null handling -// and memory pre-allocation preferences (different defaults from -// scalar kernels when using NewVectorKernel), and other execution related -// options. -type VectorKernel struct { - kernel - - ExecFn ArrayKernelExec - ExecChunked ChunkedExec - Finalize FinalizeFunc - NullHandling NullHandling - MemAlloc MemAlloc - CanWriteIntoSlices bool - CanExecuteChunkWise bool - OutputChunked bool -} - -// NewVectorKernel constructs a new kernel for execution of vector functions, -// which take into account more than just the individual scalar values -// of its input. Output of a vector kernel may be a different length -// than its inputs. 
-func NewVectorKernel(inTypes []InputType, outType OutputType, exec ArrayKernelExec, init KernelInitFn) VectorKernel { - return NewVectorKernelWithSig(&KernelSignature{ - InputTypes: inTypes, OutType: outType}, exec, init) -} - -// NewVectorKernelWithSig is a convenience function for creating a kernel -// when you already have a signature constructed. -func NewVectorKernelWithSig(sig *KernelSignature, exec ArrayKernelExec, init KernelInitFn) VectorKernel { - return VectorKernel{ - kernel: kernel{Signature: sig, Init: init, Parallelizable: true}, - ExecFn: exec, - CanWriteIntoSlices: true, - CanExecuteChunkWise: true, - OutputChunked: true, - NullHandling: NullComputedNoPrealloc, - MemAlloc: MemNoPrealloc, - } -} - -func (s *VectorKernel) Exec(ctx *KernelCtx, sp *ExecSpan, out *ExecResult) error { - return s.ExecFn(ctx, sp, out) -} - -func (s VectorKernel) GetNullHandling() NullHandling { return s.NullHandling } -func (s VectorKernel) GetMemAlloc() MemAlloc { return s.MemAlloc } -func (s VectorKernel) CanFillSlices() bool { return s.CanWriteIntoSlices } diff --git a/go/arrow/compute/exec/kernel_test.go b/go/arrow/compute/exec/kernel_test.go deleted file mode 100644 index 248bad323a307..0000000000000 --- a/go/arrow/compute/exec/kernel_test.go +++ /dev/null @@ -1,588 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exec_test - -import ( - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/assert" -) - -func TestTypeMatcherSameTypeID(t *testing.T) { - matcher := exec.SameTypeID(arrow.DECIMAL128) - assert.True(t, matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) - assert.False(t, matcher.Matches(arrow.PrimitiveTypes.Int8)) - - assert.Equal(t, "Type::DECIMAL128", matcher.String()) - - assert.True(t, matcher.Equals(matcher)) - assert.True(t, matcher.Equals(exec.SameTypeID(arrow.DECIMAL))) - assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) - assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Microsecond))) -} - -func TestTypeMatcherTimestampTypeUnit(t *testing.T) { - matcher := exec.TimestampTypeUnit(arrow.Millisecond) - matcher2 := exec.Time32TypeUnit(arrow.Millisecond) - matcher3 := exec.Time64TypeUnit(arrow.Microsecond) - matcher4 := exec.DurationTypeUnit(arrow.Microsecond) - - assert.True(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_ms)) - assert.True(t, matcher.Matches(&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "utc"})) - assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Timestamp_s)) - assert.False(t, matcher.Matches(arrow.FixedWidthTypes.Time32ms)) - assert.True(t, matcher2.Matches(arrow.FixedWidthTypes.Time32ms)) - - assert.True(t, matcher3.Matches(arrow.FixedWidthTypes.Time64us)) - assert.False(t, matcher3.Matches(arrow.FixedWidthTypes.Time64ns)) - assert.True(t, matcher4.Matches(arrow.FixedWidthTypes.Duration_us)) - assert.False(t, 
matcher4.Matches(arrow.FixedWidthTypes.Duration_ms)) - - // check String() representation - assert.Equal(t, "timestamp(s)", exec.TimestampTypeUnit(arrow.Second).String()) - assert.Equal(t, "timestamp(ms)", exec.TimestampTypeUnit(arrow.Millisecond).String()) - assert.Equal(t, "timestamp(us)", exec.TimestampTypeUnit(arrow.Microsecond).String()) - assert.Equal(t, "timestamp(ns)", exec.TimestampTypeUnit(arrow.Nanosecond).String()) - - // equals implementation - assert.True(t, matcher.Equals(matcher)) - assert.True(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Millisecond))) - assert.False(t, matcher.Equals(exec.TimestampTypeUnit(arrow.Microsecond))) - assert.False(t, matcher.Equals(exec.Time32TypeUnit(arrow.Millisecond))) - assert.False(t, matcher3.Equals(matcher2)) - assert.False(t, matcher4.Equals(matcher3)) - assert.True(t, matcher4.Equals(exec.DurationTypeUnit(arrow.Microsecond))) - assert.False(t, matcher.Equals(exec.SameTypeID(arrow.TIMESTAMP))) -} - -func TestIntegerMatcher(t *testing.T) { - match := exec.Integer() - - assert.Equal(t, "integer", match.String()) - assert.True(t, match.Matches(arrow.PrimitiveTypes.Int8)) - assert.True(t, match.Matches(arrow.PrimitiveTypes.Uint64)) - assert.True(t, match.Equals(exec.Integer())) - assert.False(t, match.Equals(exec.BinaryLike())) -} - -func TestBinaryLikeMatcher(t *testing.T) { - match := exec.BinaryLike() - - assert.Equal(t, "binary-like", match.String()) - assert.True(t, match.Matches(arrow.BinaryTypes.String)) - assert.True(t, match.Matches(arrow.BinaryTypes.Binary)) - assert.False(t, match.Matches(arrow.BinaryTypes.LargeString)) - assert.False(t, match.Matches(arrow.BinaryTypes.LargeBinary)) - assert.False(t, match.Equals(exec.LargeBinaryLike())) - assert.True(t, match.Equals(exec.BinaryLike())) -} - -func TestLargeBinaryLikeMatcher(t *testing.T) { - match := exec.LargeBinaryLike() - - assert.Equal(t, "large-binary-like", match.String()) - assert.False(t, match.Matches(arrow.BinaryTypes.String)) - 
assert.False(t, match.Matches(arrow.BinaryTypes.Binary)) - assert.True(t, match.Matches(arrow.BinaryTypes.LargeString)) - assert.True(t, match.Matches(arrow.BinaryTypes.LargeBinary)) - assert.True(t, match.Equals(exec.LargeBinaryLike())) - assert.False(t, match.Equals(exec.BinaryLike())) -} - -func TestFixedSizeBinaryMatcher(t *testing.T) { - match := exec.FixedSizeBinaryLike() - - assert.Equal(t, "fixed-size-binary-like", match.String()) - assert.False(t, match.Matches(arrow.BinaryTypes.String)) - assert.True(t, match.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 5})) - assert.True(t, match.Matches(&arrow.Decimal256Type{Precision: 12, Scale: 10})) - assert.True(t, match.Matches(&arrow.FixedSizeBinaryType{})) - assert.False(t, match.Equals(exec.LargeBinaryLike())) - assert.True(t, match.Equals(exec.FixedSizeBinaryLike())) -} - -func TestPrimitiveMatcher(t *testing.T) { - match := exec.Primitive() - - assert.Equal(t, "primitive", match.String()) - assert.True(t, match.Equals(exec.Primitive())) - - types := []arrow.DataType{ - arrow.FixedWidthTypes.Boolean, - arrow.PrimitiveTypes.Uint8, - arrow.PrimitiveTypes.Int8, - arrow.PrimitiveTypes.Uint16, - arrow.PrimitiveTypes.Int16, - arrow.PrimitiveTypes.Uint32, - arrow.PrimitiveTypes.Int32, - arrow.PrimitiveTypes.Uint64, - arrow.PrimitiveTypes.Int64, - arrow.FixedWidthTypes.Float16, - arrow.PrimitiveTypes.Float32, - arrow.PrimitiveTypes.Float64, - arrow.FixedWidthTypes.Date32, - arrow.FixedWidthTypes.Date64, - arrow.FixedWidthTypes.Time32ms, - arrow.FixedWidthTypes.Time64ns, - arrow.FixedWidthTypes.Timestamp_ms, - arrow.FixedWidthTypes.Duration_ms, - arrow.FixedWidthTypes.MonthInterval, - arrow.FixedWidthTypes.DayTimeInterval, - arrow.FixedWidthTypes.MonthDayNanoInterval, - } - - for _, typ := range types { - assert.True(t, match.Matches(typ)) - } - - assert.False(t, match.Matches(arrow.Null)) -} - -func TestREEMatcher(t *testing.T) { - tests := []struct { - runEnds exec.TypeMatcher - enc exec.TypeMatcher - 
matchRunEnds arrow.DataType - nomatchRunEnds arrow.DataType - matchEnc arrow.DataType - nomatchEnc arrow.DataType - }{ - {exec.Integer(), exec.Integer(), arrow.PrimitiveTypes.Int16, arrow.FixedWidthTypes.Float16, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String}, - {exec.SameTypeID(arrow.INT32), exec.BinaryLike(), arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int64, arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32}, - {exec.SameTypeID(arrow.INT64), exec.SameTypeID(arrow.STRUCT), arrow.PrimitiveTypes.Int64, arrow.PrimitiveTypes.Int32, arrow.StructOf(arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int16}), arrow.PrimitiveTypes.Int8}, - } - - for _, tt := range tests { - t.Run(tt.enc.String(), func(t *testing.T) { - matcher := exec.RunEndEncoded(tt.runEnds, tt.enc) - assert.False(t, matcher.Matches(tt.matchEnc)) - assert.True(t, matcher.Matches(arrow.RunEndEncodedOf(tt.matchRunEnds, tt.matchEnc))) - assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.matchRunEnds, tt.nomatchEnc))) - assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.nomatchRunEnds, tt.matchEnc))) - assert.False(t, matcher.Matches(arrow.RunEndEncodedOf(tt.nomatchRunEnds, tt.nomatchEnc))) - - assert.Equal(t, "run_end_encoded(run_ends="+tt.runEnds.String()+", values="+tt.enc.String()+")", matcher.String()) - - assert.True(t, matcher.Equals(exec.RunEndEncoded(tt.runEnds, tt.enc))) - assert.False(t, matcher.Equals(exec.Primitive())) - assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.nomatchRunEnds.ID()), exec.SameTypeID(tt.nomatchEnc.ID())))) - assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.matchRunEnds.ID()), exec.SameTypeID(tt.nomatchEnc.ID())))) - assert.False(t, matcher.Equals(exec.RunEndEncoded(exec.SameTypeID(tt.nomatchRunEnds.ID()), exec.SameTypeID(tt.matchEnc.ID())))) - }) - } -} - -func TestInputTypeAnyType(t *testing.T) { - var ty exec.InputType - assert.Equal(t, exec.InputAny, ty.Kind) -} - -func TestInputType(t *testing.T) { - ty1 
:= exec.NewExactInput(arrow.PrimitiveTypes.Int8) - assert.Equal(t, exec.InputExact, ty1.Kind) - assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) - assert.Equal(t, "int8", ty1.String()) - - ty2 := exec.NewIDInput(arrow.DECIMAL) - assert.Equal(t, exec.InputUseMatcher, ty2.Kind) - assert.Equal(t, "Type::DECIMAL128", ty2.String()) - assert.True(t, ty2.Matcher.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) - assert.False(t, ty2.Matcher.Matches(arrow.PrimitiveTypes.Int16)) - - ty3 := exec.NewMatchedInput(exec.TimestampTypeUnit(arrow.Microsecond)) - assert.Equal(t, "timestamp(us)", ty3.String()) - - var ty4 exec.InputType - assert.Equal(t, "any", ty4.String()) - // InputAny matches anything - assert.True(t, ty4.Matches((arrow.DataType)(nil))) -} - -func TestInputTypeEquals(t *testing.T) { - t1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - t2 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - t3 := exec.NewExactInput(arrow.PrimitiveTypes.Int32) - - t5 := exec.NewIDInput(arrow.DECIMAL) - t6 := exec.NewIDInput(arrow.DECIMAL) - - assert.True(t, t1.Equals(&t2)) - assert.False(t, t1.Equals(&t3)) - assert.False(t, t1.Equals(&t5)) - assert.True(t, t5.Equals(&t5)) - assert.True(t, t5.Equals(&t6)) - - var ty exec.InputType - assert.True(t, ty.Equals(&exec.InputType{Kind: exec.InputAny})) - - // for now, an ID matcher for arrow.INT32 and a ExactInput for - // arrow.PrimitiveTypes.Int32 are treated as being different. 
- // this could be made equivalent later if desireable - - // check that field metadata is excluded from equality checks - t7 := exec.NewExactInput(arrow.ListOfField( - arrow.Field{Name: "item", Type: arrow.BinaryTypes.String, - Nullable: true, Metadata: arrow.NewMetadata([]string{"foo"}, []string{"bar"})})) - t8 := exec.NewExactInput(arrow.ListOf(arrow.BinaryTypes.String)) - assert.True(t, t7.Equals(&t8)) -} - -func TestInputTypeHash(t *testing.T) { - var ( - t0 exec.InputType - t1 = exec.NewExactInput(arrow.PrimitiveTypes.Int8) - t2 = exec.NewIDInput(arrow.DECIMAL) - ) - - // these checks try to determine first of all whether hash - // always returns the same value, and whether the elements - // of the type are all incorporated into the hash - assert.Equal(t, t0.Hash(), t0.Hash()) - assert.Equal(t, t1.Hash(), t1.Hash()) - assert.Equal(t, t2.Hash(), t2.Hash()) - assert.NotEqual(t, t0.Hash(), t1.Hash()) - assert.NotEqual(t, t0.Hash(), t2.Hash()) - assert.NotEqual(t, t1.Hash(), t2.Hash()) -} - -func TestInputTypeMatches(t *testing.T) { - in1 := exec.NewExactInput(arrow.PrimitiveTypes.Int8) - - assert.True(t, in1.Matches(arrow.PrimitiveTypes.Int8)) - assert.False(t, in1.Matches(arrow.PrimitiveTypes.Int16)) - - in2 := exec.NewIDInput(arrow.DECIMAL) - assert.True(t, in2.Matches(&arrow.Decimal128Type{Precision: 12, Scale: 2})) - - ty2 := &arrow.Decimal128Type{Precision: 12, Scale: 2} - ty3 := arrow.PrimitiveTypes.Float64 - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - arr2 := array.MakeArrayOfNull(mem, ty2, 1) - arr3 := array.MakeArrayOfNull(mem, ty3, 1) - defer arr2.Release() - defer arr3.Release() - - scalar2, err := scalar.GetScalar(arr2, 0) - assert.NoError(t, err) - - datumArr := compute.NewDatum(arr2) - defer datumArr.Release() - datumScalar := compute.NewDatum(scalar2) - defer datumScalar.Release() - - assert.False(t, in2.Matches(ty3)) - assert.False(t, in2.Matches(arr3.DataType())) -} - -func TestOutputType(t 
*testing.T) { - ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int8) - assert.Equal(t, exec.ResolveFixed, ty1.Kind) - assert.True(t, arrow.TypeEqual(arrow.PrimitiveTypes.Int8, ty1.Type)) - - dummyResolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { - return arrow.PrimitiveTypes.Int32, nil - } - - ty2 := exec.NewComputedOutputType(dummyResolver) - assert.Equal(t, exec.ResolveComputed, ty2.Kind) - - outType2, err := ty2.Resolve(nil, nil) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, outType2) - - ty3 := ty1 - assert.Equal(t, exec.ResolveFixed, ty3.Kind) - assert.True(t, arrow.TypeEqual(ty1.Type, ty3.Type)) - - ty4 := ty2 - assert.Equal(t, exec.ResolveComputed, ty4.Kind) - outType4, err := ty4.Resolve(nil, nil) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, outType4) - - assert.Equal(t, "int8", ty3.String()) - assert.Equal(t, "computed", ty4.String()) -} - -func TestOutputTypeResolve(t *testing.T) { - ty1 := exec.NewOutputType(arrow.PrimitiveTypes.Int32) - - result, err := ty1.Resolve(nil, nil) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - - result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - - result, err = ty1.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - - resolver := func(_ *exec.KernelCtx, args []arrow.DataType) (arrow.DataType, error) { - return args[0], nil - } - ty2 := exec.NewComputedOutputType(resolver) - - result, err = ty2.Resolve(nil, []arrow.DataType{arrow.BinaryTypes.String}) - assert.NoError(t, err) - assert.Same(t, arrow.BinaryTypes.String, result) - - // type resolver that returns an error - ty3 := exec.NewComputedOutputType(func(_ *exec.KernelCtx, dt []arrow.DataType) (arrow.DataType, error) { - // checking the value 
types versus the function arity should be validated - // elsewhere. this is just for illustration purposes - if len(dt) == 0 { - return nil, fmt.Errorf("%w: need at least one argument", arrow.ErrInvalid) - } - return dt[0], nil - }) - - _, err = ty3.Resolve(nil, []arrow.DataType{}) - assert.ErrorIs(t, err, arrow.ErrInvalid) - - // resolver returns a fixed value - ty4 := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { - return arrow.PrimitiveTypes.Int32, nil - }) - result, err = ty4.Resolve(nil, []arrow.DataType{arrow.PrimitiveTypes.Int8}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) - result, err = ty4.Resolve(nil, []arrow.DataType{}) - assert.NoError(t, err) - assert.Same(t, arrow.PrimitiveTypes.Int32, result) -} - -func TestKernelSignatureEquals(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String)} - sig1Copy := exec.KernelSignature{ - InputTypes: []exec.InputType{}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String)} - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - // output type doesn't matter (for now) - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.PrimitiveTypes.Int32), - } - - sig4 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int16), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig4Copy := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int16), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig5 := exec.KernelSignature{ - 
InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int16), - exec.NewExactInput(arrow.PrimitiveTypes.Int32), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - assert.True(t, sig1.Equals(sig1)) - assert.True(t, sig2.Equals(sig3)) - assert.False(t, sig3.Equals(sig4)) - - // different sig objects but same sig - assert.True(t, sig1.Equals(sig1Copy)) - assert.True(t, sig4.Equals(sig4Copy)) - - // match first 2 args, but not third - assert.False(t, sig4.Equals(sig5)) -} - -func TestKernelSignatureVarArgsEqual(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - assert.True(t, sig1.Equals(sig2)) - assert.False(t, sig2.Equals(sig3)) -} - -func TestKernelSignatureHash(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - - assert.Equal(t, sig1.Hash(), sig1.Hash()) - assert.Equal(t, sig2.Hash(), sig2.Hash()) - assert.NotEqual(t, sig1.Hash(), sig2.Hash()) - assert.NotEqual(t, sig2.Hash(), 
sig3.Hash()) -} - -func TestKernelSignatureMatchesInputs(t *testing.T) { - // () -> boolean - sig1 := exec.KernelSignature{ - OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean)} - - assert.True(t, sig1.MatchesInputs([]arrow.DataType{})) - assert.False(t, sig1.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) - - // (int8, decimal) -> boolean - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewIDInput(arrow.DECIMAL)}, - OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), - } - assert.False(t, sig2.MatchesInputs([]arrow.DataType{})) - assert.False(t, sig2.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8})) - assert.True(t, sig2.MatchesInputs([]arrow.DataType{ - arrow.PrimitiveTypes.Int8, - &arrow.Decimal128Type{Precision: 12, Scale: 2}})) - - // (int8, int32) -> boolean - sig3 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.PrimitiveTypes.Int32), - }, - OutType: exec.NewOutputType(arrow.FixedWidthTypes.Boolean), - } - assert.False(t, sig3.MatchesInputs(nil)) - assert.True(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int32})) - assert.False(t, sig3.MatchesInputs([]arrow.DataType{arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int16})) -} - -func TestKernelSignatureVarArgsMatchesInputs(t *testing.T) { - { - sig := exec.KernelSignature{ - InputTypes: []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - - args := []arrow.DataType{arrow.PrimitiveTypes.Int8} - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.PrimitiveTypes.Int8, arrow.PrimitiveTypes.Int8) - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.PrimitiveTypes.Int32) - assert.False(t, sig.MatchesInputs(args)) - } - { - sig := exec.KernelSignature{ - 
InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewExactInput(arrow.BinaryTypes.String), - }, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - - args := []arrow.DataType{arrow.PrimitiveTypes.Int8} - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.BinaryTypes.String, arrow.BinaryTypes.String) - assert.True(t, sig.MatchesInputs(args)) - args = append(args, arrow.PrimitiveTypes.Int32) - assert.False(t, sig.MatchesInputs(args)) - } -} - -func TestKernelSignatureToString(t *testing.T) { - inTypes := []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewIDInput(arrow.DECIMAL), - exec.NewExactInput(arrow.BinaryTypes.String), - } - - sig := exec.KernelSignature{ - InputTypes: inTypes, OutType: exec.NewOutputType(arrow.BinaryTypes.String), - } - assert.Equal(t, "(int8, Type::DECIMAL128, utf8) -> utf8", sig.String()) - - outType := exec.NewComputedOutputType(func(*exec.KernelCtx, []arrow.DataType) (arrow.DataType, error) { - return nil, arrow.ErrInvalid - }) - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8), - exec.NewIDInput(arrow.DECIMAL)}, - OutType: outType, - } - assert.Equal(t, "(int8, Type::DECIMAL128) -> computed", sig2.String()) -} - -func TestKernelSignatureVarArgsToString(t *testing.T) { - sig1 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - assert.Equal(t, "varargs[int8*] -> utf8", sig1.String()) - - sig2 := exec.KernelSignature{ - InputTypes: []exec.InputType{ - exec.NewExactInput(arrow.BinaryTypes.String), - exec.NewExactInput(arrow.PrimitiveTypes.Int8)}, - OutType: exec.NewOutputType(arrow.BinaryTypes.String), - IsVarArgs: true, - } - assert.Equal(t, "varargs[utf8, int8*] -> utf8", sig2.String()) -} diff --git a/go/arrow/compute/exec/span.go 
b/go/arrow/compute/exec/span.go deleted file mode 100644 index 6156acfd008aa..0000000000000 --- a/go/arrow/compute/exec/span.go +++ /dev/null @@ -1,634 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exec - -import ( - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -// BufferSpan is a lightweight Buffer holder for ArraySpans that does not -// take ownership of the underlying memory.Buffer at all or could be -// used to reference raw byte slices instead. -type BufferSpan struct { - // Buf should be the byte slice representing this buffer, if this is - // nil then this bufferspan should be considered empty. - Buf []byte - // Owner should point to an underlying parent memory.Buffer if this - // memory is owned by a different, existing, buffer. Retain is not - // called on this buffer, so it must not be released as long as - // this BufferSpan refers to it. 
- Owner *memory.Buffer - // SelfAlloc tracks whether or not this bufferspan is the only owner - // of the Owning memory.Buffer. This happens when preallocating - // memory or if a kernel allocates it's own buffer for a result. - // In these cases, we have to know so we can properly maintain the - // refcount if this is later turned into an ArrayData object. - SelfAlloc bool -} - -// SetBuffer sets the given buffer into this BufferSpan and marks -// SelfAlloc as false. This should be called when setting a buffer -// that is externally owned/created. -func (b *BufferSpan) SetBuffer(buf *memory.Buffer) { - b.Buf = buf.Bytes() - b.Owner = buf - b.SelfAlloc = false -} - -// WrapBuffer wraps this bufferspan around a buffer and marks -// SelfAlloc as true. This should be called when setting a buffer -// that was allocated as part of an execution rather than just -// re-using an existing buffer from an input array. -func (b *BufferSpan) WrapBuffer(buf *memory.Buffer) { - b.Buf = buf.Bytes() - b.Owner = buf - b.SelfAlloc = true -} - -// ArraySpan is a light-weight, non-owning version of arrow.ArrayData -// for more efficient handling with computation and engines. We use -// explicit go Arrays to define the buffers and some scratch space -// for easily populating and shifting around pointers to memory without -// having to worry about and deal with retain/release during calculations. 
-type ArraySpan struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]BufferSpan - - // Scratch is a holding spot for things such as - // offsets or union type codes when converting from scalars - Scratch [2]uint64 - - Children []ArraySpan -} - -// if an error is encountered, call Release on a preallocated span -// to ensure it releases any self-allocated buffers, it will -// not call release on buffers it doesn't own (SelfAlloc != true) -func (a *ArraySpan) Release() { - for _, c := range a.Children { - c.Release() - } - - for _, b := range a.Buffers { - if b.SelfAlloc { - b.Owner.Release() - } - } -} - -func (a *ArraySpan) MayHaveNulls() bool { - return atomic.LoadInt64(&a.Nulls) != 0 && a.Buffers[0].Buf != nil -} - -// UpdateNullCount will count the bits in the null bitmap and update the -// number of nulls if the current null count is unknown, otherwise it just -// returns the value of a.Nulls -func (a *ArraySpan) UpdateNullCount() int64 { - curNulls := atomic.LoadInt64(&a.Nulls) - if curNulls != array.UnknownNullCount { - return curNulls - } - - newNulls := a.Len - int64(bitutil.CountSetBits(a.Buffers[0].Buf, int(a.Offset), int(a.Len))) - atomic.StoreInt64(&a.Nulls, newNulls) - return newNulls -} - -// Dictionary returns a pointer to the array span for the dictionary which -// we will always place as the first (and only) child if it exists. -func (a *ArraySpan) Dictionary() *ArraySpan { return &a.Children[0] } - -// NumBuffers returns the number of expected buffers for this type -func (a *ArraySpan) NumBuffers() int { return getNumBuffers(a.Type) } - -// MakeData generates an arrow.ArrayData object for this ArraySpan, -// properly updating the buffer ref count if necessary. 
-func (a *ArraySpan) MakeData() arrow.ArrayData { - var bufs [3]*memory.Buffer - for i := range bufs { - b := a.GetBuffer(i) - bufs[i] = b - if b != nil && a.Buffers[i].SelfAlloc { - // if this buffer is just a pointer to another existing buffer - // then we never bumped the refcount for that buffer. - // As a result, we won't call release here so that the call - // to array.NewData properly updates the ref counts of the buffers. - // If instead this buffer was allocated during calculation - // (such as during prealloc or by a kernel itself) - // then we need to release after we create the ArrayData so that it - // maintains the correct refcount of 1, giving the resulting - // ArrayData object ownership of this buffer. - defer b.Release() - } - } - - var ( - nulls = int(atomic.LoadInt64(&a.Nulls)) - length = int(a.Len) - off = int(a.Offset) - dt = a.Type - children []arrow.ArrayData - ) - - if a.Type.ID() == arrow.NULL { - nulls = length - } else if len(a.Buffers[0].Buf) == 0 { - nulls = 0 - } - - // we use a.Type for the NewData call at the end, so we can - // handle extension types by using dt to point to the storage type - // and let the proper extension type get set into the ArrayData - // object we return. 
- if dt.ID() == arrow.EXTENSION { - dt = dt.(arrow.ExtensionType).StorageType() - } - - if dt.ID() == arrow.DICTIONARY { - result := array.NewData(a.Type, length, bufs[:a.NumBuffers()], nil, nulls, off) - dict := a.Dictionary().MakeData() - defer dict.Release() - result.SetDictionary(dict) - return result - } else if dt.ID() == arrow.DENSE_UNION || dt.ID() == arrow.SPARSE_UNION { - bufs[0] = nil - nulls = 0 - } - - if len(a.Children) > 0 { - children = make([]arrow.ArrayData, len(a.Children)) - for i, c := range a.Children { - d := c.MakeData() - defer d.Release() - children[i] = d - } - } - return array.NewData(a.Type, length, bufs[:a.NumBuffers()], children, nulls, off) -} - -// MakeArray is a convenience function for calling array.MakeFromData(a.MakeData()) -func (a *ArraySpan) MakeArray() arrow.Array { - d := a.MakeData() - defer d.Release() - return array.MakeFromData(d) -} - -// SetSlice updates the offset and length of this ArraySpan to refer to -// a specific slice of the underlying buffers. -func (a *ArraySpan) SetSlice(off, length int64) { - if off == a.Offset && length == a.Len { - // don't modify the nulls if the slice is the entire span - return - } - - if a.Type.ID() != arrow.NULL { - if a.Nulls != 0 { - if a.Nulls == a.Len { - a.Nulls = length - } else { - a.Nulls = array.UnknownNullCount - } - } - } else { - a.Nulls = length - } - - a.Offset, a.Len = off, length -} - -// GetBuffer returns the buffer for the requested index. If this buffer -// is owned by another array/arrayspan the Owning buffer is returned, -// otherwise if this slice has no owning buffer, we call NewBufferBytes -// to wrap it as a memory.Buffer. Can also return nil if there is no -// buffer in this index. 
-func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer { - buf := a.Buffers[idx] - switch { - case buf.Owner != nil: - return buf.Owner - case buf.Buf != nil: - return memory.NewBufferBytes(buf.Buf) - } - return nil -} - -// convenience function to resize the children slice if necessary, -// or just shrink the slice without re-allocating if there's enough -// capacity already. -func (a *ArraySpan) resizeChildren(i int) { - if cap(a.Children) >= i { - a.Children = a.Children[:i] - } else { - a.Children = make([]ArraySpan, i) - } -} - -// FillFromScalar populates this ArraySpan as if it were a 1 length array -// with the single value equal to the passed in Scalar. -func (a *ArraySpan) FillFromScalar(val scalar.Scalar) { - var ( - trueBit byte = 0x01 - falseBit byte = 0x00 - ) - - a.Type = val.DataType() - a.Len = 1 - typeID := a.Type.ID() - if val.IsValid() { - a.Nulls = 0 - } else { - a.Nulls = 1 - } - - if !arrow.IsUnion(typeID) && typeID != arrow.NULL { - if val.IsValid() { - a.Buffers[0].Buf = []byte{trueBit} - } else { - a.Buffers[0].Buf = []byte{falseBit} - } - a.Buffers[0].Owner = nil - a.Buffers[0].SelfAlloc = false - } - - switch { - case typeID == arrow.BOOL: - if val.(*scalar.Boolean).Value { - a.Buffers[1].Buf = []byte{trueBit} - } else { - a.Buffers[1].Buf = []byte{falseBit} - } - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - case arrow.IsPrimitive(typeID) || arrow.IsDecimal(typeID): - sc := val.(scalar.PrimitiveScalar) - a.Buffers[1].Buf = sc.Data() - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - case typeID == arrow.DICTIONARY: - sc := val.(scalar.PrimitiveScalar) - a.Buffers[1].Buf = sc.Data() - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - a.resizeChildren(1) - a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data()) - case arrow.IsBaseBinary(typeID): - sc := val.(scalar.BinaryScalar) - a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:]) - a.Buffers[1].Owner = nil - 
a.Buffers[1].SelfAlloc = false - - var dataBuffer []byte - if sc.IsValid() { - dataBuffer = sc.Data() - a.Buffers[2].Owner = sc.Buffer() - a.Buffers[2].SelfAlloc = false - } - if arrow.IsBinaryLike(typeID) { - setOffsetsForScalar(a, - unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(len(dataBuffer)), 1) - } else { - // large_binary_like - setOffsetsForScalar(a, - unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(len(dataBuffer)), 1) - } - a.Buffers[2].Buf = dataBuffer - case typeID == arrow.FIXED_SIZE_BINARY: - sc := val.(scalar.BinaryScalar) - if !sc.IsValid() { - a.Buffers[1].Buf = make([]byte, sc.DataType().(*arrow.FixedSizeBinaryType).ByteWidth) - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - break - } - a.Buffers[1].Buf = sc.Data() - a.Buffers[1].Owner = sc.Buffer() - a.Buffers[1].SelfAlloc = false - case arrow.IsListLike(typeID): - sc := val.(scalar.ListScalar) - valueLen := 0 - a.resizeChildren(1) - - if sc.GetList() != nil { - a.Children[0].SetMembers(sc.GetList().Data()) - valueLen = sc.GetList().Len() - } else { - // even when the value is null, we must populate - // child data to yield a valid array. 
ugh - FillZeroLength(sc.DataType().(arrow.NestedType).Fields()[0].Type, &a.Children[0]) - } - - switch typeID { - case arrow.LIST, arrow.MAP: - setOffsetsForScalar(a, - unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(valueLen), 1) - case arrow.LARGE_LIST: - setOffsetsForScalar(a, - unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), - int64(valueLen), 1) - default: - // fixed size list has no second buffer - a.Buffers[1].Buf, a.Buffers[1].Owner = nil, nil - a.Buffers[1].SelfAlloc = false - } - case typeID == arrow.STRUCT: - sc := val.(*scalar.Struct) - a.Buffers[1].Buf = nil - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - a.resizeChildren(len(sc.Value)) - for i, v := range sc.Value { - a.Children[i].FillFromScalar(v) - } - case arrow.IsUnion(typeID): - // first buffer is kept null since unions have no validity vector - a.Buffers[0].Buf, a.Buffers[0].Owner = nil, nil - a.Buffers[0].SelfAlloc = false - - a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:])[:1] - a.Buffers[1].Owner = nil - a.Buffers[1].SelfAlloc = false - codes := unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1) - - a.resizeChildren(len(a.Type.(arrow.UnionType).Fields())) - switch sc := val.(type) { - case *scalar.DenseUnion: - codes[0] = sc.TypeCode - // has offset, start 4 bytes in so it's aligned to the 32-bit boundaries - off := unsafe.Slice((*int32)(unsafe.Add(unsafe.Pointer(&a.Scratch[0]), arrow.Int32SizeBytes)), 2) - setOffsetsForScalar(a, off, 1, 2) - // we can't "see" the other arrays in the union, but we put the "active" - // union array in the right place and fill zero-length arrays for - // the others. 
- childIDS := a.Type.(arrow.UnionType).ChildIDs() - for i, f := range a.Type.(arrow.UnionType).Fields() { - if i == childIDS[sc.TypeCode] { - a.Children[i].FillFromScalar(sc.Value) - } else { - FillZeroLength(f.Type, &a.Children[i]) - } - } - case *scalar.SparseUnion: - codes[0] = sc.TypeCode - // sparse union scalars have a full complement of child values - // even though only one of them is relevant, so we just fill them - // in here - for i, v := range sc.Value { - a.Children[i].FillFromScalar(v) - } - } - case typeID == arrow.EXTENSION: - // pass through storage - sc := val.(*scalar.Extension) - a.FillFromScalar(sc.Value) - // restore the extension type - a.Type = val.DataType() - case typeID == arrow.NULL: - for i := range a.Buffers { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - } -} - -func (a *ArraySpan) SetDictionary(span *ArraySpan) { - a.resizeChildren(1) - a.Children[0].Release() - a.Children[0] = *span -} - -// TakeOwnership is like SetMembers only this takes ownership of -// the buffers by calling Retain on them so that the passed in -// ArrayData can be released without negatively affecting this -// ArraySpan -func (a *ArraySpan) TakeOwnership(data arrow.ArrayData) { - a.Type = data.DataType() - a.Len = int64(data.Len()) - if a.Type.ID() == arrow.NULL { - a.Nulls = a.Len - } else { - a.Nulls = int64(data.NullN()) - } - a.Offset = int64(data.Offset()) - - for i, b := range data.Buffers() { - if b != nil { - a.Buffers[i].WrapBuffer(b) - b.Retain() - } else { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - } - - typeID := a.Type.ID() - if a.Buffers[0].Buf == nil { - switch typeID { - case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: - default: - // should already be zero, but we make sure - a.Nulls = 0 - } - } - - for i := len(data.Buffers()); i < 3; i++ { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - - if typeID == 
arrow.DICTIONARY { - a.resizeChildren(1) - dict := data.Dictionary() - if dict != (*array.Data)(nil) { - a.Children[0].TakeOwnership(dict) - } - } else { - a.resizeChildren(len(data.Children())) - for i, c := range data.Children() { - a.Children[i].TakeOwnership(c) - } - } -} - -// SetMembers populates this ArraySpan from the given ArrayData object. -// As this is a non-owning reference, the ArrayData object must not -// be fully released while this ArraySpan is in use, otherwise any buffers -// referenced will be released too -func (a *ArraySpan) SetMembers(data arrow.ArrayData) { - a.Type = data.DataType() - a.Len = int64(data.Len()) - if a.Type.ID() == arrow.NULL { - a.Nulls = a.Len - } else { - a.Nulls = int64(data.NullN()) - } - a.Offset = int64(data.Offset()) - - for i, b := range data.Buffers() { - if b != nil { - a.Buffers[i].SetBuffer(b) - } else { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - } - - typeID := a.Type.ID() - if a.Buffers[0].Buf == nil { - switch typeID { - case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: - default: - // should already be zero, but we make sure - a.Nulls = 0 - } - } - - for i := len(data.Buffers()); i < 3; i++ { - a.Buffers[i].Buf = nil - a.Buffers[i].Owner = nil - a.Buffers[i].SelfAlloc = false - } - - if typeID == arrow.DICTIONARY { - a.resizeChildren(1) - dict := data.Dictionary() - if dict != (*array.Data)(nil) { - a.Children[0].SetMembers(dict) - } - } else { - if cap(a.Children) >= len(data.Children()) { - a.Children = a.Children[:len(data.Children())] - } else { - a.Children = make([]ArraySpan, len(data.Children())) - } - for i, c := range data.Children() { - a.Children[i].SetMembers(c) - } - } -} - -// ExecValue represents a single input to an execution which could -// be either an Array (ArraySpan) or a Scalar value -type ExecValue struct { - Array ArraySpan - Scalar scalar.Scalar -} - -func (e *ExecValue) IsArray() bool { return e.Scalar == nil } -func (e 
*ExecValue) IsScalar() bool { return !e.IsArray() } - -func (e *ExecValue) Type() arrow.DataType { - if e.IsArray() { - return e.Array.Type - } - return e.Scalar.DataType() -} - -// ExecResult is the result of a kernel execution and should be populated -// by the execution functions and/or a kernel. For now we're just going to -// alias an ArraySpan. -type ExecResult = ArraySpan - -// ExecSpan represents a slice of inputs and is used to provide slices -// of input values to iterate over. -// -// Len is the length of the span (all elements in Values should either -// be scalar or an array with a length + offset of at least Len). -type ExecSpan struct { - Len int64 - Values []ExecValue -} - -func getNumBuffers(dt arrow.DataType) int { - switch dt.ID() { - case arrow.RUN_END_ENCODED: - return 0 - case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST: - return 1 - case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION: - return 3 - case arrow.EXTENSION: - return getNumBuffers(dt.(arrow.ExtensionType).StorageType()) - default: - return 2 - } -} - -// FillZeroLength fills an ArraySpan with the appropriate information for -// a Zero Length Array of the provided type. 
-func FillZeroLength(dt arrow.DataType, span *ArraySpan) { - span.Scratch[0], span.Scratch[1] = 0, 0 - span.Type = dt - span.Len = 0 - numBufs := getNumBuffers(dt) - for i := 0; i < numBufs; i++ { - span.Buffers[i].Buf = arrow.Uint64Traits.CastToBytes(span.Scratch[:])[:0] - span.Buffers[i].Owner = nil - } - - for i := numBufs; i < 3; i++ { - span.Buffers[i].Buf, span.Buffers[i].Owner = nil, nil - } - - if dt.ID() == arrow.DICTIONARY { - span.resizeChildren(1) - FillZeroLength(dt.(*arrow.DictionaryType).ValueType, &span.Children[0]) - return - } - - nt, ok := dt.(arrow.NestedType) - if !ok { - if len(span.Children) > 0 { - span.Children = span.Children[:0] - } - return - } - - span.resizeChildren(nt.NumFields()) - for i, f := range nt.Fields() { - FillZeroLength(f.Type, &span.Children[i]) - } -} - -// PromoteExecSpanScalars promotes the values of the passed in ExecSpan -// from scalars to Arrays of length 1 for each value. -func PromoteExecSpanScalars(span ExecSpan) { - for i := range span.Values { - if span.Values[i].Scalar != nil { - span.Values[i].Array.FillFromScalar(span.Values[i].Scalar) - span.Values[i].Scalar = nil - } - } -} diff --git a/go/arrow/compute/exec/span_offsets.go b/go/arrow/compute/exec/span_offsets.go deleted file mode 100644 index d2d0398884c9d..0000000000000 --- a/go/arrow/compute/exec/span_offsets.go +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package exec - -import ( - "unsafe" -) - -// convenience function for populating the offsets buffer from a scalar -// value's size. -func setOffsetsForScalar[T int32 | int64](span *ArraySpan, buf []T, valueSize int64, bufidx int) { - buf[0] = 0 - buf[1] = T(valueSize) - - span.Buffers[bufidx].Buf = unsafe.Slice((*byte)(unsafe.Pointer(unsafe.SliceData(buf))), - 2*int(unsafe.Sizeof(T(0)))) - - span.Buffers[bufidx].Owner = nil - span.Buffers[bufidx].SelfAlloc = false -} diff --git a/go/arrow/compute/exec/span_test.go b/go/arrow/compute/exec/span_test.go deleted file mode 100644 index 018fbb7d623d9..0000000000000 --- a/go/arrow/compute/exec/span_test.go +++ /dev/null @@ -1,835 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec_test - -import ( - "reflect" - "strings" - "testing" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/extensions" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/apache/arrow/go/v18/internal/types" - "github.com/stretchr/testify/assert" -) - -func TestBufferSpan_SetBuffer(t *testing.T) { - type fields struct { - Buf []byte - Owner *memory.Buffer - SelfAlloc bool - } - type args struct { - buf *memory.Buffer - } - foo := []byte{0xde, 0xad, 0xbe, 0xef} - own := memory.NewBufferBytes(foo) - tests := []struct { - name string - fields fields - args args - }{ - {"simple", fields{SelfAlloc: true}, args{own}}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - b := &exec.BufferSpan{ - Buf: tt.fields.Buf, - Owner: tt.fields.Owner, - SelfAlloc: tt.fields.SelfAlloc, - } - b.SetBuffer(tt.args.buf) - assert.Same(t, &foo[0], &b.Buf[0]) - assert.Same(t, own, b.Owner) - assert.False(t, b.SelfAlloc) - }) - } -} - -func TestBufferSpan_WrapBuffer(t *testing.T) { - type fields struct { - Buf []byte - Owner *memory.Buffer - SelfAlloc bool - } - type args struct { - buf *memory.Buffer - } - foo := []byte{0xde, 0xad, 0xbe, 0xef} - own := memory.NewBufferBytes(foo) - tests := []struct { - name string - fields fields - args args - }{ - {"simple", fields{SelfAlloc: false}, args{own}}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - b := &exec.BufferSpan{ - Buf: tt.fields.Buf, - Owner: tt.fields.Owner, - SelfAlloc: tt.fields.SelfAlloc, - } - b.WrapBuffer(tt.args.buf) - assert.Same(t, &foo[0], &b.Buf[0]) - assert.Same(t, own, 
b.Owner) - assert.True(t, b.SelfAlloc) - }) - } -} - -func TestArraySpan_UpdateNullCount(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - tests := []struct { - name string - fields fields - want int64 - }{ - {"known", fields{Nulls: 25}, 25}, - {"unknown", fields{ - Nulls: array.UnknownNullCount, - Len: 8, // 0b01101101 - Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 3}, - {"unknown with offset", fields{ - Nulls: array.UnknownNullCount, - Len: 4, - Offset: 2, // 0b01101101 - Buffers: [3]exec.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 1}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - if got := a.UpdateNullCount(); got != tt.want { - t.Errorf("ArraySpan.UpdateNullCount() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestArraySpan_Dictionary(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - children := []exec.ArraySpan{{}} - tests := []struct { - name string - fields fields - want *exec.ArraySpan - }{ - {"basic", fields{Children: children}, &children[0]}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - if got := a.Dictionary(); !reflect.DeepEqual(got, tt.want) { - t.Errorf("ArraySpan.Dictionary() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestArraySpan_NumBuffers(t *testing.T) { - type fields 
struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - - tests := []struct { - name string - fields fields - want int - }{ - {"null", fields{Type: arrow.Null}, 1}, - {"struct", fields{Type: arrow.StructOf()}, 1}, - {"fixed size list", fields{Type: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int32)}, 1}, - {"binary", fields{Type: arrow.BinaryTypes.Binary}, 3}, - {"large binary", fields{Type: arrow.BinaryTypes.LargeBinary}, 3}, - {"string", fields{Type: arrow.BinaryTypes.String}, 3}, - {"large string", fields{Type: arrow.BinaryTypes.LargeString}, 3}, - {"extension", fields{Type: extensions.NewUUIDType()}, 2}, - {"int32", fields{Type: arrow.PrimitiveTypes.Int32}, 2}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - if got := a.NumBuffers(); got != tt.want { - t.Errorf("ArraySpan.NumBuffers() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestArraySpan_MakeData(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - - var ( - buf1 *memory.Buffer - ) - arrow.RegisterExtensionType(types.NewDictExtensionType()) - defer arrow.UnregisterExtensionType("dict-extension") - - tests := []struct { - name string - fields func(mem memory.Allocator) fields - want func(mem memory.Allocator) arrow.ArrayData - }{ - {"null type", func(mem memory.Allocator) fields { - return fields{ - Type: arrow.Null, - Len: 5, - Nulls: array.UnknownNullCount, - } - }, func(mem memory.Allocator) arrow.ArrayData { - return array.NewData(arrow.Null, 5, []*memory.Buffer{nil}, nil, 5, 0) - }}, - {"zero len", func(mem memory.Allocator) 
fields { - return fields{Type: arrow.PrimitiveTypes.Int32} - }, func(mem memory.Allocator) arrow.ArrayData { - return array.NewData(arrow.PrimitiveTypes.Int32, 0, []*memory.Buffer{nil, nil}, nil, 0, 0) - }}, - {"non-owning offset", func(mem memory.Allocator) fields { - ret := fields{ - Type: arrow.PrimitiveTypes.Int8, - Len: 4, - Nulls: 1, - Offset: 1, - } - buf1 = memory.NewResizableBuffer(mem) - buf1.Resize(1) - buf1.Bytes()[0] = 109 - ret.Buffers[0].SetBuffer(buf1) - ret.Buffers[1].SetBuffer(memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})) - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - // created in the above func, we release after constructing - // the NewData so the refcount is as expected - defer buf1.Release() - return array.NewData(arrow.PrimitiveTypes.Int8, 4, - []*memory.Buffer{buf1, memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})}, nil, 1, 1) - }}, - {"self-alloc", func(mem memory.Allocator) fields { - ret := fields{ - Type: arrow.PrimitiveTypes.Int8, - Len: 4, - } - buf := memory.NewResizableBuffer(mem) - buf.Resize(1) - ret.Buffers[0].WrapBuffer(buf) - buf2 := memory.NewResizableBuffer(mem) - buf2.Resize(4) - ret.Buffers[1].WrapBuffer(buf2) - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - buf := memory.NewResizableBuffer(mem) - buf.Resize(1) - defer buf.Release() - buf2 := memory.NewResizableBuffer(mem) - buf2.Resize(4) - defer buf2.Release() - return array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{buf, buf2}, nil, 0, 0) - }}, - {"with children", func(mem memory.Allocator) fields { - ret := fields{ - Type: arrow.ListOf(arrow.PrimitiveTypes.Int8), - Len: 1, - Children: []exec.ArraySpan{{ - Type: arrow.PrimitiveTypes.Int8, - Len: 4, - }}, - } - var offsets [8]byte - endian.Native.PutUint32(offsets[4:], 4) - ret.Buffers[1].SetBuffer(memory.NewBufferBytes(offsets[:])) - buf := memory.NewResizableBuffer(mem) - buf.Resize(4) - buf.Bytes()[0] = 1 - buf.Bytes()[1] = 2 - buf.Bytes()[2] = 3 - buf.Bytes()[3] = 4 - - 
ret.Children[0].Buffers[1].WrapBuffer(buf) - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - buf := memory.NewResizableBuffer(mem) - buf.Resize(4) - buf.Bytes()[0] = 1 - buf.Bytes()[1] = 2 - buf.Bytes()[2] = 3 - buf.Bytes()[3] = 4 - defer buf.Release() - child := array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{nil, buf}, nil, 0, 0) - defer child.Release() - - var offsets [8]byte - endian.Native.PutUint32(offsets[4:], 4) - - return array.NewData(arrow.ListOf(arrow.PrimitiveTypes.Int8), 1, - []*memory.Buffer{nil, memory.NewBufferBytes(offsets[:])}, - []arrow.ArrayData{child}, 0, 0) - }}, - {"dict-extension-type", func(mem memory.Allocator) fields { - // dict-extension-type is dict(Index: int8, Value: string) - // so there should be an int8 in the arrayspan and - // a child of a string arrayspan in the first index of - // Children - ret := fields{ - Type: types.NewDictExtensionType(), - Len: 1, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - }}, - } - - indices := memory.NewResizableBuffer(mem) - indices.Resize(1) - indices.Bytes()[0] = 1 - ret.Buffers[1].WrapBuffer(indices) - - offsets := memory.NewResizableBuffer(mem) - offsets.Resize(3 * arrow.Int32SizeBytes) - copy(offsets.Bytes(), arrow.Int32Traits.CastToBytes([]int32{0, 5, 10})) - - values := memory.NewResizableBuffer(mem) - values.Resize(len("HelloWorld")) - copy(values.Bytes(), []byte("HelloWorld")) - - nulls := memory.NewResizableBuffer(mem) - nulls.Resize(1) - nulls.Bytes()[0] = 3 - ret.Children[0].Buffers[0].WrapBuffer(nulls) - ret.Children[0].Buffers[1].WrapBuffer(offsets) - ret.Children[0].Buffers[2].WrapBuffer(values) - - return ret - }, func(mem memory.Allocator) arrow.ArrayData { - dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) - defer dict.Release() - index, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) - defer index.Release() - - out := 
array.NewData(types.NewDictExtensionType(), 1, []*memory.Buffer{nil, index.Data().Buffers()[1]}, nil, 0, 0) - out.SetDictionary(dict.Data()) - return out - }}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - t.Run("MakeData", func(t *testing.T) { - f := tt.fields(mem) - a := &exec.ArraySpan{ - Type: f.Type, - Len: f.Len, - Nulls: f.Nulls, - Offset: f.Offset, - Buffers: f.Buffers, - Scratch: f.Scratch, - Children: f.Children, - } - got := a.MakeData() - want := tt.want(mem) - if !reflect.DeepEqual(got, want) { - t.Errorf("ArraySpan.MakeData() = %v, want %v", got, want) - } - want.Release() - got.Release() - }) - - t.Run("MakeArray", func(t *testing.T) { - f := tt.fields(mem) - a := &exec.ArraySpan{ - Type: f.Type, - Len: f.Len, - Nulls: f.Nulls, - Offset: f.Offset, - Buffers: f.Buffers, - Scratch: f.Scratch, - Children: f.Children, - } - arr := a.MakeArray() - want := tt.want(mem) - defer want.Release() - exp := array.MakeFromData(want) - - assert.Truef(t, array.Equal(arr, exp), "expected: %s\ngot: %s", exp, arr) - - exp.Release() - arr.Release() - }) - }) - } -} - -func TestArraySpan_SetSlice(t *testing.T) { - type fields struct { - Type arrow.DataType - Len int64 - Nulls int64 - Offset int64 - Buffers [3]exec.BufferSpan - Scratch [2]uint64 - Children []exec.ArraySpan - } - type args struct { - off int64 - length int64 - } - tests := []struct { - name string - fields fields - args args - wantNulls int64 - }{ - {"null type", fields{Type: arrow.Null}, args{5, 10}, 10}, - {"not-null type", fields{Type: arrow.PrimitiveTypes.Int8}, args{5, 10}, 0}, - {"not-null type with nulls", fields{Type: arrow.PrimitiveTypes.Int8, Nulls: -1}, args{5, 10}, array.UnknownNullCount}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Type: tt.fields.Type, - Len: tt.fields.Len, - Nulls: tt.fields.Nulls, - Offset: 
tt.fields.Offset, - Buffers: tt.fields.Buffers, - Scratch: tt.fields.Scratch, - Children: tt.fields.Children, - } - a.SetSlice(tt.args.off, tt.args.length) - assert.Equal(t, tt.args.off, a.Offset) - assert.Equal(t, tt.args.length, a.Len) - assert.Equal(t, tt.wantNulls, a.Nulls) - }) - } -} - -func TestArraySpan_FillFromScalar(t *testing.T) { - var ( - expDecimalBuf [arrow.Decimal128SizeBytes]byte - expScratch [2]uint64 - ) - - endian.Native.PutUint64(expDecimalBuf[:], 1234) - endian.Native.PutUint32(arrow.Uint64Traits.CastToBytes(expScratch[:])[4:], 10) - - dict, _, _ := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) - defer dict.Release() - - tests := []struct { - name string - args scalar.Scalar - exp exec.ArraySpan - }{ - {"null-type", - scalar.MakeNullScalar(arrow.Null), - exec.ArraySpan{Type: arrow.Null, Len: 1, Nulls: 1}}, - {"bool valid", - scalar.MakeScalar(true), - exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x01}}, {}}, - }}, - {"bool valid false", - scalar.MakeScalar(false), - exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x00}}, {}}, - }}, - {"primitive null", - scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32), - exec.ArraySpan{ - Type: arrow.PrimitiveTypes.Int32, - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x00}}, {Buf: []byte{0, 0, 0, 0}}, {}}, - }}, - {"decimal valid", - scalar.NewDecimal128Scalar(decimal128.FromU64(1234), &arrow.Decimal128Type{Precision: 12, Scale: 2}), - exec.ArraySpan{ - Type: &arrow.Decimal128Type{Precision: 12, Scale: 2}, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, {Buf: expDecimalBuf[:]}, {}}, - }}, - {"dictionary scalar", - scalar.NewDictScalar(scalar.NewInt8Scalar(1), dict), - exec.ArraySpan{ - Type: 
&arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}, - Len: 1, - Nulls: 0, - Buffers: [3]exec.BufferSpan{{Buf: []byte{0x01}}, - {Buf: []byte{1}}, {}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"binary scalar", - scalar.NewBinaryScalar(dict.Data().Buffers()[2], arrow.BinaryTypes.String), - exec.ArraySpan{ - Type: arrow.BinaryTypes.String, - Len: 1, - Nulls: 0, - Scratch: expScratch, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x01}}, - {Buf: arrow.Uint64Traits.CastToBytes(expScratch[:1])}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, - }, - }, - {"large binary", - scalar.NewLargeStringScalarFromBuffer(dict.Data().Buffers()[2]), - exec.ArraySpan{ - Type: arrow.BinaryTypes.LargeString, - Len: 1, - Nulls: 0, - Scratch: [2]uint64{0, 10}, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x01}}, - {Buf: arrow.Uint64Traits.CastToBytes([]uint64{0, 10})}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, - }}, - {"fixed size binary", - scalar.NewFixedSizeBinaryScalar(dict.Data().Buffers()[2], &arrow.FixedSizeBinaryType{ByteWidth: 10}), - exec.ArraySpan{ - Type: &arrow.FixedSizeBinaryType{ByteWidth: 10}, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x01}}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, {}, - }, - }}, - {"map scalar null value", - scalar.MakeNullScalar(arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String)), - exec.ArraySpan{ - Type: arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0}}, - {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 
0}}, - {}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.StructOf(arrow.Field{Name: "key", Type: arrow.PrimitiveTypes.Int8}, - arrow.Field{Name: "value", Type: arrow.BinaryTypes.String, Nullable: true}), - Len: 0, - Nulls: 0, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {}, {}, - }, - Children: []exec.ArraySpan{ - { - Type: arrow.PrimitiveTypes.Int8, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {}, - }, - }, - { - Type: arrow.BinaryTypes.String, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, - }, - }, - }, - }}, - }}, - {"list scalar", - scalar.NewListScalarData(dict.Data()), - exec.ArraySpan{ - Type: arrow.ListOf(arrow.BinaryTypes.String), - Len: 1, - Scratch: [2]uint64{ - *(*uint64)(unsafe.Pointer(&[]int32{0, 2}[0])), - 0, - }, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 2})}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"large list scalar", - scalar.NewLargeListScalarData(dict.Data()), - exec.ArraySpan{ - Type: arrow.LargeListOf(arrow.BinaryTypes.String), - Len: 1, - Scratch: [2]uint64{0, 2}, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Int64Traits.CastToBytes([]int64{0, 2})}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"fixed size list", - scalar.NewFixedSizeListScalar(dict), - exec.ArraySpan{ - Type: 
arrow.FixedSizeListOf(2, arrow.BinaryTypes.String), - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {}, {}, - }, - Children: []exec.ArraySpan{{ - Type: arrow.BinaryTypes.String, - Len: 2, - Buffers: [3]exec.BufferSpan{ - {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, - {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, - {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, - }, - }}, - }, - }, - {"struct scalar", - func() scalar.Scalar { - s, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{ - scalar.MakeScalar(int32(5)), scalar.MakeScalar(uint8(10)), - }, []string{"int32", "uint8"}) - return s - }(), - exec.ArraySpan{ - Type: arrow.StructOf( - arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "uint8", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}), - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, {}, {}, - }, - Len: 1, - Children: []exec.ArraySpan{ - { - Type: arrow.PrimitiveTypes.Int32, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Int32Traits.CastToBytes([]int32{5})}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint8, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: []byte{10}}, - {}, - }, - }, - }, - }, - }, - {"dense union scalar", - func() scalar.Scalar { - dt := arrow.UnionOf(arrow.DenseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}) - return scalar.NewDenseUnionScalar(scalar.MakeScalar(uint64(25)), 42, dt.(*arrow.DenseUnionType)) - }(), - exec.ArraySpan{ - Type: arrow.UnionOf(arrow.DenseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: 
true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}), - Len: 1, - Scratch: [2]uint64{42, 1}, - Buffers: [3]exec.BufferSpan{{}, - {Buf: []byte{42}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 1})}, - }, - Children: []exec.ArraySpan{ - { - Type: arrow.BinaryTypes.String, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{}}, {Buf: []byte{}}, {}, - }, - }, - }, - }, - }, - {"sparse union", - func() scalar.Scalar { - dt := arrow.UnionOf(arrow.SparseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}) - return scalar.NewSparseUnionScalarFromValue(scalar.MakeScalar(uint64(25)), 1, dt.(*arrow.SparseUnionType)) - }(), - exec.ArraySpan{ - Type: arrow.UnionOf(arrow.SparseMode, []arrow.Field{ - {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, - }, []arrow.UnionTypeCode{3, 42, 43}), - Len: 1, - Scratch: [2]uint64{42, 0}, - Buffers: [3]exec.BufferSpan{{}, - {Buf: []byte{42}}, {}, - }, - Children: []exec.ArraySpan{ - { - Type: arrow.BinaryTypes.String, - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x0}}, - {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Len: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x1}}, - {Buf: 
arrow.Uint64Traits.CastToBytes([]uint64{25})}, - {}, - }, - }, - { - Type: arrow.PrimitiveTypes.Uint64, - Len: 1, - Nulls: 1, - Buffers: [3]exec.BufferSpan{ - {Buf: []byte{0x0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, - }, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - a := &exec.ArraySpan{ - Nulls: array.UnknownNullCount, - Buffers: [3]exec.BufferSpan{{SelfAlloc: true, Owner: &memory.Buffer{}}, {SelfAlloc: true, Owner: &memory.Buffer{}}, {}}, - } - a.FillFromScalar(tt.args) - assert.Equal(t, tt.exp, *a) - }) - } -} diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go deleted file mode 100644 index 832f93f13165d..0000000000000 --- a/go/arrow/compute/exec/utils.go +++ /dev/null @@ -1,276 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec - -import ( - "fmt" - "math" - "sync/atomic" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/memory" - "golang.org/x/exp/constraints" - "golang.org/x/exp/slices" -) - -// GetSpanValues returns a properly typed slice by reinterpreting -// the buffer at index i using unsafe.Slice. This will take into account -// the offset of the given ArraySpan. -func GetSpanValues[T arrow.FixedWidthType](span *ArraySpan, i int) []T { - if len(span.Buffers[i].Buf) == 0 { - return nil - } - ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len) - return ret[span.Offset:] -} - -// GetSpanOffsets is like GetSpanValues, except it is only for int32 -// or int64 and adds the additional 1 expected value for an offset -// buffer (ie. len(output) == span.Len+1) -func GetSpanOffsets[T int32 | int64](span *ArraySpan, i int) []T { - ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len+1) - return ret[span.Offset:] -} - -func Min[T constraints.Ordered](a, b T) T { - if a < b { - return a - } - return b -} - -func Max[T constraints.Ordered](a, b T) T { - if a > b { - return a - } - return b -} - -// OptionsInit should be used in the case where a KernelState is simply -// represented with a specific type by value (instead of pointer). -// This will initialize the KernelState as a value-copied instance of -// the passed in function options argument to ensure separation -// and allow the kernel to manipulate the options if necessary without -// any negative consequences since it will have its own copy of the options. 
-func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error) { - if opts, ok := args.Options.(*T); ok { - return *opts, nil - } - - return nil, fmt.Errorf("%w: attempted to initialize kernel state from invalid function options", - arrow.ErrInvalid) -} - -type arrayBuilder[T arrow.NumericType | bool] interface { - array.Builder - Append(T) - AppendValues([]T, []bool) -} - -func ArrayFromSlice[T arrow.NumericType | bool](mem memory.Allocator, data []T) arrow.Array { - bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) - defer bldr.Release() - - bldr.AppendValues(data, nil) - return bldr.NewArray() -} - -func ArrayFromSliceWithValid[T arrow.NumericType | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { - bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) - defer bldr.Release() - - bldr.AppendValues(data, valid) - return bldr.NewArray() -} - -func RechunkArraysConsistently(groups [][]arrow.Array) [][]arrow.Array { - if len(groups) <= 1 { - return groups - } - - var totalLen int - for _, a := range groups[0] { - totalLen += a.Len() - } - - if totalLen == 0 { - return groups - } - - rechunked := make([][]arrow.Array, len(groups)) - offsets := make([]int64, len(groups)) - // scan all array vectors at once, rechunking along the way - var start int64 - for start < int64(totalLen) { - // first compute max possible length for next chunk - var chunkLength int64 = math.MaxInt64 - for i, g := range groups { - offset := offsets[i] - // skip any done arrays including 0-length - for offset == int64(g[0].Len()) { - g = g[1:] - offset = 0 - } - arr := g[0] - chunkLength = Min(chunkLength, int64(arr.Len())-offset) - - offsets[i] = offset - groups[i] = g - } - - // now slice all the arrays along this chunk size - for i, g := range groups { - offset := offsets[i] - arr := g[0] - if offset == 0 && int64(arr.Len()) == chunkLength { - // slice spans entire array - arr.Retain() - rechunked[i] = 
append(rechunked[i], arr) - } else { - rechunked[i] = append(rechunked[i], array.NewSlice(arr, int64(offset), int64(offset+chunkLength))) - } - offsets[i] += chunkLength - } - - start += int64(chunkLength) - } - return rechunked -} - -type ChunkResolver struct { - offsets []int64 - cached int64 -} - -func NewChunkResolver(chunks []arrow.Array) *ChunkResolver { - offsets := make([]int64, len(chunks)+1) - var offset int64 - for i, c := range chunks { - curOffset := offset - offset += int64(c.Len()) - offsets[i] = curOffset - } - offsets[len(chunks)] = offset - return &ChunkResolver{offsets: offsets} -} - -func (c *ChunkResolver) Resolve(idx int64) (chunk, index int64) { - // some algorithms consecutively access indexes that are a - // relatively small distance from each other, falling into - // the same chunk. - // This is trivial when merging (assuming each side of the - // merge uses its own resolver), but also in the inner - // recursive invocations of partitioning. - if len(c.offsets) <= 1 { - return 0, idx - } - - cached := atomic.LoadInt64(&c.cached) - cacheHit := idx >= c.offsets[cached] && idx < c.offsets[cached+1] - if cacheHit { - return cached, idx - c.offsets[cached] - } - - chkIdx, found := slices.BinarySearch(c.offsets, idx) - if !found { - chkIdx-- - } - - chunk, index = int64(chkIdx), idx-c.offsets[chkIdx] - atomic.StoreInt64(&c.cached, chunk) - return -} - -type arrayTypes interface { - arrow.FixedWidthType | arrow.TemporalType | bool | string | []byte -} - -type ArrayIter[T arrayTypes] interface { - Next() T -} - -type BoolIter struct { - Rdr *bitutil.BitmapReader -} - -func NewBoolIter(arr *ArraySpan) ArrayIter[bool] { - return &BoolIter{ - Rdr: bitutil.NewBitmapReader(arr.Buffers[1].Buf, int(arr.Offset), int(arr.Len))} -} - -func (b *BoolIter) Next() (out bool) { - out = b.Rdr.Set() - b.Rdr.Next() - return -} - -type PrimitiveIter[T arrow.FixedWidthType] struct { - Values []T -} - -func NewPrimitiveIter[T arrow.FixedWidthType](arr *ArraySpan) 
ArrayIter[T] { - return &PrimitiveIter[T]{Values: GetSpanValues[T](arr, 1)} -} - -func (p *PrimitiveIter[T]) Next() (v T) { - v = p.Values[0] - p.Values = p.Values[1:] - return -} - -type VarBinaryIter[OffsetT int32 | int64] struct { - Offsets []OffsetT - Data []byte - Pos int64 -} - -func NewVarBinaryIter[OffsetT int32 | int64](arr *ArraySpan) ArrayIter[[]byte] { - return &VarBinaryIter[OffsetT]{ - Offsets: GetSpanOffsets[OffsetT](arr, 1), - Data: arr.Buffers[2].Buf, - } -} - -func (v *VarBinaryIter[OffsetT]) Next() []byte { - cur := v.Pos - v.Pos++ - return v.Data[v.Offsets[cur]:v.Offsets[v.Pos]] -} - -type FSBIter struct { - Data []byte - Width int - Pos int64 -} - -func NewFSBIter(arr *ArraySpan) ArrayIter[[]byte] { - return &FSBIter{ - Data: arr.Buffers[1].Buf, - Width: arr.Type.(arrow.FixedWidthDataType).Bytes(), - } -} - -func (f *FSBIter) Next() []byte { - start := f.Width * int(f.Pos) - f.Pos++ - return f.Data[start : start+f.Width] -} diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go deleted file mode 100644 index b8b7212b538c5..0000000000000 --- a/go/arrow/compute/exec/utils_test.go +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exec_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestRechunkConsistentArraysTrivial(t *testing.T) { - var groups [][]arrow.Array - rechunked := exec.RechunkArraysConsistently(groups) - assert.Zero(t, rechunked) - - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - a1 := exec.ArrayFromSlice(mem, []int16{}) - defer a1.Release() - a2 := exec.ArrayFromSlice(mem, []int16{}) - defer a2.Release() - b1 := exec.ArrayFromSlice(mem, []int32{}) - defer b1.Release() - groups = [][]arrow.Array{{a1, a2}, {}, {b1}} - rechunked = exec.RechunkArraysConsistently(groups) - assert.Len(t, rechunked, 3) - - for _, arrvec := range rechunked { - for _, arr := range arrvec { - assert.Zero(t, arr.Len()) - } - } -} - -func assertEqual[T arrow.NumericType](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { - exp := exec.ArrayFromSlice(mem, data) - defer exp.Release() - assert.Truef(t, array.Equal(exp, arr), "expected: %s\ngot: %s", exp, arr) -} - -func TestRechunkArraysConsistentlyPlain(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - a1 := exec.ArrayFromSlice(mem, []int16{1, 2, 3}) - defer a1.Release() - a2 := exec.ArrayFromSlice(mem, []int16{4, 5}) - defer a2.Release() - a3 := exec.ArrayFromSlice(mem, []int16{6, 7, 8, 9}) - defer a3.Release() - - b1 := exec.ArrayFromSlice(mem, []int32{41, 42}) - defer b1.Release() - b2 := exec.ArrayFromSlice(mem, []int32{43, 44, 45}) - defer b2.Release() - b3 := exec.ArrayFromSlice(mem, []int32{46, 47}) - defer b3.Release() - b4 := exec.ArrayFromSlice(mem, []int32{48, 49}) - defer b4.Release() - - groups := [][]arrow.Array{{a1, a2, a3}, {b1, b2, b3, 
b4}} - rechunked := exec.RechunkArraysConsistently(groups) - assert.Len(t, rechunked, 2) - ra := rechunked[0] - rb := rechunked[1] - - assert.Len(t, ra, 5) - assertEqual(t, mem, ra[0], []int16{1, 2}) - ra[0].Release() - assertEqual(t, mem, ra[1], []int16{3}) - ra[1].Release() - assertEqual(t, mem, ra[2], []int16{4, 5}) - ra[2].Release() - assertEqual(t, mem, ra[3], []int16{6, 7}) - ra[3].Release() - assertEqual(t, mem, ra[4], []int16{8, 9}) - ra[4].Release() - - assert.Len(t, rb, 5) - assertEqual(t, mem, rb[0], []int32{41, 42}) - rb[0].Release() - assertEqual(t, mem, rb[1], []int32{43}) - rb[1].Release() - assertEqual(t, mem, rb[2], []int32{44, 45}) - rb[2].Release() - assertEqual(t, mem, rb[3], []int32{46, 47}) - rb[3].Release() - assertEqual(t, mem, rb[4], []int32{48, 49}) - rb[4].Release() -} diff --git a/go/arrow/compute/exec_internals_test.go b/go/arrow/compute/exec_internals_test.go deleted file mode 100644 index f0c585f557ebc..0000000000000 --- a/go/arrow/compute/exec_internals_test.go +++ /dev/null @@ -1,585 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "bytes" - "context" - "fmt" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal/testing/gen" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/suite" -) - -type ComputeInternalsTestSuite struct { - suite.Suite - - mem *memory.CheckedAllocator - - execCtx ExecCtx - ctx *exec.KernelCtx - rng gen.RandomArrayGenerator -} - -func (c *ComputeInternalsTestSuite) SetupTest() { - c.mem = memory.NewCheckedAllocator(memory.DefaultAllocator) - c.rng = gen.NewRandomArrayGenerator(0, c.mem) - - c.resetCtx() -} - -func (c *ComputeInternalsTestSuite) TearDownTest() { - c.mem.AssertSize(c.T(), 0) -} - -func (c *ComputeInternalsTestSuite) assertArrayEqual(expected, got arrow.Array) { - c.Truef(array.Equal(expected, got), "expected: %s\ngot: %s", expected, got) -} - -func (c *ComputeInternalsTestSuite) assertDatumEqual(expected arrow.Array, got Datum) { - arr := got.(*ArrayDatum).MakeArray() - defer arr.Release() - c.Truef(array.Equal(expected, arr), "expected: %s\ngot: %s", expected, arr) -} - -func (c *ComputeInternalsTestSuite) resetCtx() { - c.execCtx = ExecCtx{Registry: GetFunctionRegistry(), - ChunkSize: DefaultMaxChunkSize, PreallocContiguous: true} - c.ctx = &exec.KernelCtx{Ctx: SetExecCtx(context.Background(), c.execCtx)} -} - -func (c *ComputeInternalsTestSuite) getBoolArr(sz int64, trueprob, nullprob float64) arrow.Array { - return c.rng.Boolean(sz, trueprob, nullprob) -} - -func (c *ComputeInternalsTestSuite) getUint8Arr(sz int64, nullprob float64) arrow.Array { - return c.rng.Uint8(sz, 0, 100, nullprob) -} - -func (c *ComputeInternalsTestSuite) getInt32Arr(sz int64, nullprob 
float64) arrow.Array { - return c.rng.Int32(sz, 0, 1000, nullprob) -} - -func (c *ComputeInternalsTestSuite) getFloat64Arr(sz int64, nullprob float64) arrow.Array { - return c.rng.Float64(sz, 0, 1000, nullprob) -} - -func (c *ComputeInternalsTestSuite) getInt32Chunked(szs []int64) *arrow.Chunked { - chunks := make([]arrow.Array, 0) - for i, s := range szs { - chunks = append(chunks, c.getInt32Arr(s, 0.1)) - defer chunks[i].Release() - } - return arrow.NewChunked(arrow.PrimitiveTypes.Int32, chunks) -} - -func (c *ComputeInternalsTestSuite) assertValidityZeroExtraBits(data []byte, length, offset int) { - bitExtent := ((offset + length + 7) / 8) * 8 - for i := offset + length; i < bitExtent; i++ { - c.False(bitutil.BitIsSet(data, i)) - } -} - -type PropagateNullsSuite struct { - ComputeInternalsTestSuite -} - -func (p *PropagateNullsSuite) TestUnknownNullCountWithNullsZeroCopies() { - const length int = 16 - bitmap := [8]byte{254, 0, 0, 0, 0, 0, 0, 0} - nulls := memory.NewBufferBytes(bitmap[:]) - - output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) - input := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) - - var outSpan exec.ArraySpan - outSpan.SetMembers(output) - batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) - p.Same(nulls, outSpan.Buffers[0].Owner) - p.EqualValues(array.UnknownNullCount, outSpan.Nulls) - p.Equal(9, int(outSpan.Len)-bitutil.CountSetBits(outSpan.Buffers[0].Buf, int(outSpan.Offset), int(outSpan.Len))) -} - -func (p *PropagateNullsSuite) TestUnknownNullCountWithoutNulls() { - const length int = 16 - bitmap := [8]byte{255, 255, 0, 0, 0, 0, 0, 0} - nulls := memory.NewBufferBytes(bitmap[:]) - - output := array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nil, nil}, nil, 0, 0) - input := 
array.NewData(arrow.FixedWidthTypes.Boolean, length, []*memory.Buffer{nulls, nil}, nil, array.UnknownNullCount, 0) - - var outSpan exec.ArraySpan - outSpan.SetMembers(output) - batch := ExecBatch{Values: []Datum{NewDatum(input)}, Len: int64(length)} - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(&batch), &outSpan)) - p.EqualValues(-1, outSpan.Nulls) - p.Same(nulls, outSpan.Buffers[0].Owner) -} - -func (p *PropagateNullsSuite) TestSetAllNulls() { - const length int = 16 - checkSetAll := func(vals []Datum, prealloc bool) { - // fresh bitmap with all 1s - bitmapData := [2]byte{255, 255} - preallocatedMem := memory.NewBufferBytes(bitmapData[:]) - - output := &exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: int64(length), - Nulls: array.UnknownNullCount, - } - - if prealloc { - output.Buffers[0].SetBuffer(preallocatedMem) - } - - batch := &ExecBatch{Values: vals, Len: int64(length)} - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) - - if prealloc { - // ensure that the buffer object is the same when we pass preallocated - // memory to it - p.Same(preallocatedMem, output.Buffers[0].Owner) - } else { - defer output.Buffers[0].Owner.Release() - } - - p.NotNil(output.Buffers[0].Buf) - expected := [2]byte{0, 0} - p.True(bytes.Equal(expected[:], output.Buffers[0].Buf)) - } - - var vals []Datum - const trueProb float64 = 0.5 - p.Run("Null Scalar", func() { - i32Val := scalar.MakeScalar(int32(3)) - vals = []Datum{NewDatum(i32Val), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))} - checkSetAll(vals, true) - checkSetAll(vals, false) - - arr := p.getBoolArr(int64(length), trueProb, 0) - defer arr.Release() - vals[0] = NewDatum(arr) - defer vals[0].Release() - checkSetAll(vals, true) - checkSetAll(vals, false) - }) - - p.Run("one all null", func() { - arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) - defer arrAllNulls.Release() - arrHalf := p.getBoolArr(int64(length), trueProb, 0.5) - defer arrHalf.Release() - vals = 
[]Datum{NewDatum(arrHalf), NewDatum(arrAllNulls)} - defer vals[0].Release() - defer vals[1].Release() - - checkSetAll(vals, true) - checkSetAll(vals, false) - }) - - p.Run("one value is NullType", func() { - nullarr := array.NewNull(length) - arr := p.getBoolArr(int64(length), trueProb, 0) - defer arr.Release() - vals = []Datum{NewDatum(arr), NewDatum(nullarr)} - defer vals[0].Release() - checkSetAll(vals, true) - checkSetAll(vals, false) - }) - - p.Run("Other scenarios", func() { - // an all-null bitmap is zero-copied over, even though - // there is a null-scalar earlier in the batch - outSpan := &exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: int64(length), - } - arrAllNulls := p.getBoolArr(int64(length), trueProb, 1) - defer arrAllNulls.Release() - - batch := &ExecBatch{ - Values: []Datum{ - NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean)), - NewDatum(arrAllNulls), - }, - Len: int64(length), - } - defer batch.Values[1].Release() - - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), outSpan)) - p.Same(arrAllNulls.Data().Buffers()[0], outSpan.Buffers[0].Owner) - outSpan.Buffers[0].Owner.Release() - }) -} - -func (p *PropagateNullsSuite) TestSingleValueWithNulls() { - const length int64 = 100 - arr := p.getBoolArr(length, 0.5, 0.5) - defer arr.Release() - - checkSliced := func(offset int64, prealloc bool, outOffset int64) { - // unaligned bitmap, zero copy not possible - sliced := array.NewSlice(arr, offset, int64(arr.Len())) - defer sliced.Release() - vals := []Datum{NewDatum(sliced)} - defer vals[0].Release() - - output := &exec.ArraySpan{ - Type: arrow.FixedWidthTypes.Boolean, - Len: vals[0].Len(), - Offset: outOffset, - } - - batch := &ExecBatch{Values: vals, Len: vals[0].Len()} - - var preallocatedBitmap *memory.Buffer - if prealloc { - preallocatedBitmap = memory.NewResizableBuffer(p.mem) - preallocatedBitmap.Resize(int(bitutil.BytesForBits(int64(sliced.Len()) + outOffset))) - defer preallocatedBitmap.Release() - 
output.Buffers[0].SetBuffer(preallocatedBitmap) - output.Buffers[0].SelfAlloc = true - } else { - p.EqualValues(0, output.Offset) - } - - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) - if !prealloc { - parentBuf := arr.Data().Buffers()[0] - if offset == 0 { - // validity bitmap same, no slice - p.Same(parentBuf, output.Buffers[0].Owner) - } else if offset%8 == 0 { - // validity bitmap sliced - p.NotSame(parentBuf, output.Buffers[0].Owner) - p.Same(parentBuf, output.Buffers[0].Owner.Parent()) - defer output.Buffers[0].Owner.Release() - } else { - // new memory for offset not 0 mod 8 - p.NotSame(parentBuf, output.Buffers[0].Owner) - p.Nil(output.Buffers[0].Owner.Parent()) - defer output.Buffers[0].Owner.Release() - } - } else { - // preallocated, so check that the validity bitmap is unbothered - p.Same(preallocatedBitmap, output.Buffers[0].Owner) - } - - p.EqualValues(sliced.NullN(), output.UpdateNullCount()) - p.True(bitutil.BitmapEquals( - sliced.NullBitmapBytes(), output.Buffers[0].Buf, - int64(sliced.Data().Offset()), output.Offset, output.Len)) - p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) - } - - tests := []struct { - offset, outoffset int64 - prealloc bool - }{ - {8, 0, false}, - {7, 0, false}, - {8, 0, true}, - {7, 0, true}, - {8, 4, true}, - {7, 4, true}, - } - - for _, tt := range tests { - name := fmt.Sprintf("off=%d,prealloc=%t,outoff=%d", tt.offset, tt.prealloc, tt.outoffset) - p.Run(name, func() { - checkSliced(tt.offset, tt.prealloc, tt.outoffset) - }) - } -} - -func (p *PropagateNullsSuite) TestIntersectsNulls() { - const length = 16 - var ( - // 0b01111111 0b11001111 - bitmap1 = [8]byte{127, 207, 0, 0, 0, 0, 0, 0} - // 0b11111110 0b01111111 - bitmap2 = [8]byte{254, 127, 0, 0, 0, 0, 0, 0} - // 0b11101111 0b11111110 - bitmap3 = [8]byte{239, 254, 0, 0, 0, 0, 0, 0} - ) - - arr1 := array.NewData(arrow.FixedWidthTypes.Boolean, length, - []*memory.Buffer{memory.NewBufferBytes(bitmap1[:]), 
nil}, nil, array.UnknownNullCount, 0) - arr2 := array.NewData(arrow.FixedWidthTypes.Boolean, length, - []*memory.Buffer{memory.NewBufferBytes(bitmap2[:]), nil}, nil, array.UnknownNullCount, 0) - arr3 := array.NewData(arrow.FixedWidthTypes.Boolean, length, - []*memory.Buffer{memory.NewBufferBytes(bitmap3[:]), nil}, nil, array.UnknownNullCount, 0) - - checkCase := func(vals []Datum, exNullCount int, exBitmap []byte, prealloc bool, outoffset int) { - batch := &ExecBatch{Values: vals, Len: length} - - output := &exec.ArraySpan{Type: arrow.FixedWidthTypes.Boolean, Len: length} - - var nulls *memory.Buffer - if prealloc { - // make the buffer one byte bigger so we can have non-zero offsets - nulls = memory.NewResizableBuffer(p.mem) - nulls.Resize(3) - defer nulls.Release() - output.Buffers[0].SetBuffer(nulls) - output.Buffers[0].SelfAlloc = true - } else { - // non-zero output offset not permitted unless output memory is preallocated - p.Equal(0, outoffset) - } - - output.Offset = int64(outoffset) - - p.NoError(propagateNulls(p.ctx, ExecSpanFromBatch(batch), output)) - - // preallocated memory used - if prealloc { - p.Same(nulls, output.Buffers[0].Owner) - } else { - defer output.Buffers[0].Owner.Release() - } - - p.EqualValues(array.UnknownNullCount, output.Nulls) - p.EqualValues(exNullCount, output.UpdateNullCount()) - - p.True(bitutil.BitmapEquals(exBitmap, output.Buffers[0].Buf, 0, output.Offset, length)) - p.assertValidityZeroExtraBits(output.Buffers[0].Buf, int(output.Len), int(output.Offset)) - } - - p.Run("0b01101110 0b01001110", func() { - // 0b01101110 0b01001110 - expected := [2]byte{110, 78} - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], false, 0) - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 0) - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(arr3)}, 7, expected[:], true, 4) - }) - - p.Run("0b01111110 0b01001111", func() { - expected := [2]byte{126, 79} - 
checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], false, 0) - checkCase([]Datum{NewDatum(arr1), NewDatum(arr2)}, 5, expected[:], true, 4) - }) -} - -func TestComputeInternals(t *testing.T) { - suite.Run(t, new(PropagateNullsSuite)) -} - -type ExecSpanItrSuite struct { - ComputeInternalsTestSuite - - iter spanIterator -} - -func (e *ExecSpanItrSuite) setupIterator(batch *ExecBatch, maxChunk int64) { - var err error - _, e.iter, err = iterateExecSpans(batch, maxChunk, true) - e.NoError(err) -} - -func (e *ExecSpanItrSuite) checkIteration(input *ExecBatch, chunksize int, exBatchSizes []int) { - e.setupIterator(input, int64(chunksize)) - var ( - batch exec.ExecSpan - curPos int64 - pos int64 - next bool - ) - - for _, sz := range exBatchSizes { - batch, pos, next = e.iter() - e.True(next) - e.EqualValues(sz, batch.Len) - - for j, val := range input.Values { - switch val := val.(type) { - case *ScalarDatum: - e.Truef(scalar.Equals(batch.Values[j].Scalar, val.Value), "expected: %s\ngot: %s", val.Value, batch.Values[j].Scalar) - case *ArrayDatum: - arr := val.MakeArray() - sl := array.NewSlice(arr, curPos, curPos+batch.Len) - got := batch.Values[j].Array.MakeArray() - - e.Truef(array.Equal(sl, got), "expected: %s\ngot: %s", sl, got) - - got.Release() - arr.Release() - sl.Release() - case *ChunkedDatum: - carr := val.Value - if batch.Len == 0 { - e.Zero(carr.Len()) - } else { - chkd := array.NewChunkedSlice(carr, curPos, curPos+batch.Len) - defer chkd.Release() - e.Len(chkd.Chunks(), 1) - got := batch.Values[j].Array.MakeArray() - defer got.Release() - e.Truef(array.Equal(got, chkd.Chunk(0)), "expected: %s\ngot: %s", chkd.Chunk(0), got) - } - } - } - - curPos += int64(sz) - e.EqualValues(curPos, pos) - } - - batch, pos, next = e.iter() - e.Zero(batch) - e.False(next) - e.EqualValues(input.Len, pos) -} - -func (e *ExecSpanItrSuite) TestBasics() { - const length = 100 - - arr1 := e.getInt32Arr(length, 0.1) - defer arr1.Release() - arr2 := 
e.getFloat64Arr(length, 0.1) - defer arr2.Release() - - input := &ExecBatch{ - Len: length, - Values: []Datum{NewDatum(arr1), NewDatum(arr2), NewDatum(int32(3))}, - } - defer func() { - for _, v := range input.Values { - v.Release() - } - }() - - e.Run("simple", func() { - e.setupIterator(input, DefaultMaxChunkSize) - - batch, pos, next := e.iter() - e.True(next) - e.Len(batch.Values, 3) - e.EqualValues(length, batch.Len) - e.EqualValues(length, pos) - - in1 := input.Values[0].(*ArrayDatum).MakeArray() - defer in1.Release() - in2 := input.Values[1].(*ArrayDatum).MakeArray() - defer in2.Release() - out1 := batch.Values[0].Array.MakeArray() - defer out1.Release() - out2 := batch.Values[1].Array.MakeArray() - defer out2.Release() - - e.Truef(array.Equal(in1, out1), "expected: %s\ngot: %s", in1, out1) - e.Truef(array.Equal(in2, out2), "expected: %s\ngot: %s", in2, out2) - e.True(scalar.Equals(input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar), input.Values[2].(*ScalarDatum).Value, batch.Values[2].Scalar) - - _, pos, next = e.iter() - e.EqualValues(length, pos) - e.False(next) - }) - - e.Run("iterations", func() { - e.checkIteration(input, 16, []int{16, 16, 16, 16, 16, 16, 4}) - }) -} - -func (e *ExecSpanItrSuite) TestInputValidation() { - arr1 := e.getInt32Arr(10, 0.1) - defer arr1.Release() - arr2 := e.getInt32Arr(9, 0.1) - defer arr2.Release() - - // length mismatch - batch := &ExecBatch{ - Values: []Datum{&ArrayDatum{arr1.Data()}, &ArrayDatum{arr2.Data()}}, - Len: 10, - } - - _, _, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.ErrorIs(err, arrow.ErrInvalid) - - // swap order of input - batch.Values = []Datum{&ArrayDatum{arr2.Data()}, &ArrayDatum{arr1.Data()}} - - _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.ErrorIs(err, arrow.ErrInvalid) - - batch.Values = []Datum{&ArrayDatum{arr1.Data()}} - _, _, err = iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.NoError(err) -} - -func (e *ExecSpanItrSuite) 
TestChunkedArrays() { - arr1 := e.getInt32Chunked([]int64{0, 20, 10}) - defer arr1.Release() - arr2 := e.getInt32Chunked([]int64{15, 15}) - defer arr2.Release() - arr3 := e.getInt32Arr(30, 0.1) - defer arr3.Release() - - batch := &ExecBatch{ - Values: []Datum{ - &ChunkedDatum{arr1}, &ChunkedDatum{arr2}, &ArrayDatum{arr3.Data()}, - NewDatum(int32(5)), NewDatum(scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean))}, - Len: 30, - } - - e.checkIteration(batch, 10, []int{10, 5, 5, 10}) - e.checkIteration(batch, 20, []int{15, 5, 10}) - e.checkIteration(batch, 30, []int{15, 5, 10}) -} - -func (e *ExecSpanItrSuite) TestZeroLengthInput() { - carr := arrow.NewChunked(arrow.PrimitiveTypes.Int32, []arrow.Array{}) - checkArgs := func(batch *ExecBatch) { - _, itr, err := iterateExecSpans(batch, DefaultMaxChunkSize, true) - e.NoError(err) - itrSpan, _, next := itr() - - e.False(next) - e.Zero(itrSpan) - } - - input := &ExecBatch{Len: 0} - - // zero-length chunkedarray with zero chunks - input.Values = []Datum{&ChunkedDatum{carr}} - checkArgs(input) - - // zero-length array - arr := e.getInt32Arr(0, 0.1) - defer arr.Release() - input.Values = []Datum{&ArrayDatum{arr.Data()}} - checkArgs(input) - - // chunkedarray with single empty chunk - carr = e.getInt32Chunked([]int64{0}) - input.Values = []Datum{&ChunkedDatum{carr}} - checkArgs(input) -} - -func TestExecSpanIterator(t *testing.T) { - suite.Run(t, new(ExecSpanItrSuite)) -} diff --git a/go/arrow/compute/exec_test.go b/go/arrow/compute/exec_test.go deleted file mode 100644 index 27f6676f3187c..0000000000000 --- a/go/arrow/compute/exec_test.go +++ /dev/null @@ -1,379 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/suite" -) - -func ExecCopyArray(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - debug.Assert(len(batch.Values) == 1, "wrong number of values") - valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) - - arg0 := batch.Values[0].Array - dst := out.Buffers[1].Buf[out.Offset*valueSize:] - src := arg0.Buffers[1].Buf[arg0.Offset*valueSize:] - copy(dst, src[:batch.Len*valueSize]) - return nil -} - -func ExecComputedBitmap(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - // propagate nulls not used. 
check that out bitmap isn't the same already - // as the input bitmap - arg0 := batch.Values[0].Array - if bitutil.CountSetBits(arg0.Buffers[1].Buf, int(arg0.Offset), int(batch.Len)) > 0 { - // check that the bitmap hasn't already been copied - debug.Assert(!bitutil.BitmapEquals(arg0.Buffers[0].Buf, out.Buffers[0].Buf, - arg0.Offset, out.Offset, batch.Len), "bitmap should not have already been copied") - } - - bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, int(out.Offset)) - return ExecCopyArray(ctx, batch, out) -} - -func ExecNoPreallocatedData(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - // validity preallocated, not data - debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") - valueSize := int64(batch.Values[0].Type().(arrow.FixedWidthDataType).BitWidth() / 8) - out.Buffers[1].SetBuffer(ctx.Allocate(int(out.Len * valueSize))) - out.Buffers[1].SelfAlloc = true - return ExecCopyArray(ctx, batch, out) -} - -func ExecNoPreallocatedAnything(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - // neither validity nor data preallocated - debug.Assert(out.Offset == 0, "invalid offset for non-prealloc") - out.Buffers[0].SetBuffer(ctx.AllocateBitmap(out.Len)) - out.Buffers[0].SelfAlloc = true - arg0 := batch.Values[0].Array - bitutil.CopyBitmap(arg0.Buffers[0].Buf, int(arg0.Offset), int(batch.Len), out.Buffers[0].Buf, 0) - - // reuse kernel that allocates data - return ExecNoPreallocatedData(ctx, batch, out) -} - -type ExampleOptions struct { - Value scalar.Scalar -} - -func (e *ExampleOptions) TypeName() string { return "example" } - -type ExampleState struct { - Value scalar.Scalar -} - -func InitStateful(_ *exec.KernelCtx, args exec.KernelInitArgs) (exec.KernelState, error) { - value := args.Options.(*ExampleOptions).Value - return &ExampleState{Value: value}, nil -} - -func ExecStateful(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { 
- state := ctx.State.(*ExampleState) - multiplier := state.Value.(*scalar.Int32).Value - - arg0 := batch.Values[0].Array - arg0Data := exec.GetSpanValues[int32](&arg0, 1) - dst := exec.GetSpanValues[int32](out, 1) - for i, v := range arg0Data { - dst[i] = v * multiplier - } - return nil -} - -func ExecAddInt32(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { - left := exec.GetSpanValues[int32](&batch.Values[0].Array, 1) - right := exec.GetSpanValues[int32](&batch.Values[1].Array, 1) - outValues := exec.GetSpanValues[int32](out, 1) - for i := 0; i < int(batch.Len); i++ { - outValues[i] = left[i] + right[i] - } - return nil -} - -type CallScalarFuncSuite struct { - ComputeInternalsTestSuite -} - -func (c *CallScalarFuncSuite) addCopyFuncs() { - registry = GetFunctionRegistry() - - fn := NewScalarFunction("test_copy", Unary(), EmptyFuncDoc) - types := []arrow.DataType{arrow.PrimitiveTypes.Uint8, arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Float64} - for _, t := range types { - c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(t)}, - exec.NewOutputType(t), ExecCopyArray, nil)) - } - c.True(registry.AddFunction(fn, false)) - - // a version which doesn't want the executor to call propagatenulls - fn2 := NewScalarFunction("test_copy_computed_bitmap", Unary(), EmptyFuncDoc) - kernel := exec.NewScalarKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, - exec.NewOutputType(arrow.PrimitiveTypes.Uint8), ExecComputedBitmap, nil) - kernel.NullHandling = exec.NullComputedPrealloc - c.NoError(fn2.AddKernel(kernel)) - c.True(registry.AddFunction(fn2, false)) -} - -func (c *CallScalarFuncSuite) addNoPreallocFuncs() { - registry = GetFunctionRegistry() - - // a function that allocates its own output memory. 
we have cases - // for both non-preallocated data and non-preallocated bitmap - f1 := NewScalarFunction("test_nopre_data", Unary(), EmptyFuncDoc) - f2 := NewScalarFunction("test_nopre_validity_or_data", Unary(), EmptyFuncDoc) - - kernel := exec.NewScalarKernel( - []exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Uint8)}, - exec.NewOutputType(arrow.PrimitiveTypes.Uint8), - ExecNoPreallocatedData, nil) - kernel.MemAlloc = exec.MemNoPrealloc - c.NoError(f1.AddKernel(kernel)) - - kernel.ExecFn = ExecNoPreallocatedAnything - kernel.NullHandling = exec.NullComputedNoPrealloc - c.NoError(f2.AddKernel(kernel)) - - c.True(registry.AddFunction(f1, false)) - c.True(registry.AddFunction(f2, false)) -} - -func (c *CallScalarFuncSuite) addStatefulFunc() { - registry := GetFunctionRegistry() - - // this functions behavior depends on a static parameter that - // is made available to the execution through its options object - fn := NewScalarFunction("test_stateful", Unary(), EmptyFuncDoc) - - c.NoError(fn.AddNewKernel([]exec.InputType{exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, - exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecStateful, InitStateful)) - - c.True(registry.AddFunction(fn, false)) -} - -func (c *CallScalarFuncSuite) addScalarFunc() { - registry := GetFunctionRegistry() - - fn := NewScalarFunction("test_scalar_add_int32", Binary(), EmptyFuncDoc) - c.NoError(fn.AddNewKernel([]exec.InputType{ - exec.NewExactInput(arrow.PrimitiveTypes.Int32), - exec.NewExactInput(arrow.PrimitiveTypes.Int32)}, - exec.NewOutputType(arrow.PrimitiveTypes.Int32), ExecAddInt32, nil)) - c.True(registry.AddFunction(fn, false)) -} - -func (c *CallScalarFuncSuite) SetupSuite() { - c.addCopyFuncs() - c.addNoPreallocFuncs() - c.addStatefulFunc() - c.addScalarFunc() -} - -func (c *CallScalarFuncSuite) TestArgumentValidation() { - // copy accepts only a single array arg - arr := c.getInt32Arr(10, 0.1) - defer arr.Release() - d1 := &ArrayDatum{Value: arr.Data()} - - c.Run("too many 
args", func() { - args := []Datum{d1, d1} - _, err := CallFunction(c.ctx.Ctx, "test_copy", nil, args...) - c.ErrorIs(err, arrow.ErrInvalid) - }) - - c.Run("too few args", func() { - _, err := CallFunction(c.ctx.Ctx, "test_copy", nil) - c.ErrorIs(err, arrow.ErrInvalid) - }) - - d1Scalar := NewDatum(int32(5)) - result, err := CallFunction(c.ctx.Ctx, "test_copy", nil, d1) - c.NoError(err) - result.Release() - result, err = CallFunction(c.ctx.Ctx, "test_copy", nil, d1Scalar) - c.NoError(err) - result.Release() -} - -func (c *CallScalarFuncSuite) TestPreallocationCases() { - nullProb := float64(0.2) - arr := c.getUint8Arr(100, nullProb) - defer arr.Release() - - funcNames := []string{"test_copy", "test_copy_computed_bitmap"} - for _, funcName := range funcNames { - c.Run(funcName, func() { - c.resetCtx() - - c.Run("single output default", func() { - result, err := CallFunction(c.ctx.Ctx, funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("exec chunks", func() { - // set the exec_chunksize to be smaller so now we have - // several invocations of the kernel, - // but still only one output array - c.execCtx.ChunkSize = 80 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("not multiple 8 chunk", func() { - // chunksize is not a multiple of 8 - c.execCtx.ChunkSize = 11 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("chunked", func() { - // input is chunked, output is one big chunk - chk1, chk2 := array.NewSlice(arr, 0, 10), array.NewSlice(arr, 10, int64(arr.Len())) - defer chk1.Release() - defer 
chk2.Release() - carr := arrow.NewChunked(arr.DataType(), []arrow.Array{chk1, chk2}) - defer carr.Release() - - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ChunkedDatum{carr}) - c.NoError(err) - defer result.Release() - c.Equal(KindChunked, result.Kind()) - actual := result.(*ChunkedDatum).Value - c.Len(actual.Chunks(), 1) - c.Truef(array.ChunkedEqual(actual, carr), "expected: %s\ngot: %s", carr, actual) - }) - - c.Run("independent", func() { - // preallocate independently for each batch - c.execCtx.PreallocContiguous = false - c.execCtx.ChunkSize = 40 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, &ArrayDatum{arr.Data()}) - c.NoError(err) - defer result.Release() - c.Equal(KindChunked, result.Kind()) - - carr := result.(*ChunkedDatum).Value - c.Len(carr.Chunks(), 3) - sl := array.NewSlice(arr, 0, 40) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(0)) - sl = array.NewSlice(arr, 40, 80) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(1)) - sl = array.NewSlice(arr, 80, int64(arr.Len())) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(2)) - }) - }) - } -} - -func (c *CallScalarFuncSuite) TestBasicNonStandardCases() { - // test some more cases - // - // * validity bitmap computed by kernel rather than propagate nulls - // * data not pre-allocated - // * validity bitmap not pre-allocated - - nullProb := float64(0.2) - arr := c.getUint8Arr(1000, nullProb) - defer arr.Release() - args := []Datum{&ArrayDatum{arr.Data()}} - - for _, funcName := range []string{"test_nopre_data", "test_nopre_validity_or_data"} { - c.Run("funcName", func() { - c.resetCtx() - c.Run("single output default", func() { - result, err := CallFunction(c.ctx.Ctx, funcName, nil, args...) 
- c.NoError(err) - defer result.Release() - c.Equal(KindArray, result.Kind()) - c.assertDatumEqual(arr, result) - }) - - c.Run("split into 3 chunks", func() { - c.execCtx.ChunkSize = 400 - result, err := CallFunction(SetExecCtx(c.ctx.Ctx, c.execCtx), funcName, nil, args...) - c.NoError(err) - defer result.Release() - - c.Equal(KindChunked, result.Kind()) - - carr := result.(*ChunkedDatum).Value - c.Len(carr.Chunks(), 3) - sl := array.NewSlice(arr, 0, 400) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(0)) - sl = array.NewSlice(arr, 400, 800) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(1)) - sl = array.NewSlice(arr, 800, int64(arr.Len())) - defer sl.Release() - c.assertArrayEqual(sl, carr.Chunk(2)) - }) - }) - } -} - -func (c *CallScalarFuncSuite) TestStatefulKernel() { - input, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3, null, 5]`)) - defer input.Release() - - multiplier := scalar.MakeScalar(int32(2)) - expected, _, _ := array.FromJSON(c.mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[2, 4, 6, null, 10]`)) - defer expected.Release() - - options := &ExampleOptions{multiplier} - result, err := CallFunction(c.ctx.Ctx, "test_stateful", options, &ArrayDatum{input.Data()}) - c.NoError(err) - defer result.Release() - c.assertDatumEqual(expected, result) -} - -func (c *CallScalarFuncSuite) TestScalarFunction() { - args := []Datum{NewDatum(int32(5)), NewDatum(int32(7))} - result, err := CallFunction(c.ctx.Ctx, "test_scalar_add_int32", nil, args...) 
- c.NoError(err) - defer result.Release() - - c.Equal(KindScalar, result.Kind()) - expected := scalar.MakeScalar(int32(12)) - c.True(scalar.Equals(expected, result.(*ScalarDatum).Value)) -} - -func TestCallScalarFunctions(t *testing.T) { - suite.Run(t, new(CallScalarFuncSuite)) -} diff --git a/go/arrow/compute/executor.go b/go/arrow/compute/executor.go deleted file mode 100644 index 1d197e4220ab2..0000000000000 --- a/go/arrow/compute/executor.go +++ /dev/null @@ -1,1122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - "math" - "runtime" - "sync" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/bitutil" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/internal" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -// ExecCtx holds simple contextual information for execution -// such as the default ChunkSize for batch iteration, whether or not -// to ensure contiguous preallocations for kernels that want preallocation, -// and a reference to the desired function registry to use. -// -// An ExecCtx should be placed into a context.Context by using -// SetExecCtx and GetExecCtx to pass it along for execution. -type ExecCtx struct { - // ChunkSize is the size used when iterating batches for execution - // ChunkSize elements will be operated on as a time unless an argument - // is a chunkedarray with a chunk that is smaller - ChunkSize int64 - // PreallocContiguous determines whether preallocating memory for - // execution of compute attempts to preallocate a full contiguous - // buffer for all of the chunks beforehand. - PreallocContiguous bool - // Registry allows specifying the Function Registry to utilize - // when searching for kernel implementations. - Registry FunctionRegistry - // ExecChannelSize is the size of the channel used for passing - // exec results to the WrapResults function. - ExecChannelSize int - // NumParallel determines the number of parallel goroutines - // allowed for parallel executions. 
- NumParallel int -} - -type ctxExecKey struct{} - -const DefaultMaxChunkSize = math.MaxInt64 - -var ( - // global default ExecCtx object, initialized with the - // default max chunk size, contiguous preallocations, and - // the default function registry. - defaultExecCtx ExecCtx - - // WithAllocator returns a new context with the provided allocator - // embedded into the context. - WithAllocator = exec.WithAllocator - // GetAllocator retrieves the allocator from the context, or returns - // memory.DefaultAllocator if there was no allocator in the provided - // context. - GetAllocator = exec.GetAllocator -) - -// DefaultExecCtx returns the default exec context which will be used -// if there is no ExecCtx set into the context for execution. -// -// This can be called to get a copy of the default values which can -// then be modified to set into a context. -// -// The default exec context uses the following values: -// - ChunkSize = DefaultMaxChunkSize (MaxInt64) -// - PreallocContiguous = true -// - Registry = GetFunctionRegistry() -// - ExecChannelSize = 10 -// - NumParallel = runtime.NumCPU() -func DefaultExecCtx() ExecCtx { return defaultExecCtx } - -func init() { - defaultExecCtx.ChunkSize = DefaultMaxChunkSize - defaultExecCtx.PreallocContiguous = true - defaultExecCtx.Registry = GetFunctionRegistry() - defaultExecCtx.ExecChannelSize = 10 - // default level of parallelism - // set to 1 to disable parallelization - defaultExecCtx.NumParallel = runtime.NumCPU() -} - -// SetExecCtx returns a new child context containing the passed in ExecCtx -func SetExecCtx(ctx context.Context, e ExecCtx) context.Context { - return context.WithValue(ctx, ctxExecKey{}, e) -} - -// GetExecCtx returns an embedded ExecCtx from the provided context. -// If it does not contain an ExecCtx, then the default one is returned. 
-func GetExecCtx(ctx context.Context) ExecCtx { - e, ok := ctx.Value(ctxExecKey{}).(ExecCtx) - if ok { - return e - } - return defaultExecCtx -} - -// ExecBatch is a unit of work for kernel execution. It contains a collection -// of Array and Scalar values. -// -// ExecBatch is semantically similar to a RecordBatch but for a SQL-style -// execution context. It represents a collection or records, but constant -// "columns" are represented by Scalar values rather than having to be -// converted into arrays with repeated values. -type ExecBatch struct { - Values []Datum - // Guarantee is a predicate Expression guaranteed to evaluate to true for - // all rows in this batch. - // Guarantee Expression - // Len is the semantic length of this ExecBatch. When the values are - // all scalars, the length should be set to 1 for non-aggregate kernels. - // Otherwise the length is taken from the array values. Aggregate kernels - // can have an ExecBatch formed by projecting just the partition columns - // from a batch in which case it would have scalar rows with length > 1 - // - // If the array values are of length 0, then the length is 0 regardless of - // whether any values are Scalar. - Len int64 -} - -func (e ExecBatch) NumValues() int { return len(e.Values) } - -// simple struct for defining how to preallocate a particular buffer. 
-type bufferPrealloc struct { - bitWidth int - addLen int -} - -func allocateDataBuffer(ctx *exec.KernelCtx, length, bitWidth int) *memory.Buffer { - switch bitWidth { - case 1: - return ctx.AllocateBitmap(int64(length)) - default: - bufsiz := int(bitutil.BytesForBits(int64(length * bitWidth))) - return ctx.Allocate(bufsiz) - } -} - -func addComputeDataPrealloc(dt arrow.DataType, widths []bufferPrealloc) []bufferPrealloc { - if typ, ok := dt.(arrow.FixedWidthDataType); ok { - return append(widths, bufferPrealloc{bitWidth: typ.BitWidth()}) - } - - switch dt.ID() { - case arrow.BINARY, arrow.STRING, arrow.LIST, arrow.MAP: - return append(widths, bufferPrealloc{bitWidth: 32, addLen: 1}) - case arrow.LARGE_BINARY, arrow.LARGE_STRING, arrow.LARGE_LIST: - return append(widths, bufferPrealloc{bitWidth: 64, addLen: 1}) - case arrow.STRING_VIEW, arrow.BINARY_VIEW: - return append(widths, bufferPrealloc{bitWidth: arrow.ViewHeaderSizeBytes * 8}) - } - return widths -} - -// enum to define a generalized assumption of the nulls in the inputs -type nullGeneralization int8 - -const ( - nullGenPerhapsNull nullGeneralization = iota - nullGenAllValid - nullGenAllNull -) - -func getNullGen(val *exec.ExecValue) nullGeneralization { - dtID := val.Type().ID() - switch { - case dtID == arrow.NULL: - return nullGenAllNull - case !internal.DefaultHasValidityBitmap(dtID): - return nullGenAllValid - case val.IsScalar(): - if val.Scalar.IsValid() { - return nullGenAllValid - } - return nullGenAllNull - default: - arr := val.Array - // do not count if they haven't been counted already - if arr.Nulls == 0 || arr.Buffers[0].Buf == nil { - return nullGenAllValid - } - - if arr.Nulls == arr.Len { - return nullGenAllNull - } - } - return nullGenPerhapsNull -} - -func getNullGenDatum(datum Datum) nullGeneralization { - var val exec.ExecValue - switch datum.Kind() { - case KindArray: - val.Array.SetMembers(datum.(*ArrayDatum).Value) - case KindScalar: - val.Scalar = datum.(*ScalarDatum).Value - case 
KindChunked: - return nullGenPerhapsNull - default: - debug.Assert(false, "should be array, scalar, or chunked!") - return nullGenPerhapsNull - } - return getNullGen(&val) -} - -// populate the validity bitmaps with the intersection of the nullity -// of the arguments. If a preallocated bitmap is not provided, then one -// will be allocated if needed (in some cases a bitmap can be zero-copied -// from the arguments). If any Scalar value is null, then the entire -// validity bitmap will be set to null. -func propagateNulls(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ArraySpan) (err error) { - if out.Type.ID() == arrow.NULL { - // null output type is a no-op (rare but it happens) - return - } - - // this function is ONLY able to write into output with non-zero offset - // when the bitmap is preallocated. - if out.Offset != 0 && out.Buffers[0].Buf == nil { - return fmt.Errorf("%w: can only propagate nulls into pre-allocated memory when output offset is non-zero", arrow.ErrInvalid) - } - - var ( - arrsWithNulls = make([]*exec.ArraySpan, 0, len(batch.Values)) - isAllNull bool - prealloc bool = out.Buffers[0].Buf != nil - ) - - for i := range batch.Values { - v := &batch.Values[i] - nullGen := getNullGen(v) - if nullGen == nullGenAllNull { - isAllNull = true - } - if nullGen != nullGenAllValid && v.IsArray() { - arrsWithNulls = append(arrsWithNulls, &v.Array) - } - } - - outBitmap := out.Buffers[0].Buf - if isAllNull { - // an all-null value gives us a short circuit opportunity - // output should all be null - out.Nulls = out.Len - if prealloc { - bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, false) - return - } - - // walk all the values with nulls instead of breaking on the first - // in case we find a bitmap that can be reused in the non-preallocated case - for _, arr := range arrsWithNulls { - if arr.Nulls == arr.Len && arr.Buffers[0].Owner != nil { - buf := arr.GetBuffer(0) - buf.Retain() - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].Owner = buf - 
return - } - } - - buf := ctx.AllocateBitmap(int64(out.Len)) - out.Buffers[0].Owner = buf - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].SelfAlloc = true - bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, false) - return - } - - out.Nulls = array.UnknownNullCount - switch len(arrsWithNulls) { - case 0: - out.Nulls = 0 - if prealloc { - bitutil.SetBitsTo(outBitmap, out.Offset, out.Len, true) - } - case 1: - arr := arrsWithNulls[0] - out.Nulls = arr.Nulls - if prealloc { - bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), outBitmap, int(out.Offset)) - return - } - - switch { - case arr.Offset == 0: - out.Buffers[0] = arr.Buffers[0] - out.Buffers[0].Owner.Retain() - case arr.Offset%8 == 0: - buf := memory.SliceBuffer(arr.GetBuffer(0), int(arr.Offset)/8, int(bitutil.BytesForBits(arr.Len))) - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].Owner = buf - default: - buf := ctx.AllocateBitmap(int64(out.Len)) - out.Buffers[0].Owner = buf - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].SelfAlloc = true - bitutil.CopyBitmap(arr.Buffers[0].Buf, int(arr.Offset), int(arr.Len), out.Buffers[0].Buf, 0) - } - return - - default: - if !prealloc { - buf := ctx.AllocateBitmap(int64(out.Len)) - out.Buffers[0].Owner = buf - out.Buffers[0].Buf = buf.Bytes() - out.Buffers[0].SelfAlloc = true - outBitmap = out.Buffers[0].Buf - } - - acc := func(left, right *exec.ArraySpan) { - debug.Assert(left.Buffers[0].Buf != nil, "invalid intersection for null propagation") - debug.Assert(right.Buffers[0].Buf != nil, "invalid intersection for null propagation") - bitutil.BitmapAnd(left.Buffers[0].Buf, right.Buffers[0].Buf, left.Offset, right.Offset, outBitmap, out.Offset, out.Len) - } - - acc(arrsWithNulls[0], arrsWithNulls[1]) - for _, arr := range arrsWithNulls[2:] { - acc(out, arr) - } - } - return -} - -func inferBatchLength(values []Datum) (length int64, allSame bool) { - length, allSame = -1, true - areAllScalar := true - for _, arg := range values { - 
switch arg := arg.(type) { - case *ArrayDatum: - argLength := arg.Len() - if length < 0 { - length = argLength - } else { - if length != argLength { - allSame = false - return - } - } - areAllScalar = false - case *ChunkedDatum: - argLength := arg.Len() - if length < 0 { - length = argLength - } else { - if length != argLength { - allSame = false - return - } - } - areAllScalar = false - } - } - - if areAllScalar && len(values) > 0 { - length = 1 - } else if length < 0 { - length = 0 - } - allSame = true - return -} - -// KernelExecutor is the interface for all executors to initialize and -// call kernel execution functions on batches. -type KernelExecutor interface { - // Init must be called *after* the kernel's init method and any - // KernelState must be set into the KernelCtx *before* calling - // this Init method. This is to facilitate the case where - // Init may be expensive and does not need to be called - // again for each execution of the kernel. For example, - // the same lookup table can be re-used for all scanned batches - // in a dataset filter. - Init(*exec.KernelCtx, exec.KernelInitArgs) error - // Execute the kernel for the provided batch and pass the resulting - // Datum values to the provided channel. - Execute(context.Context, *ExecBatch, chan<- Datum) error - // WrapResults exists for the case where an executor wants to post process - // the batches of result datums. Such as creating a ChunkedArray from - // multiple output batches or so on. Results from individual batch - // executions should be read from the out channel, and WrapResults should - // return the final Datum result. - WrapResults(ctx context.Context, out <-chan Datum, chunkedArgs bool) Datum - // CheckResultType checks the actual result type against the resolved - // output type. If the types don't match an error is returned - CheckResultType(out Datum) error - // Clear resets the state in the executor so that it can be reused. 
- Clear() -} - -// the base implementation for executing non-aggregate kernels. -type nonAggExecImpl struct { - ctx *exec.KernelCtx - ectx ExecCtx - kernel exec.NonAggKernel - outType arrow.DataType - numOutBuf int - dataPrealloc []bufferPrealloc - preallocValidity bool -} - -func (e *nonAggExecImpl) Clear() { - e.ctx, e.kernel, e.outType = nil, nil, nil - if e.dataPrealloc != nil { - e.dataPrealloc = e.dataPrealloc[:0] - } -} - -func (e *nonAggExecImpl) Init(ctx *exec.KernelCtx, args exec.KernelInitArgs) (err error) { - e.ctx, e.kernel = ctx, args.Kernel.(exec.NonAggKernel) - e.outType, err = e.kernel.GetSig().OutType.Resolve(ctx, args.Inputs) - e.ectx = GetExecCtx(ctx.Ctx) - return -} - -func (e *nonAggExecImpl) prepareOutput(length int) *exec.ExecResult { - var nullCount int = array.UnknownNullCount - - if e.kernel.GetNullHandling() == exec.NullNoOutput { - nullCount = 0 - } - - output := &exec.ArraySpan{ - Type: e.outType, - Len: int64(length), - Nulls: int64(nullCount), - } - - if e.preallocValidity { - buf := e.ctx.AllocateBitmap(int64(length)) - output.Buffers[0].Owner = buf - output.Buffers[0].Buf = buf.Bytes() - output.Buffers[0].SelfAlloc = true - } - - for i, pre := range e.dataPrealloc { - if pre.bitWidth >= 0 { - buf := allocateDataBuffer(e.ctx, length+pre.addLen, pre.bitWidth) - output.Buffers[i+1].Owner = buf - output.Buffers[i+1].Buf = buf.Bytes() - output.Buffers[i+1].SelfAlloc = true - } - } - - return output -} - -func (e *nonAggExecImpl) CheckResultType(out Datum) error { - typ := out.(ArrayLikeDatum).Type() - if typ != nil && !arrow.TypeEqual(e.outType, typ) { - return fmt.Errorf("%w: kernel type result mismatch: declared as %s, actual is %s", - arrow.ErrType, e.outType, typ) - } - return nil -} - -type spanIterator func() (exec.ExecSpan, int64, bool) - -func NewScalarExecutor() KernelExecutor { return &scalarExecutor{} } - -type scalarExecutor struct { - nonAggExecImpl - - elideValidityBitmap bool - preallocAllBufs bool - preallocContiguous 
bool - allScalars bool - iter spanIterator - iterLen int64 -} - -func (s *scalarExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- Datum) (err error) { - s.allScalars, s.iter, err = iterateExecSpans(batch, s.ectx.ChunkSize, true) - if err != nil { - return - } - - s.iterLen = batch.Len - - if batch.Len == 0 { - result := array.MakeArrayOfNull(exec.GetAllocator(s.ctx.Ctx), s.outType, 0) - defer result.Release() - out := &exec.ArraySpan{} - out.SetMembers(result.Data()) - return s.emitResult(out, data) - } - - if err = s.setupPrealloc(batch.Len, batch.Values); err != nil { - return - } - - return s.executeSpans(data) -} - -func (s *scalarExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum { - var ( - output Datum - acc []arrow.Array - ) - - toChunked := func() { - acc = output.(ArrayLikeDatum).Chunks() - output.Release() - output = nil - } - - // get first output - select { - case <-ctx.Done(): - return nil - case output = <-out: - // if the inputs contained at least one chunked array - // then we want to return chunked output - if hasChunked { - toChunked() - } - } - - for { - select { - case <-ctx.Done(): - // context is done, either cancelled or a timeout. - // either way, we end early and return what we've got so far. - return output - case o, ok := <-out: - if !ok { // channel closed, wrap it up - if output != nil { - return output - } - - for _, c := range acc { - defer c.Release() - } - - chkd := arrow.NewChunked(s.outType, acc) - defer chkd.Release() - return NewDatum(chkd) - } - - // if we get multiple batches of output, then we need - // to return it as a chunked array. 
- if acc == nil { - toChunked() - } - - defer o.Release() - if o.Len() == 0 { // skip any empty batches - continue - } - - acc = append(acc, o.(*ArrayDatum).MakeArray()) - } - } -} - -func (s *scalarExecutor) executeSpans(data chan<- Datum) (err error) { - var ( - input exec.ExecSpan - output exec.ExecResult - next bool - ) - - if s.preallocContiguous { - // make one big output alloc - prealloc := s.prepareOutput(int(s.iterLen)) - output = *prealloc - - output.Offset = 0 - var resultOffset int64 - var nextOffset int64 - for err == nil { - if input, nextOffset, next = s.iter(); !next { - break - } - output.SetSlice(resultOffset, input.Len) - err = s.executeSingleSpan(&input, &output) - resultOffset = nextOffset - } - if err != nil { - prealloc.Release() - return - } - - return s.emitResult(prealloc, data) - } - - // fully preallocating, but not contiguously - // we (maybe) preallocate only for the output of processing - // the current chunk - for err == nil { - if input, _, next = s.iter(); !next { - break - } - - output = *s.prepareOutput(int(input.Len)) - if err = s.executeSingleSpan(&input, &output); err != nil { - output.Release() - return - } - err = s.emitResult(&output, data) - } - - return -} - -func (s *scalarExecutor) executeSingleSpan(input *exec.ExecSpan, out *exec.ExecResult) error { - switch { - case out.Type.ID() == arrow.NULL: - out.Nulls = out.Len - case s.kernel.GetNullHandling() == exec.NullIntersection: - if !s.elideValidityBitmap { - propagateNulls(s.ctx, input, out) - } - case s.kernel.GetNullHandling() == exec.NullNoOutput: - out.Nulls = 0 - } - return s.kernel.Exec(s.ctx, input, out) -} - -func (s *scalarExecutor) setupPrealloc(totalLen int64, args []Datum) error { - s.numOutBuf = len(s.outType.Layout().Buffers) - outTypeID := s.outType.ID() - // default to no validity pre-allocation for the following cases: - // - Output Array is NullArray - // - kernel.NullHandling is ComputeNoPrealloc or OutputNotNull - s.preallocValidity = false - - if 
outTypeID != arrow.NULL { - switch s.kernel.GetNullHandling() { - case exec.NullComputedPrealloc: - s.preallocValidity = true - case exec.NullIntersection: - s.elideValidityBitmap = true - for _, a := range args { - nullGen := getNullGenDatum(a) == nullGenAllValid - s.elideValidityBitmap = s.elideValidityBitmap && nullGen - } - s.preallocValidity = !s.elideValidityBitmap - case exec.NullNoOutput: - s.elideValidityBitmap = true - } - } - - if s.kernel.GetMemAlloc() == exec.MemPrealloc { - s.dataPrealloc = addComputeDataPrealloc(s.outType, s.dataPrealloc) - } - - // validity bitmap either preallocated or elided, and all data buffers allocated - // this is basically only true for primitive types that are not dict-encoded - s.preallocAllBufs = - ((s.preallocValidity || s.elideValidityBitmap) && len(s.dataPrealloc) == (s.numOutBuf-1) && - !arrow.IsNested(outTypeID) && outTypeID != arrow.DICTIONARY) - - // contiguous prealloc only possible on non-nested types if all - // buffers are preallocated. 
otherwise we have to go chunk by chunk - // - // some kernels are also unable to write into sliced outputs, so - // we respect the kernel's attributes - s.preallocContiguous = - (s.ectx.PreallocContiguous && s.kernel.CanFillSlices() && - s.preallocAllBufs) - - return nil -} - -func (s *scalarExecutor) emitResult(resultData *exec.ArraySpan, data chan<- Datum) error { - var output Datum - if len(resultData.Buffers[0].Buf) != 0 { - resultData.UpdateNullCount() - } - if s.allScalars { - // we boxed scalar inputs as ArraySpan so now we have to unbox the output - arr := resultData.MakeArray() - defer arr.Release() - sc, err := scalar.GetScalar(arr, 0) - if err != nil { - return err - } - if r, ok := sc.(scalar.Releasable); ok { - defer r.Release() - } - output = NewDatum(sc) - } else { - d := resultData.MakeData() - defer d.Release() - output = NewDatum(d) - } - data <- output - return nil -} - -func checkAllIsValue(vals []Datum) error { - for _, v := range vals { - if !DatumIsValue(v) { - return fmt.Errorf("%w: tried executing function with non-value type: %s", - arrow.ErrInvalid, v) - } - } - return nil -} - -func checkIfAllScalar(batch *ExecBatch) bool { - for _, v := range batch.Values { - if v.Kind() != KindScalar { - return false - } - } - return batch.NumValues() > 0 -} - -// iterateExecSpans sets up and returns a function which can iterate a batch -// according to the chunk sizes. If the inputs contain chunked arrays, then -// we will find the min(chunk sizes, maxChunkSize) to ensure we return -// contiguous spans to execute on. -// -// the iteration function returns the next span to execute on, the current -// position in the full batch, and a boolean indicating whether or not -// a span was actually returned (there is data to process). 
-func iterateExecSpans(batch *ExecBatch, maxChunkSize int64, promoteIfAllScalar bool) (haveAllScalars bool, itr spanIterator, err error) { - if batch.NumValues() > 0 { - inferred, allArgsSame := inferBatchLength(batch.Values) - if inferred != batch.Len { - return false, nil, fmt.Errorf("%w: value lengths differed from execbatch length", arrow.ErrInvalid) - } - if !allArgsSame { - return false, nil, fmt.Errorf("%w: array args must all be the same length", arrow.ErrInvalid) - } - } - - var ( - args []Datum = batch.Values - haveChunked bool - chunkIdxes = make([]int, len(args)) - valuePositions = make([]int64, len(args)) - valueOffsets = make([]int64, len(args)) - pos, length int64 = 0, batch.Len - ) - haveAllScalars = checkIfAllScalar(batch) - maxChunkSize = exec.Min(length, maxChunkSize) - - span := exec.ExecSpan{Values: make([]exec.ExecValue, len(args)), Len: 0} - for i, a := range args { - switch arg := a.(type) { - case *ScalarDatum: - span.Values[i].Scalar = arg.Value - case *ArrayDatum: - span.Values[i].Array.SetMembers(arg.Value) - valueOffsets[i] = int64(arg.Value.Offset()) - case *ChunkedDatum: - // populate from first chunk - carr := arg.Value - if len(carr.Chunks()) > 0 { - arr := carr.Chunk(0).Data() - span.Values[i].Array.SetMembers(arr) - valueOffsets[i] = int64(arr.Offset()) - } else { - // fill as zero len - exec.FillZeroLength(carr.DataType(), &span.Values[i].Array) - } - haveChunked = true - } - } - - if haveAllScalars && promoteIfAllScalar { - exec.PromoteExecSpanScalars(span) - } - - nextChunkSpan := func(iterSz int64, span exec.ExecSpan) int64 { - for i := 0; i < len(args) && iterSz > 0; i++ { - // if the argument is not chunked, it's either a scalar or an array - // in which case it doesn't influence the size of the span - chunkedArg, ok := args[i].(*ChunkedDatum) - if !ok { - continue - } - - arg := chunkedArg.Value - if len(arg.Chunks()) == 0 { - iterSz = 0 - continue - } - - var curChunk arrow.Array - for { - curChunk = 
arg.Chunk(chunkIdxes[i]) - if valuePositions[i] == int64(curChunk.Len()) { - // chunk is zero-length, or was exhausted in the previous - // iteration, move to next chunk - chunkIdxes[i]++ - curChunk = arg.Chunk(chunkIdxes[i]) - span.Values[i].Array.SetMembers(curChunk.Data()) - valuePositions[i] = 0 - valueOffsets[i] = int64(curChunk.Data().Offset()) - continue - } - break - } - iterSz = exec.Min(int64(curChunk.Len())-valuePositions[i], iterSz) - } - return iterSz - } - - return haveAllScalars, func() (exec.ExecSpan, int64, bool) { - if pos == length { - return exec.ExecSpan{}, pos, false - } - - iterationSize := exec.Min(length-pos, maxChunkSize) - if haveChunked { - iterationSize = nextChunkSpan(iterationSize, span) - } - - span.Len = iterationSize - for i, a := range args { - if a.Kind() != KindScalar { - span.Values[i].Array.SetSlice(valuePositions[i]+valueOffsets[i], iterationSize) - valuePositions[i] += iterationSize - } - } - - pos += iterationSize - debug.Assert(pos <= length, "bad state for iteration exec span") - return span, pos, true - }, nil -} - -var ( - // have a pool of scalar executors to avoid excessive object creation - scalarExecPool = sync.Pool{ - New: func() any { return &scalarExecutor{} }, - } - vectorExecPool = sync.Pool{ - New: func() any { return &vectorExecutor{} }, - } -) - -func checkCanExecuteChunked(k *exec.VectorKernel) error { - if k.ExecChunked == nil { - return fmt.Errorf("%w: vector kernel cannot execute chunkwise and no chunked exec function defined", arrow.ErrInvalid) - } - - if k.NullHandling == exec.NullIntersection { - return fmt.Errorf("%w: null pre-propagation is unsupported for chunkedarray execution in vector kernels", arrow.ErrInvalid) - } - return nil -} - -type vectorExecutor struct { - nonAggExecImpl - - iter spanIterator - results []*exec.ArraySpan - iterLen int64 - - allScalars bool -} - -func (v *vectorExecutor) Execute(ctx context.Context, batch *ExecBatch, data chan<- Datum) (err error) { - final := 
v.kernel.(*exec.VectorKernel).Finalize - if final != nil { - if v.results == nil { - v.results = make([]*exec.ArraySpan, 0, 1) - } else { - v.results = v.results[:0] - } - } - // some vector kernels have a separate code path for handling chunked - // arrays (VectorKernel.ExecChunked) so we check for any chunked - // arrays. If we do and an ExecChunked function is defined - // then we call that. - hasChunked := haveChunkedArray(batch.Values) - v.numOutBuf = len(v.outType.Layout().Buffers) - v.preallocValidity = v.kernel.GetNullHandling() != exec.NullComputedNoPrealloc && - v.kernel.GetNullHandling() != exec.NullNoOutput - if v.kernel.GetMemAlloc() == exec.MemPrealloc { - v.dataPrealloc = addComputeDataPrealloc(v.outType, v.dataPrealloc) - } - - if v.kernel.(*exec.VectorKernel).CanExecuteChunkWise { - v.allScalars, v.iter, err = iterateExecSpans(batch, v.ectx.ChunkSize, true) - v.iterLen = batch.Len - - var ( - input exec.ExecSpan - next bool - ) - if v.iterLen == 0 { - input.Values = make([]exec.ExecValue, batch.NumValues()) - for i, v := range batch.Values { - exec.FillZeroLength(v.(ArrayLikeDatum).Type(), &input.Values[i].Array) - } - err = v.exec(&input, data) - } - for err == nil { - if input, _, next = v.iter(); !next { - break - } - err = v.exec(&input, data) - } - if err != nil { - return - } - } else { - // kernel cannot execute chunkwise. if we have any chunked arrays, - // then execchunked must be defined or we raise an error - if hasChunked { - if err = v.execChunked(batch, data); err != nil { - return - } - } else { - // no chunked arrays. 
we pack the args into an execspan - // and call regular exec code path - span := ExecSpanFromBatch(batch) - if checkIfAllScalar(batch) { - exec.PromoteExecSpanScalars(*span) - } - if err = v.exec(span, data); err != nil { - return - } - } - } - - if final != nil { - // intermediate results require post-processing after execution is - // completed (possibly involving some accumulated state) - output, err := final(v.ctx, v.results) - if err != nil { - return err - } - - for _, r := range output { - d := r.MakeData() - defer d.Release() - data <- NewDatum(d) - } - } - - return nil -} - -func (v *vectorExecutor) WrapResults(ctx context.Context, out <-chan Datum, hasChunked bool) Datum { - // if kernel doesn't output chunked, just grab the one output and return it - if !v.kernel.(*exec.VectorKernel).OutputChunked { - var output Datum - select { - case <-ctx.Done(): - return nil - case output = <-out: - } - - // we got an output datum, but let's wait for the channel to - // close so we don't have any race conditions - select { - case <-ctx.Done(): - output.Release() - return nil - case <-out: - return output - } - } - - // if execution yielded multiple chunks then the result is a chunked array - var ( - output Datum - acc []arrow.Array - ) - - toChunked := func() { - out := output.(ArrayLikeDatum).Chunks() - acc = make([]arrow.Array, 0, len(out)) - for _, o := range out { - if o.Len() > 0 { - acc = append(acc, o) - } - } - if output.Kind() != KindChunked { - output.Release() - } - output = nil - } - - // get first output - select { - case <-ctx.Done(): - return nil - case output = <-out: - if output == nil || ctx.Err() != nil { - return nil - } - - // if the inputs contained at least one chunked array - // then we want to return chunked output - if hasChunked { - toChunked() - } - } - - for { - select { - case <-ctx.Done(): - // context is done, either cancelled or a timeout. - // either way, we end early and return what we've got so far. 
- return output - case o, ok := <-out: - if !ok { // channel closed, wrap it up - if output != nil { - return output - } - - for _, c := range acc { - defer c.Release() - } - - chkd := arrow.NewChunked(v.outType, acc) - defer chkd.Release() - return NewDatum(chkd) - } - - // if we get multiple batches of output, then we need - // to return it as a chunked array. - if acc == nil { - toChunked() - } - - defer o.Release() - if o.Len() == 0 { // skip any empty batches - continue - } - - acc = append(acc, o.(*ArrayDatum).MakeArray()) - } - } -} - -func (v *vectorExecutor) exec(span *exec.ExecSpan, data chan<- Datum) (err error) { - out := v.prepareOutput(int(span.Len)) - if v.kernel.GetNullHandling() == exec.NullIntersection { - if err = propagateNulls(v.ctx, span, out); err != nil { - return - } - } - if err = v.kernel.Exec(v.ctx, span, out); err != nil { - return - } - return v.emitResult(out, data) -} - -func (v *vectorExecutor) emitResult(result *exec.ArraySpan, data chan<- Datum) (err error) { - if v.kernel.(*exec.VectorKernel).Finalize == nil { - d := result.MakeData() - defer d.Release() - data <- NewDatum(d) - } else { - v.results = append(v.results, result) - } - return nil -} - -func (v *vectorExecutor) execChunked(batch *ExecBatch, out chan<- Datum) error { - if err := checkCanExecuteChunked(v.kernel.(*exec.VectorKernel)); err != nil { - return err - } - - output := v.prepareOutput(int(batch.Len)) - input := make([]*arrow.Chunked, len(batch.Values)) - for i, v := range batch.Values { - switch val := v.(type) { - case *ArrayDatum: - chks := val.Chunks() - input[i] = arrow.NewChunked(val.Type(), chks) - chks[0].Release() - defer input[i].Release() - case *ChunkedDatum: - input[i] = val.Value - default: - return fmt.Errorf("%w: handling with exec chunked", arrow.ErrNotImplemented) - } - } - result, err := v.kernel.(*exec.VectorKernel).ExecChunked(v.ctx, input, output) - if err != nil { - return err - } - - if len(result) == 0 { - empty := output.MakeArray() - 
defer empty.Release() - out <- &ChunkedDatum{Value: arrow.NewChunked(output.Type, []arrow.Array{empty})} - return nil - } - - for _, r := range result { - if err := v.emitResult(r, out); err != nil { - return err - } - } - return nil -} diff --git a/go/arrow/compute/expression.go b/go/arrow/compute/expression.go deleted file mode 100644 index f6aadeda5634b..0000000000000 --- a/go/arrow/compute/expression.go +++ /dev/null @@ -1,904 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute - -import ( - "bytes" - "encoding/hex" - "errors" - "fmt" - "hash/maphash" - "reflect" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/compute/internal/kernels" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/ipc" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" -) - -var hashSeed = maphash.MakeSeed() - -// Expression is an interface for mapping one datum to another. 
An expression -// is one of: -// -// A literal Datum -// A reference to a single (potentially nested) field of an input Datum -// A call to a compute function, with arguments specified by other Expressions -// -// Deprecated: use substrait-go expressions instead. -type Expression interface { - fmt.Stringer - // IsBound returns true if this expression has been bound to a particular - // Datum and/or Schema. - IsBound() bool - // IsScalarExpr returns true if this expression is composed only of scalar - // literals, field references and calls to scalar functions. - IsScalarExpr() bool - // IsNullLiteral returns true if this expression is a literal and entirely - // null. - IsNullLiteral() bool - // IsSatisfiable returns true if this expression could evaluate to true - IsSatisfiable() bool - // FieldRef returns a pointer to the underlying field reference, or nil if - // this expression is not a field reference. - FieldRef() *FieldRef - // Type returns the datatype this expression will evaluate to. - Type() arrow.DataType - - Hash() uint64 - Equals(Expression) bool - - // Release releases the underlying bound C++ memory that is allocated when - // a Bind is performed. Any bound expression should get released to ensure - // no memory leaks. - Release() -} - -func printDatum(datum Datum) string { - switch datum := datum.(type) { - case *ScalarDatum: - if !datum.Value.IsValid() { - return "null" - } - - switch datum.Type().ID() { - case arrow.STRING, arrow.LARGE_STRING: - return strconv.Quote(datum.Value.(scalar.BinaryScalar).String()) - case arrow.BINARY, arrow.FIXED_SIZE_BINARY, arrow.LARGE_BINARY: - return `"` + strings.ToUpper(hex.EncodeToString(datum.Value.(scalar.BinaryScalar).Data())) + `"` - } - - return datum.Value.String() - default: - return datum.String() - } -} - -// Literal is an expression denoting a literal Datum which could be any value -// as a scalar, an array, or so on. -// -// Deprecated: use substrait-go expressions Literal instead. 
-type Literal struct { - Literal Datum -} - -func (Literal) FieldRef() *FieldRef { return nil } -func (l *Literal) String() string { return printDatum(l.Literal) } -func (l *Literal) Type() arrow.DataType { return l.Literal.(ArrayLikeDatum).Type() } -func (l *Literal) IsBound() bool { return l.Type() != nil } -func (l *Literal) IsScalarExpr() bool { return l.Literal.Kind() == KindScalar } - -func (l *Literal) Equals(other Expression) bool { - if rhs, ok := other.(*Literal); ok { - return l.Literal.Equals(rhs.Literal) - } - return false -} - -func (l *Literal) IsNullLiteral() bool { - if ad, ok := l.Literal.(ArrayLikeDatum); ok { - return ad.NullN() == ad.Len() - } - return true -} - -func (l *Literal) IsSatisfiable() bool { - if l.IsNullLiteral() { - return false - } - - if sc, ok := l.Literal.(*ScalarDatum); ok && sc.Type().ID() == arrow.BOOL { - return sc.Value.(*scalar.Boolean).Value - } - - return true -} - -func (l *Literal) Hash() uint64 { - if l.IsScalarExpr() { - return scalar.Hash(hashSeed, l.Literal.(*ScalarDatum).Value) - } - return 0 -} - -func (l *Literal) Release() { - l.Literal.Release() -} - -// Parameter represents a field reference and needs to be bound in order to determine -// its type and shape. -// -// Deprecated: use substrait-go field references instead. 
-type Parameter struct { - ref *FieldRef - - // post bind props - dt arrow.DataType - index int -} - -func (Parameter) IsNullLiteral() bool { return false } -func (p *Parameter) Type() arrow.DataType { return p.dt } -func (p *Parameter) IsBound() bool { return p.Type() != nil } -func (p *Parameter) IsScalarExpr() bool { return p.ref != nil } -func (p *Parameter) IsSatisfiable() bool { return p.Type() == nil || p.Type().ID() != arrow.NULL } -func (p *Parameter) FieldRef() *FieldRef { return p.ref } -func (p *Parameter) Hash() uint64 { return p.ref.Hash(hashSeed) } - -func (p *Parameter) String() string { - switch { - case p.ref.IsName(): - return p.ref.Name() - case p.ref.IsFieldPath(): - return p.ref.FieldPath().String() - default: - return p.ref.String() - } -} - -func (p *Parameter) Equals(other Expression) bool { - if rhs, ok := other.(*Parameter); ok { - return p.ref.Equals(*rhs.ref) - } - - return false -} - -func (p *Parameter) Release() {} - -type comparisonType int8 - -const ( - compNA comparisonType = 0 - compEQ comparisonType = 1 - compLT comparisonType = 2 - compGT comparisonType = 4 - compNE comparisonType = compLT | compGT - compLE comparisonType = compLT | compEQ - compGE comparisonType = compGT | compEQ -) - -//lint:ignore U1000 ignore that this is unused for now -func (c comparisonType) name() string { - switch c { - case compEQ: - return "equal" - case compLT: - return "less" - case compGT: - return "greater" - case compNE: - return "not_equal" - case compLE: - return "less_equal" - case compGE: - return "greater_equal" - } - return "na" -} - -func (c comparisonType) getOp() string { - switch c { - case compEQ: - return "==" - case compLT: - return "<" - case compGT: - return ">" - case compNE: - return "!=" - case compLE: - return "<=" - case compGE: - return ">=" - } - debug.Assert(false, "invalid getop") - return "" -} - -var compmap = map[string]comparisonType{ - "equal": compEQ, - "less": compLT, - "greater": compGT, - "not_equal": compNE, - 
"less_equal": compLE, - "greater_equal": compGE, -} - -func optionsToString(fn FunctionOptions) string { - if s, ok := fn.(fmt.Stringer); ok { - return s.String() - } - - var b strings.Builder - v := reflect.Indirect(reflect.ValueOf(fn)) - b.WriteByte('{') - for i := 0; i < v.Type().NumField(); i++ { - fld := v.Type().Field(i) - tag := fld.Tag.Get("compute") - if tag == "-" { - continue - } - - fldVal := v.Field(i) - fmt.Fprintf(&b, "%s=%v, ", tag, fldVal.Interface()) - } - ret := b.String() - return ret[:len(ret)-2] + "}" -} - -// Call is a function call with specific arguments which are themselves other -// expressions. A call can also have options that are specific to the function -// in question. It must be bound to determine the shape and type. -// -// Deprecated: use substrait-go expression functions instead. -type Call struct { - funcName string - args []Expression - dt arrow.DataType - options FunctionOptions - - cachedHash uint64 -} - -func (c *Call) IsNullLiteral() bool { return false } -func (c *Call) FieldRef() *FieldRef { return nil } -func (c *Call) Type() arrow.DataType { return c.dt } -func (c *Call) IsSatisfiable() bool { return c.Type() == nil || c.Type().ID() != arrow.NULL } - -func (c *Call) String() string { - binary := func(op string) string { - return "(" + c.args[0].String() + " " + op + " " + c.args[1].String() + ")" - } - - if cmp, ok := compmap[c.funcName]; ok { - return binary(cmp.getOp()) - } - - const kleene = "_kleene" - if strings.HasSuffix(c.funcName, kleene) { - return binary(strings.TrimSuffix(c.funcName, kleene)) - } - - if c.funcName == "make_struct" && c.options != nil { - opts := c.options.(*MakeStructOptions) - out := "{" - for i, a := range c.args { - out += opts.FieldNames[i] + "=" + a.String() + ", " - } - return out[:len(out)-2] + "}" - } - - var b strings.Builder - b.WriteString(c.funcName + "(") - for _, a := range c.args { - b.WriteString(a.String() + ", ") - } - - if c.options != nil { - 
b.WriteString(optionsToString(c.options)) - b.WriteString(" ") - } - - ret := b.String() - return ret[:len(ret)-2] + ")" -} - -func (c *Call) Hash() uint64 { - if c.cachedHash != 0 { - return c.cachedHash - } - - var h maphash.Hash - h.SetSeed(hashSeed) - - h.WriteString(c.funcName) - c.cachedHash = h.Sum64() - for _, arg := range c.args { - c.cachedHash = exec.HashCombine(c.cachedHash, arg.Hash()) - } - return c.cachedHash -} - -func (c *Call) IsScalarExpr() bool { - for _, arg := range c.args { - if !arg.IsScalarExpr() { - return false - } - } - - return false - // return isFuncScalar(c.funcName) -} - -func (c *Call) IsBound() bool { - return c.Type() != nil -} - -func (c *Call) Equals(other Expression) bool { - rhs, ok := other.(*Call) - if !ok { - return false - } - - if c.funcName != rhs.funcName || len(c.args) != len(rhs.args) { - return false - } - - for i := range c.args { - if !c.args[i].Equals(rhs.args[i]) { - return false - } - } - - if opt, ok := c.options.(FunctionOptionsEqual); ok { - return opt.Equals(rhs.options) - } - return reflect.DeepEqual(c.options, rhs.options) -} - -func (c *Call) Release() { - for _, a := range c.args { - a.Release() - } - if r, ok := c.options.(releasable); ok { - r.Release() - } -} - -// FunctionOptions can be any type which has a TypeName function. The fields -// of the type will be used (via reflection) to determine the information to -// propagate when serializing to pass to the C++ for execution. 
-type FunctionOptions interface { - TypeName() string -} - -type FunctionOptionsEqual interface { - Equals(FunctionOptions) bool -} - -type FunctionOptionsCloneable interface { - Clone() FunctionOptions -} - -type MakeStructOptions struct { - FieldNames []string `compute:"field_names"` - FieldNullability []bool `compute:"field_nullability"` - FieldMetadata []*arrow.Metadata `compute:"field_metadata"` -} - -func (MakeStructOptions) TypeName() string { return "MakeStructOptions" } - -type NullOptions struct { - NanIsNull bool `compute:"nan_is_null"` -} - -func (NullOptions) TypeName() string { return "NullOptions" } - -type StrptimeOptions struct { - Format string `compute:"format"` - Unit arrow.TimeUnit `compute:"unit"` -} - -func (StrptimeOptions) TypeName() string { return "StrptimeOptions" } - -type NullSelectionBehavior = kernels.NullSelectionBehavior - -const ( - SelectionEmitNulls = kernels.EmitNulls - SelectionDropNulls = kernels.DropNulls -) - -type ArithmeticOptions struct { - NoCheckOverflow bool `compute:"check_overflow"` -} - -func (ArithmeticOptions) TypeName() string { return "ArithmeticOptions" } - -type ( - CastOptions = kernels.CastOptions - FilterOptions = kernels.FilterOptions - TakeOptions = kernels.TakeOptions -) - -func DefaultFilterOptions() *FilterOptions { return &FilterOptions{} } - -func DefaultTakeOptions() *TakeOptions { return &TakeOptions{BoundsCheck: true} } - -func DefaultCastOptions(safe bool) *CastOptions { - if safe { - return &CastOptions{} - } - return &CastOptions{ - AllowIntOverflow: true, - AllowTimeTruncate: true, - AllowTimeOverflow: true, - AllowDecimalTruncate: true, - AllowFloatTruncate: true, - AllowInvalidUtf8: true, - } -} - -func UnsafeCastOptions(dt arrow.DataType) *CastOptions { - return NewCastOptions(dt, false) -} - -func SafeCastOptions(dt arrow.DataType) *CastOptions { - return NewCastOptions(dt, true) -} - -func NewCastOptions(dt arrow.DataType, safe bool) *CastOptions { - opts := DefaultCastOptions(safe) - if 
dt != nil { - opts.ToType = dt - } else { - opts.ToType = arrow.Null - } - return opts -} - -func Cast(ex Expression, dt arrow.DataType) Expression { - opts := &CastOptions{} - if dt == nil { - opts.ToType = arrow.Null - } else { - opts.ToType = dt - } - - return NewCall("cast", []Expression{ex}, opts) -} - -type SetLookupOptions struct { - ValueSet Datum `compute:"value_set"` - SkipNulls bool `compute:"skip_nulls"` -} - -func (SetLookupOptions) TypeName() string { return "SetLookupOptions" } - -func (s *SetLookupOptions) Release() { s.ValueSet.Release() } - -func (s *SetLookupOptions) Equals(other FunctionOptions) bool { - rhs, ok := other.(*SetLookupOptions) - if !ok { - return false - } - - return s.SkipNulls == rhs.SkipNulls && s.ValueSet.Equals(rhs.ValueSet) -} - -func (s *SetLookupOptions) FromStructScalar(sc *scalar.Struct) error { - if v, err := sc.Field("skip_nulls"); err == nil { - s.SkipNulls = v.(*scalar.Boolean).Value - } - - value, err := sc.Field("value_set") - if err != nil { - return err - } - - if v, ok := value.(scalar.ListScalar); ok { - s.ValueSet = NewDatum(v.GetList()) - return nil - } - - return errors.New("set lookup options valueset should be a list") -} - -var ( - funcOptionsMap map[string]reflect.Type - funcOptsTypes = []FunctionOptions{ - SetLookupOptions{}, ArithmeticOptions{}, CastOptions{}, - FilterOptions{}, NullOptions{}, StrptimeOptions{}, MakeStructOptions{}, - } -) - -func init() { - funcOptionsMap = make(map[string]reflect.Type) - for _, ft := range funcOptsTypes { - funcOptionsMap[ft.TypeName()] = reflect.TypeOf(ft) - } -} - -// NewLiteral constructs a new literal expression from any value. It is passed -// to NewDatum which will construct the appropriate Datum and/or scalar -// value for the type provided. 
-func NewLiteral(arg interface{}) Expression { - return &Literal{Literal: NewDatum(arg)} -} - -func NullLiteral(dt arrow.DataType) Expression { - return &Literal{Literal: NewDatum(scalar.MakeNullScalar(dt))} -} - -// NewRef constructs a parameter expression which refers to a specific field -func NewRef(ref FieldRef) Expression { - return &Parameter{ref: &ref, index: -1} -} - -// NewFieldRef is shorthand for NewRef(FieldRefName(field)) -func NewFieldRef(field string) Expression { - return NewRef(FieldRefName(field)) -} - -// NewCall constructs an expression that represents a specific function call with -// the given arguments and options. -func NewCall(name string, args []Expression, opts FunctionOptions) Expression { - return &Call{funcName: name, args: args, options: opts} -} - -// Project is shorthand for `make_struct` to produce a record batch output -// from a group of expressions. -func Project(values []Expression, names []string) Expression { - nulls := make([]bool, len(names)) - for i := range nulls { - nulls[i] = true - } - meta := make([]*arrow.Metadata, len(names)) - return NewCall("make_struct", values, - &MakeStructOptions{FieldNames: names, FieldNullability: nulls, FieldMetadata: meta}) -} - -// Equal is a convenience function for the equal function -func Equal(lhs, rhs Expression) Expression { - return NewCall("equal", []Expression{lhs, rhs}, nil) -} - -// NotEqual creates a call to not_equal -func NotEqual(lhs, rhs Expression) Expression { - return NewCall("not_equal", []Expression{lhs, rhs}, nil) -} - -// Less is shorthand for NewCall("less",....) -func Less(lhs, rhs Expression) Expression { - return NewCall("less", []Expression{lhs, rhs}, nil) -} - -// LessEqual is shorthand for NewCall("less_equal",....) -func LessEqual(lhs, rhs Expression) Expression { - return NewCall("less_equal", []Expression{lhs, rhs}, nil) -} - -// Greater is shorthand for NewCall("greater",....) 
-func Greater(lhs, rhs Expression) Expression { - return NewCall("greater", []Expression{lhs, rhs}, nil) -} - -// GreaterEqual is shorthand for NewCall("greater_equal",....) -func GreaterEqual(lhs, rhs Expression) Expression { - return NewCall("greater_equal", []Expression{lhs, rhs}, nil) -} - -// IsNull creates an expression that returns true if the passed in expression is -// null. Optionally treating NaN as null if desired. -func IsNull(lhs Expression, nanIsNull bool) Expression { - return NewCall("less", []Expression{lhs}, &NullOptions{nanIsNull}) -} - -// IsValid is the inverse of IsNull -func IsValid(lhs Expression) Expression { - return NewCall("is_valid", []Expression{lhs}, nil) -} - -type binop func(lhs, rhs Expression) Expression - -func foldLeft(op binop, args ...Expression) Expression { - switch len(args) { - case 0: - return nil - case 1: - return args[0] - } - - folded := args[0] - for _, a := range args[1:] { - folded = op(folded, a) - } - return folded -} - -func and(lhs, rhs Expression) Expression { - return NewCall("and_kleene", []Expression{lhs, rhs}, nil) -} - -// And constructs a tree of calls to and_kleene for boolean And logic taking -// an arbitrary number of values. -func And(lhs, rhs Expression, ops ...Expression) Expression { - folded := foldLeft(and, append([]Expression{lhs, rhs}, ops...)...) - if folded != nil { - return folded - } - return NewLiteral(true) -} - -func or(lhs, rhs Expression) Expression { - return NewCall("or_kleene", []Expression{lhs, rhs}, nil) -} - -// Or constructs a tree of calls to or_kleene for boolean Or logic taking -// an arbitrary number of values. -func Or(lhs, rhs Expression, ops ...Expression) Expression { - folded := foldLeft(or, append([]Expression{lhs, rhs}, ops...)...) - if folded != nil { - return folded - } - return NewLiteral(false) -} - -// Not creates a call to "invert" for the value specified. 
-func Not(expr Expression) Expression { - return NewCall("invert", []Expression{expr}, nil) -} - -func SerializeOptions(opts FunctionOptions, mem memory.Allocator) (*memory.Buffer, error) { - sc, err := scalar.ToScalar(opts, mem) - if err != nil { - return nil, err - } - if sc, ok := sc.(releasable); ok { - defer sc.Release() - } - - arr, err := scalar.MakeArrayFromScalar(sc, 1, mem) - if err != nil { - return nil, err - } - defer arr.Release() - - batch := array.NewRecord(arrow.NewSchema([]arrow.Field{{Type: arr.DataType(), Nullable: true}}, nil), []arrow.Array{arr}, 1) - defer batch.Release() - - buf := &bufferWriteSeeker{mem: mem} - wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(batch.Schema()), ipc.WithAllocator(mem)) - if err != nil { - return nil, err - } - - wr.Write(batch) - wr.Close() - return buf.buf, nil -} - -// SerializeExpr serializes expressions by converting them to Metadata and -// storing this in the schema of a Record. Embedded arrays and scalars are -// stored in its columns. 
Finally the record is written as an IPC file -func SerializeExpr(expr Expression, mem memory.Allocator) (*memory.Buffer, error) { - var ( - cols []arrow.Array - metaKey []string - metaValue []string - visit func(Expression) error - ) - - addScalar := func(s scalar.Scalar) (string, error) { - ret := len(cols) - arr, err := scalar.MakeArrayFromScalar(s, 1, mem) - if err != nil { - return "", err - } - cols = append(cols, arr) - return strconv.Itoa(ret), nil - } - - visit = func(e Expression) error { - switch e := e.(type) { - case *Literal: - if !e.IsScalarExpr() { - return errors.New("not implemented: serialization of non-scalar literals") - } - metaKey = append(metaKey, "literal") - s, err := addScalar(e.Literal.(*ScalarDatum).Value) - if err != nil { - return err - } - metaValue = append(metaValue, s) - case *Parameter: - if e.ref.Name() == "" { - return errors.New("not implemented: serialization of non-name field_ref") - } - - metaKey = append(metaKey, "field_ref") - metaValue = append(metaValue, e.ref.Name()) - case *Call: - metaKey = append(metaKey, "call") - metaValue = append(metaValue, e.funcName) - - for _, arg := range e.args { - visit(arg) - } - - if e.options != nil { - st, err := scalar.ToScalar(e.options, mem) - if err != nil { - return err - } - metaKey = append(metaKey, "options") - s, err := addScalar(st) - if err != nil { - return err - } - metaValue = append(metaValue, s) - - for _, f := range st.(*scalar.Struct).Value { - switch s := f.(type) { - case releasable: - defer s.Release() - } - } - } - - metaKey = append(metaKey, "end") - metaValue = append(metaValue, e.funcName) - } - return nil - } - - if err := visit(expr); err != nil { - return nil, err - } - - fields := make([]arrow.Field, len(cols)) - for i, c := range cols { - fields[i].Type = c.DataType() - defer c.Release() - } - - metadata := arrow.NewMetadata(metaKey, metaValue) - rec := array.NewRecord(arrow.NewSchema(fields, &metadata), cols, 1) - defer rec.Release() - - buf := 
&bufferWriteSeeker{mem: mem} - wr, err := ipc.NewFileWriter(buf, ipc.WithSchema(rec.Schema()), ipc.WithAllocator(mem)) - if err != nil { - return nil, err - } - - wr.Write(rec) - wr.Close() - return buf.buf, nil -} - -func DeserializeExpr(mem memory.Allocator, buf *memory.Buffer) (Expression, error) { - rdr, err := ipc.NewFileReader(bytes.NewReader(buf.Bytes()), ipc.WithAllocator(mem)) - if err != nil { - return nil, err - } - defer rdr.Close() - - batch, err := rdr.Read() - if err != nil { - return nil, err - } - - if !batch.Schema().HasMetadata() { - return nil, errors.New("serialized Expression's batch repr had no metadata") - } - - if batch.NumRows() != 1 { - return nil, fmt.Errorf("serialized Expression's batch repr was not a single row - had %d", batch.NumRows()) - } - - var ( - getone func() (Expression, error) - index int = 0 - metadata = batch.Schema().Metadata() - ) - - getscalar := func(i string) (scalar.Scalar, error) { - colIndex, err := strconv.ParseInt(i, 10, 32) - if err != nil { - return nil, err - } - if colIndex >= batch.NumCols() { - return nil, errors.New("column index out of bounds") - } - return scalar.GetScalar(batch.Column(int(colIndex)), 0) - } - - getone = func() (Expression, error) { - if index >= metadata.Len() { - return nil, errors.New("unterminated serialized Expression") - } - - key, val := metadata.Keys()[index], metadata.Values()[index] - index++ - - switch key { - case "literal": - scalar, err := getscalar(val) - if err != nil { - return nil, err - } - if r, ok := scalar.(releasable); ok { - defer r.Release() - } - return NewLiteral(scalar), err - case "field_ref": - return NewFieldRef(val), nil - case "call": - args := make([]Expression, 0) - for metadata.Keys()[index] != "end" { - if metadata.Keys()[index] == "options" { - optsScalar, err := getscalar(metadata.Values()[index]) - if err != nil { - return nil, err - } - if r, ok := optsScalar.(releasable); ok { - defer r.Release() - } - var opts FunctionOptions - if optsScalar != 
nil { - typname, err := optsScalar.(*scalar.Struct).Field("_type_name") - if err != nil { - return nil, err - } - if typname.DataType().ID() != arrow.BINARY { - return nil, errors.New("options scalar typename must be binary") - } - - optionsVal := reflect.New(funcOptionsMap[string(typname.(*scalar.Binary).Data())]).Interface() - if err := scalar.FromScalar(optsScalar.(*scalar.Struct), optionsVal); err != nil { - return nil, err - } - opts = optionsVal.(FunctionOptions) - } - index += 2 - return NewCall(val, args, opts), nil - } - - arg, err := getone() - if err != nil { - return nil, err - } - args = append(args, arg) - } - index++ - return NewCall(val, args, nil), nil - default: - return nil, fmt.Errorf("unrecognized serialized Expression key %s", key) - } - } - - return getone() -} diff --git a/go/arrow/compute/expression_test.go b/go/arrow/compute/expression_test.go deleted file mode 100644 index 1898bb3dc92b2..0000000000000 --- a/go/arrow/compute/expression_test.go +++ /dev/null @@ -1,259 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -//go:build go1.18 - -package compute_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/assert" -) - -func TestExpressionToString(t *testing.T) { - ts, _ := scalar.MakeScalar("1990-10-23 10:23:33.123456").CastTo(arrow.FixedWidthTypes.Timestamp_ns) - - add := compute.NewCall("add", []compute.Expression{compute.NewFieldRef("beta"), compute.NewLiteral(3)}, &compute.ArithmeticOptions{}) - - tests := []struct { - expr compute.Expression - expected string - }{ - {compute.NewFieldRef("alpha"), "alpha"}, - {compute.NewLiteral(3), "3"}, - {compute.NewLiteral("a"), `"a"`}, - {compute.NewLiteral("a\nb"), `"a\nb"`}, - {compute.NewLiteral(&scalar.Boolean{}), "null"}, - {compute.NewLiteral(&scalar.Int64{}), "null"}, - {compute.NewLiteral(scalar.NewBinaryScalar(memory.NewBufferBytes([]byte("az")), - arrow.BinaryTypes.Binary)), `"617A"`}, - {compute.NewLiteral(ts), "1990-10-23 10:23:33.123456"}, - {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewFieldRef("beta")}, nil), "add(3, beta)"}, - {compute.And(compute.NewFieldRef("a"), compute.NewFieldRef("b")), "(a and b)"}, - {compute.Or(compute.NewFieldRef("a"), compute.NewFieldRef("b")), "(a or b)"}, - {compute.Not(compute.NewFieldRef("a")), "invert(a)"}, - {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), - "cast(a, {to_type=int32, allow_int_overflow=false, allow_time_truncate=false, " + - "allow_time_overflow=false, allow_decimal_truncate=false, " + - "allow_float_truncate=false, allow_invalid_utf8=false})"}, - {compute.Cast(compute.NewFieldRef("a"), nil), - "cast(a, {to_type=null, allow_int_overflow=false, allow_time_truncate=false, " + - "allow_time_overflow=false, allow_decimal_truncate=false, 
" + - "allow_float_truncate=false, allow_invalid_utf8=false})"}, - {compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)), "(a == 1)"}, - {compute.Less(compute.NewFieldRef("a"), compute.NewLiteral(2)), "(a < 2)"}, - {compute.Greater(compute.NewFieldRef("a"), compute.NewLiteral(3)), "(a > 3)"}, - {compute.NotEqual(compute.NewFieldRef("a"), compute.NewLiteral("a")), `(a != "a")`}, - {compute.LessEqual(compute.NewFieldRef("a"), compute.NewLiteral("b")), `(a <= "b")`}, - {compute.GreaterEqual(compute.NewFieldRef("a"), compute.NewLiteral("c")), `(a >= "c")`}, - {compute.Project( - []compute.Expression{ - compute.NewFieldRef("a"), compute.NewFieldRef("a"), compute.NewLiteral(3), add, - }, []string{"a", "renamed_a", "three", "b"}), - "{a=a, renamed_a=a, three=3, b=" + add.String() + "}"}, - } - - for _, tt := range tests { - t.Run(tt.expected, func(t *testing.T) { - assert.Equal(t, tt.expected, tt.expr.String()) - }) - } -} - -func TestExpressionEquality(t *testing.T) { - tests := []struct { - exp1 compute.Expression - exp2 compute.Expression - equal bool - }{ - {compute.NewLiteral(1), compute.NewLiteral(1), true}, - {compute.NewLiteral(1), compute.NewLiteral(2), false}, - {compute.NewFieldRef("a"), compute.NewFieldRef("a"), true}, - {compute.NewFieldRef("a"), compute.NewFieldRef("b"), false}, - {compute.NewFieldRef("a"), compute.NewLiteral(2), false}, - {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), - compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), true}, - {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), - compute.NewCall("add", []compute.Expression{compute.NewLiteral(2), compute.NewLiteral("a")}, nil), false}, - {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, nil), - compute.NewCall("add", []compute.Expression{compute.NewFieldRef("a"), compute.NewLiteral(3)}, 
nil), false}, - {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), - compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), true}, - {compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{true}), - compute.NewCall("add", []compute.Expression{compute.NewLiteral(3), compute.NewLiteral("a")}, &compute.ArithmeticOptions{false}), false}, - {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), true}, - {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int64), false}, - {compute.Cast(compute.NewFieldRef("a"), arrow.PrimitiveTypes.Int32), compute.NewCall("cast", []compute.Expression{compute.NewFieldRef("a")}, compute.NewCastOptions(arrow.PrimitiveTypes.Int32, false)), false}, - } - - for _, tt := range tests { - t.Run(tt.exp1.String(), func(t *testing.T) { - assert.Equal(t, tt.equal, tt.exp1.Equals(tt.exp2)) - }) - } -} - -func TestExpressionHashing(t *testing.T) { - set := make(map[uint64]compute.Expression) - - e := compute.NewFieldRef("alpha") - set[e.Hash()] = e - - e = compute.NewFieldRef("beta") - _, ok := set[e.Hash()] - assert.False(t, ok) - set[e.Hash()] = e - - e = compute.NewFieldRef("beta") - ex, ok := set[e.Hash()] - assert.True(t, ok) - assert.True(t, e.Equals(ex)) - - e = compute.NewLiteral(1) - set[e.Hash()] = e - _, ok = set[compute.NewLiteral(1).Hash()] - assert.True(t, ok) - _, ok = set[compute.NewLiteral(3).Hash()] - assert.False(t, ok) - set[compute.NewLiteral(3).Hash()] = compute.NewLiteral(3) - - e = compute.NullLiteral(arrow.PrimitiveTypes.Int32) - set[e.Hash()] = e - _, ok = set[compute.NullLiteral(arrow.PrimitiveTypes.Int32).Hash()] - assert.True(t, ok) - e = 
compute.NullLiteral(arrow.PrimitiveTypes.Float32) - _, ok = set[e.Hash()] - assert.False(t, ok) - set[e.Hash()] = e - - e = compute.NewCall("add", []compute.Expression{}, nil) - set[e.Hash()] = e - _, ok = set[compute.NewCall("add", nil, nil).Hash()] - assert.True(t, ok) - e = compute.NewCall("widgetify", nil, nil) - _, ok = set[e.Hash()] - assert.False(t, ok) - set[e.Hash()] = e - - assert.Len(t, set, 8) -} - -func TestIsScalarExpression(t *testing.T) { - assert.True(t, compute.NewLiteral(true).IsScalarExpr()) - arr := array.MakeFromData(array.NewData(arrow.PrimitiveTypes.Int8, 0, []*memory.Buffer{nil, nil}, nil, 0, 0)) - defer arr.Release() - - assert.False(t, compute.NewLiteral(arr).IsScalarExpr()) - assert.True(t, compute.NewFieldRef("a").IsScalarExpr()) -} - -func TestExpressionIsSatisfiable(t *testing.T) { - assert.True(t, compute.NewLiteral(true).IsSatisfiable()) - assert.False(t, compute.NewLiteral(false).IsSatisfiable()) - - null := scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean) - assert.False(t, compute.NewLiteral(null).IsSatisfiable()) - assert.True(t, compute.NewFieldRef("a").IsSatisfiable()) - assert.True(t, compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)).IsSatisfiable()) - // no constant folding here - assert.True(t, compute.Equal(compute.NewLiteral(0), compute.NewLiteral(1)).IsSatisfiable()) - - // when a top level conjunction contains an Expression which is certain to - // evaluate to null, it can only evaluate to null or false - neverTrue := compute.And(compute.NewLiteral(null), compute.NewFieldRef("a")) - // this may appear in satisfiable filters if coalesced (for example, wrapped in fill_na) - assert.True(t, compute.NewCall("is_null", []compute.Expression{neverTrue}, nil).IsSatisfiable()) -} - -func TestExpressionSerializationRoundTrip(t *testing.T) { - bldr := array.NewInt32Builder(memory.DefaultAllocator) - defer bldr.Release() - - bldr.AppendValues([]int32{1, 2, 3}, nil) - lookupArr := bldr.NewArray() - defer 
lookupArr.Release() - - intvalueset := compute.NewDatum(lookupArr) - defer intvalueset.Release() - - bldr2 := array.NewFloat64Builder(memory.DefaultAllocator) - defer bldr2.Release() - - bldr2.AppendValues([]float64{0.5, 1.0, 2.0}, nil) - lookupArr = bldr2.NewArray() - defer lookupArr.Release() - - fltvalueset := compute.NewDatum(lookupArr) - defer fltvalueset.Release() - - tests := []struct { - name string - expr compute.Expression - }{ - {"null literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.Null))}, - {"null int32 literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32))}, - {"null struct literal", compute.NewLiteral(scalar.MakeNullScalar(arrow.StructOf( - arrow.Field{Name: "i", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "s", Type: arrow.BinaryTypes.String, Nullable: true}, - )))}, - {"literal true", compute.NewLiteral(true)}, - {"literal false", compute.NewLiteral(false)}, - {"literal int", compute.NewLiteral(1)}, - {"literal float", compute.NewLiteral(1.125)}, - {"stringy strings", compute.NewLiteral("stringy strings")}, - {"field ref", compute.NewFieldRef("field")}, - {"greater", compute.Greater(compute.NewFieldRef("a"), compute.NewLiteral(0.25))}, - {"or", compute.Or( - compute.Equal(compute.NewFieldRef("a"), compute.NewLiteral(1)), - compute.NotEqual(compute.NewFieldRef("b"), compute.NewLiteral("hello")), - compute.Equal(compute.NewFieldRef("b"), compute.NewLiteral("foo bar")))}, - {"not", compute.Not(compute.NewFieldRef("alpha"))}, - {"is_in", compute.NewCall("is_in", []compute.Expression{compute.NewLiteral(1)}, &compute.SetLookupOptions{ValueSet: intvalueset})}, - {"is_in cast", compute.NewCall("is_in", []compute.Expression{ - compute.NewCall("cast", []compute.Expression{compute.NewFieldRef("version")}, compute.NewCastOptions(arrow.PrimitiveTypes.Float64, true))}, - &compute.SetLookupOptions{ValueSet: fltvalueset})}, - {"is valid", compute.IsValid(compute.NewFieldRef("validity"))}, - {"lots and", 
compute.And( - compute.And( - compute.GreaterEqual(compute.NewFieldRef("x"), compute.NewLiteral(-1.5)), - compute.Less(compute.NewFieldRef("x"), compute.NewLiteral(0.0))), - compute.And(compute.GreaterEqual(compute.NewFieldRef("y"), compute.NewLiteral(0.0)), - compute.Less(compute.NewFieldRef("y"), compute.NewLiteral(1.5))), - compute.And(compute.Greater(compute.NewFieldRef("z"), compute.NewLiteral(1.5)), - compute.LessEqual(compute.NewFieldRef("z"), compute.NewLiteral(3.0))))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - serialized, err := compute.SerializeExpr(tt.expr, mem) - assert.NoError(t, err) - defer serialized.Release() - roundTripped, err := compute.DeserializeExpr(mem, serialized) - assert.NoError(t, err) - defer roundTripped.Release() - assert.Truef(t, tt.expr.Equals(roundTripped), "started with: %s, got: %s", tt.expr, roundTripped) - }) - } -} diff --git a/go/arrow/compute/exprs/builders.go b/go/arrow/compute/exprs/builders.go deleted file mode 100644 index a3af8dd6f287d..0000000000000 --- a/go/arrow/compute/exprs/builders.go +++ /dev/null @@ -1,445 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -//go:build go1.18 - -package exprs - -import ( - "fmt" - "strconv" - "strings" - "unicode" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/extensions" - "github.com/substrait-io/substrait-go/types" -) - -// NewDefaultExtensionSet constructs an empty extension set using the default -// Arrow Extension registry and the default collection of substrait extensions -// from the Substrait-go repo. -func NewDefaultExtensionSet() ExtensionIDSet { - return NewExtensionSetDefault(expr.NewEmptyExtensionRegistry(&extensions.DefaultCollection)) -} - -// NewScalarCall constructs a substrait ScalarFunction expression with the provided -// options and arguments. -// -// The function name (fn) is looked up in the internal Arrow DefaultExtensionIDRegistry -// to ensure it exists and to convert from the Arrow function name to the substrait -// function name. It is then looked up using the DefaultCollection from the -// substrait extensions module to find the declaration. If it cannot be found, -// we try constructing the compound signature name by getting the types of the -// arguments which were passed and appending them to the function name appropriately. -// -// An error is returned if the function cannot be resolved. -func NewScalarCall(reg ExtensionIDSet, fn string, opts []*types.FunctionOption, args ...types.FuncArg) (*expr.ScalarFunction, error) { - conv, ok := reg.GetArrowRegistry().GetArrowToSubstrait(fn) - if !ok { - return nil, arrow.ErrNotFound - } - - id, convOpts, err := conv(fn) - if err != nil { - return nil, err - } - - opts = append(opts, convOpts...) - return expr.NewScalarFunc(reg.GetSubstraitRegistry(), id, opts, args...) 
-} - -// NewFieldRefFromDotPath constructs a substrait reference segment from -// a dot path and the base schema. -// -// dot_path = '.' name -// -// | '[' digit+ ']' -// | dot_path+ -// -// # Examples -// -// Assume root schema of {alpha: i32, beta: struct>, delta: map} -// -// ".alpha" => StructFieldRef(0) -// "[2]" => StructFieldRef(2) -// ".beta[0]" => StructFieldRef(1, StructFieldRef(0)) -// "[1].gamma[3]" => StructFieldRef(1, StructFieldRef(0, ListElementRef(3))) -// ".delta.foobar" => StructFieldRef(2, MapKeyRef("foobar")) -// -// Note: when parsing a name, a '\' preceding any other character -// will be dropped from the resulting name. Therefore if a name must -// contain the characters '.', '\', '[', or ']' then they must be escaped -// with a preceding '\'. -func NewFieldRefFromDotPath(dotpath string, rootSchema *arrow.Schema) (expr.ReferenceSegment, error) { - if len(dotpath) == 0 { - return nil, fmt.Errorf("%w dotpath was empty", arrow.ErrInvalid) - } - - parseName := func() string { - var name string - for { - idx := strings.IndexAny(dotpath, `\[.`) - if idx == -1 { - name += dotpath - dotpath = "" - break - } - - if dotpath[idx] != '\\' { - // subscript for a new field ref - name += dotpath[:idx] - dotpath = dotpath[idx:] - break - } - - if len(dotpath) == idx+1 { - // dotpath ends with a backslash; consume it all - name += dotpath - dotpath = "" - break - } - - // append all characters before backslash, then the character which follows it - name += dotpath[:idx] + string(dotpath[idx+1]) - dotpath = dotpath[idx+2:] - } - return name - } - - var curType arrow.DataType = arrow.StructOf(rootSchema.Fields()...) 
- children := make([]expr.ReferenceSegment, 0) - - for len(dotpath) > 0 { - subscript := dotpath[0] - dotpath = dotpath[1:] - switch subscript { - case '.': - // next element is a name - n := parseName() - switch ct := curType.(type) { - case *arrow.StructType: - idx, found := ct.FieldIdx(n) - if !found { - return nil, fmt.Errorf("%w: dot path '%s' referenced invalid field", arrow.ErrInvalid, dotpath) - } - children = append(children, &expr.StructFieldRef{Field: int32(idx)}) - curType = ct.Field(idx).Type - case *arrow.MapType: - curType = ct.KeyType() - switch ct.KeyType().ID() { - case arrow.BINARY, arrow.LARGE_BINARY: - children = append(children, &expr.MapKeyRef{MapKey: expr.NewByteSliceLiteral([]byte(n), false)}) - case arrow.STRING, arrow.LARGE_STRING: - children = append(children, &expr.MapKeyRef{MapKey: expr.NewPrimitiveLiteral(n, false)}) - default: - return nil, fmt.Errorf("%w: MapKeyRef to non-binary/string map not supported", arrow.ErrNotImplemented) - } - default: - return nil, fmt.Errorf("%w: dot path names must refer to struct fields or map keys", arrow.ErrInvalid) - } - case '[': - subend := strings.IndexFunc(dotpath, func(r rune) bool { return !unicode.IsDigit(r) }) - if subend == -1 || dotpath[subend] != ']' { - return nil, fmt.Errorf("%w: dot path '%s' contained an unterminated index", arrow.ErrInvalid, dotpath) - } - idx, _ := strconv.Atoi(dotpath[:subend]) - switch ct := curType.(type) { - case *arrow.StructType: - if idx > ct.NumFields() { - return nil, fmt.Errorf("%w: field out of bounds in dotpath", arrow.ErrIndex) - } - curType = ct.Field(idx).Type - children = append(children, &expr.StructFieldRef{Field: int32(idx)}) - case *arrow.MapType: - curType = ct.KeyType() - var keyLiteral expr.Literal - // TODO: implement user defined types and variations - switch ct.KeyType().ID() { - case arrow.INT8: - keyLiteral = expr.NewPrimitiveLiteral(int8(idx), false) - case arrow.INT16: - keyLiteral = expr.NewPrimitiveLiteral(int16(idx), false) - case 
arrow.INT32: - keyLiteral = expr.NewPrimitiveLiteral(int32(idx), false) - case arrow.INT64: - keyLiteral = expr.NewPrimitiveLiteral(int64(idx), false) - case arrow.FLOAT32: - keyLiteral = expr.NewPrimitiveLiteral(float32(idx), false) - case arrow.FLOAT64: - keyLiteral = expr.NewPrimitiveLiteral(float64(idx), false) - default: - return nil, fmt.Errorf("%w: dotpath ref to map key type %s", arrow.ErrNotImplemented, ct.KeyType()) - } - children = append(children, &expr.MapKeyRef{MapKey: keyLiteral}) - case *arrow.ListType: - curType = ct.Elem() - children = append(children, &expr.ListElementRef{Offset: int32(idx)}) - case *arrow.LargeListType: - curType = ct.Elem() - children = append(children, &expr.ListElementRef{Offset: int32(idx)}) - case *arrow.FixedSizeListType: - curType = ct.Elem() - children = append(children, &expr.ListElementRef{Offset: int32(idx)}) - default: - return nil, fmt.Errorf("%w: %s type not supported for dotpath ref", arrow.ErrInvalid, ct) - } - dotpath = dotpath[subend+1:] - default: - return nil, fmt.Errorf("%w: dot path must begin with '[' or '.' got '%s'", - arrow.ErrInvalid, dotpath) - } - } - - out := children[0] - if len(children) > 1 { - cur := out - for _, c := range children[1:] { - switch r := cur.(type) { - case *expr.StructFieldRef: - r.Child = c - case *expr.MapKeyRef: - r.Child = c - case *expr.ListElementRef: - r.Child = c - } - cur = c - } - } - - return out, nil -} - -// RefFromFieldPath constructs a substrait field reference segment -// from a compute.FieldPath which should be a slice of integers -// indicating nested field paths to travel. This will return a -// series of StructFieldRef's whose child is the next element in -// the field path. 
-func RefFromFieldPath(field compute.FieldPath) expr.ReferenceSegment { - if len(field) == 0 { - return nil - } - - seg := expr.NewStructFieldRef(int32(field[0])) - parent := seg - for _, ref := range field[1:] { - next := expr.NewStructFieldRef(int32(ref)) - parent.Child = next - parent = next - } - - return seg -} - -// NewFieldRef constructs a properly typed substrait field reference segment, -// from a given arrow field reference, schema and extension set (for resolving -// substrait types). -func NewFieldRef(ref compute.FieldRef, schema *arrow.Schema, ext ExtensionIDSet) (*expr.FieldReference, error) { - path, err := ref.FindOne(schema) - if err != nil { - return nil, err - } - - st, err := ToSubstraitType(arrow.StructOf(schema.Fields()...), false, ext) - if err != nil { - return nil, err - } - - return expr.NewRootFieldRef(RefFromFieldPath(path), st.(*types.StructType)) -} - -// Builder wraps the substrait-go expression Builder and FuncArgBuilder -// interfaces for a simple interface that can be passed around to build -// substrait expressions from Arrow data. -type Builder interface { - expr.Builder - expr.FuncArgBuilder -} - -// ExprBuilder is the parent for building substrait expressions -// via Arrow types and functions. -// -// The expectation is that it should be utilized like so: -// -// bldr := NewExprBuilder(extSet) -// bldr.SetInputSchema(arrowschema) -// call, err := bldr.CallScalar("equal", nil, -// bldr.FieldRef("i32"), -// bldr.Literal(expr.NewPrimitiveLiteral( -// int32(0), false))) -// ex, err := call.BuildExpr() -// ... -// result, err := exprs.ExecuteScalarExpression(ctx, arrowschema, -// ex, input) -type ExprBuilder struct { - b expr.ExprBuilder - extSet ExtensionIDSet - inputSchema *arrow.Schema -} - -// NewExprBuilder constructs a new Expression Builder that will use the -// provided extension set and registry. 
-func NewExprBuilder(extSet ExtensionIDSet) ExprBuilder { - return ExprBuilder{ - b: expr.ExprBuilder{Reg: extSet.GetSubstraitRegistry()}, - extSet: extSet, - } -} - -// SetInputSchema sets the current Arrow schema that will be utilized -// for performing field reference and field type resolutions. -func (e *ExprBuilder) SetInputSchema(s *arrow.Schema) error { - st, err := ToSubstraitType(arrow.StructOf(s.Fields()...), false, e.extSet) - if err != nil { - return err - } - - e.inputSchema = s - e.b.BaseSchema = st.(*types.StructType) - return nil -} - -// MustCallScalar is like CallScalar, but will panic on error rather than -// return it. -func (e *ExprBuilder) MustCallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) Builder { - b, err := e.CallScalar(fn, opts, args...) - if err != nil { - panic(err) - } - return b -} - -// CallScalar constructs a builder for a scalar function call. The function -// name is expected to be valid in the Arrow function registry which will -// map it properly to a substrait expression by resolving the types of -// the arguments. Examples are: "greater", "multiply", "equal", etc. -// -// Can return arrow.ErrNotFound if there is no function mapping found. -// Or will forward any error encountered when converting from an Arrow -// function to a substrait one. -func (e *ExprBuilder) CallScalar(fn string, opts []*types.FunctionOption, args ...expr.FuncArgBuilder) (Builder, error) { - conv, ok := e.extSet.GetArrowRegistry().GetArrowToSubstrait(fn) - if !ok { - return nil, arrow.ErrNotFound - } - - id, convOpts, err := conv(fn) - if err != nil { - return nil, err - } - - opts = append(opts, convOpts...) - return e.b.ScalarFunc(id, opts...).Args(args...), nil -} - -// FieldPath uses a field path to construct a Field Reference -// expression. 
-func (e *ExprBuilder) FieldPath(path compute.FieldPath) Builder { - segments := make([]expr.ReferenceSegment, len(path)) - for i, p := range path { - segments[i] = expr.NewStructFieldRef(int32(p)) - } - - return e.b.RootRef(expr.FlattenRefSegments(segments...)) -} - -// FieldIndex is shorthand for creating a single field reference -// to the struct field index provided. -func (e *ExprBuilder) FieldIndex(i int) Builder { - return e.b.RootRef(expr.NewStructFieldRef(int32(i))) -} - -// FieldRef constructs a field reference expression to the field with -// the given name from the input. It will be resolved to a field -// index when calling BuildExpr. -func (e *ExprBuilder) FieldRef(field string) Builder { - return &refBuilder{eb: e, fieldRef: compute.FieldRefName(field)} -} - -// FieldRefList accepts a list of either integers or strings to -// construct a field reference expression from. This will panic -// if any of elems are not a string or int. -// -// Field names will be resolved to their indexes when BuildExpr is called -// by using the provided Arrow schema. -func (e *ExprBuilder) FieldRefList(elems ...any) Builder { - return &refBuilder{eb: e, fieldRef: compute.FieldRefList(elems...)} -} - -// Literal wraps a substrait literal to be used as an argument to -// building other expressions. -func (e *ExprBuilder) Literal(l expr.Literal) Builder { - return e.b.Literal(l) -} - -// WrapLiteral is a convenience for accepting functions like NewLiteral -// which can potentially return an error. If an error is encountered, -// it will be surfaced when BuildExpr is called. -func (e *ExprBuilder) WrapLiteral(l expr.Literal, err error) Builder { - return e.b.Wrap(l, err) -} - -// Must is a convenience wrapper for any method that returns a Builder -// and error, panic'ing if it received an error or otherwise returning -// the Builder. 
-func (*ExprBuilder) Must(b Builder, err error) Builder { - if err != nil { - panic(err) - } - return b -} - -// Cast returns a Cast expression with the FailBehavior of ThrowException, -// erroring for invalid casts. -func (e *ExprBuilder) Cast(from Builder, to arrow.DataType) (Builder, error) { - t, err := ToSubstraitType(to, true, e.extSet) - if err != nil { - return nil, err - } - - return e.b.Cast(from, t).FailBehavior(types.BehaviorThrowException), nil -} - -type refBuilder struct { - eb *ExprBuilder - - fieldRef compute.FieldRef -} - -func (r *refBuilder) BuildFuncArg() (types.FuncArg, error) { - return r.BuildExpr() -} - -func (r *refBuilder) BuildExpr() (expr.Expression, error) { - if r.eb.inputSchema == nil { - return nil, fmt.Errorf("%w: no input schema specified for ref", arrow.ErrInvalid) - } - - path, err := r.fieldRef.FindOne(r.eb.inputSchema) - if err != nil { - return nil, err - } - - segments := make([]expr.ReferenceSegment, len(path)) - for i, p := range path { - segments[i] = expr.NewStructFieldRef(int32(p)) - } - - return r.eb.b.RootRef(expr.FlattenRefSegments(segments...)).Build() -} diff --git a/go/arrow/compute/exprs/builders_test.go b/go/arrow/compute/exprs/builders_test.go deleted file mode 100644 index 21ad3bd642030..0000000000000 --- a/go/arrow/compute/exprs/builders_test.go +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exprs" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/substrait-io/substrait-go/expr" -) - -func TestNewScalarFunc(t *testing.T) { - reg := exprs.NewDefaultExtensionSet() - - fn, err := exprs.NewScalarCall(reg, "add", nil, - expr.NewPrimitiveLiteral(int32(1), false), - expr.NewPrimitiveLiteral(int32(10), false)) - require.NoError(t, err) - - assert.Equal(t, "add(i32(1), i32(10), {overflow: [ERROR]}) => i32", fn.String()) - assert.Equal(t, "add:i32_i32", fn.CompoundName()) -} - -func TestFieldRefDotPath(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "be.ta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "[alpha]", Type: arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32)} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - tests := []struct { - dotpath string - shouldErr bool - expected expr.ReferenceSegment - }{ - {".alpha", false, 
&expr.StructFieldRef{Field: 0}}, - {"[2]", false, &expr.StructFieldRef{Field: 2}}, - {".beta[0]", false, &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}, - {"[2].gamma[1][5]", false, &expr.StructFieldRef{Field: 2, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.ListElementRef{Offset: 5}}}}}, - {"[2].gamma[0].foobar", false, &expr.StructFieldRef{Field: 2, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.StructFieldRef{Field: 0, - Child: &expr.MapKeyRef{MapKey: expr.NewPrimitiveLiteral("foobar", false)}}}}}, - {`[1].be\.ta`, false, &expr.StructFieldRef{Field: 1, Child: &expr.StructFieldRef{Field: 0}}}, - {`[2].gamma.\[alpha\]`, false, &expr.StructFieldRef{Field: 2, - Child: &expr.StructFieldRef{Field: 1, - Child: &expr.StructFieldRef{Field: 0}}}}, - {`[5]`, true, nil}, // bad struct index - {``, true, nil}, // empty - {`delta`, true, nil}, // not found - {`[1234`, true, nil}, // bad syntax - {`[1stuf]`, true, nil}, // bad syntax - } - - for _, tt := range tests { - t.Run(tt.dotpath, func(t *testing.T) { - ref, err := exprs.NewFieldRefFromDotPath(tt.dotpath, s) - if tt.shouldErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - assert.Truef(t, tt.expected.Equals(ref), "expected: %s\ngot: %s", tt.expected, ref) - } - }) - } -} diff --git a/go/arrow/compute/exprs/exec.go b/go/arrow/compute/exprs/exec.go deleted file mode 100644 index 850acbb3cd492..0000000000000 --- a/go/arrow/compute/exprs/exec.go +++ /dev/null @@ -1,620 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs - -import ( - "context" - "fmt" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exec" - "github.com/apache/arrow/go/v18/arrow/decimal128" - "github.com/apache/arrow/go/v18/arrow/endian" - "github.com/apache/arrow/go/v18/arrow/internal/debug" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/extensions" - "github.com/substrait-io/substrait-go/types" -) - -func makeExecBatch(ctx context.Context, schema *arrow.Schema, partial compute.Datum) (out compute.ExecBatch, err error) { - // cleanup if we get an error - defer func() { - if err != nil { - for _, v := range out.Values { - if v != nil { - v.Release() - } - } - } - }() - - if partial.Kind() == compute.KindRecord { - partialBatch := partial.(*compute.RecordDatum).Value - batchSchema := partialBatch.Schema() - - out.Values = make([]compute.Datum, schema.NumFields()) - out.Len = partialBatch.NumRows() - - for i, field := range schema.Fields() { - idxes := batchSchema.FieldIndices(field.Name) - switch len(idxes) { - case 0: - out.Values[i] = compute.NewDatum(scalar.MakeNullScalar(field.Type)) - case 1: - col := partialBatch.Column(idxes[0]) - if !arrow.TypeEqual(col.DataType(), field.Type) { - // 
referenced field was present but didn't have expected type - // we'll cast this case for now - col, err = compute.CastArray(ctx, col, compute.SafeCastOptions(field.Type)) - if err != nil { - return compute.ExecBatch{}, err - } - defer col.Release() - } - out.Values[i] = compute.NewDatum(col) - default: - err = fmt.Errorf("%w: exec batch field '%s' ambiguous, more than one match", - arrow.ErrInvalid, field.Name) - return compute.ExecBatch{}, err - } - } - return - } - - part, ok := partial.(compute.ArrayLikeDatum) - if !ok { - return out, fmt.Errorf("%w: MakeExecBatch from %s", arrow.ErrNotImplemented, partial) - } - - // wasteful but useful for testing - if part.Type().ID() == arrow.STRUCT { - switch part := part.(type) { - case *compute.ArrayDatum: - arr := part.MakeArray().(*array.Struct) - defer arr.Release() - - batch := array.RecordFromStructArray(arr, nil) - defer batch.Release() - return makeExecBatch(ctx, schema, compute.NewDatumWithoutOwning(batch)) - case *compute.ScalarDatum: - out.Len = 1 - out.Values = make([]compute.Datum, schema.NumFields()) - - s := part.Value.(*scalar.Struct) - dt := s.Type.(*arrow.StructType) - - for i, field := range schema.Fields() { - idx, found := dt.FieldIdx(field.Name) - if !found { - out.Values[i] = compute.NewDatum(scalar.MakeNullScalar(field.Type)) - continue - } - - val := s.Value[idx] - if !arrow.TypeEqual(val.DataType(), field.Type) { - // referenced field was present but didn't have the expected - // type. for now we'll cast this - val, err = val.CastTo(field.Type) - if err != nil { - return compute.ExecBatch{}, err - } - } - out.Values[i] = compute.NewDatum(val) - } - return - } - } - - return out, fmt.Errorf("%w: MakeExecBatch from %s", arrow.ErrNotImplemented, partial) -} - -// ToArrowSchema takes a substrait NamedStruct and an extension set (for -// type resolution mapping) and creates the equivalent Arrow Schema. 
-func ToArrowSchema(base types.NamedStruct, ext ExtensionIDSet) (*arrow.Schema, error) { - fields := make([]arrow.Field, len(base.Names)) - for i, typ := range base.Struct.Types { - dt, nullable, err := FromSubstraitType(typ, ext) - if err != nil { - return nil, err - } - fields[i] = arrow.Field{ - Name: base.Names[i], - Type: dt, - Nullable: nullable, - } - } - - return arrow.NewSchema(fields, nil), nil -} - -type ( - regCtxKey struct{} - extCtxKey struct{} -) - -func WithExtensionRegistry(ctx context.Context, reg *ExtensionIDRegistry) context.Context { - return context.WithValue(ctx, regCtxKey{}, reg) -} - -func GetExtensionRegistry(ctx context.Context) *ExtensionIDRegistry { - v, ok := ctx.Value(regCtxKey{}).(*ExtensionIDRegistry) - if !ok { - v = DefaultExtensionIDRegistry - } - return v -} - -func WithExtensionIDSet(ctx context.Context, ext ExtensionIDSet) context.Context { - return context.WithValue(ctx, extCtxKey{}, ext) -} - -func GetExtensionIDSet(ctx context.Context) ExtensionIDSet { - v, ok := ctx.Value(extCtxKey{}).(ExtensionIDSet) - if !ok { - return NewExtensionSet( - expr.NewEmptyExtensionRegistry(&extensions.DefaultCollection), - GetExtensionRegistry(ctx)) - } - return v -} - -func literalToDatum(mem memory.Allocator, lit expr.Literal, ext ExtensionIDSet) (compute.Datum, error) { - switch v := lit.(type) { - case *expr.PrimitiveLiteral[bool]: - return compute.NewDatum(scalar.NewBooleanScalar(v.Value)), nil - case *expr.PrimitiveLiteral[int8]: - return compute.NewDatum(scalar.NewInt8Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[int16]: - return compute.NewDatum(scalar.NewInt16Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[int32]: - return compute.NewDatum(scalar.NewInt32Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[int64]: - return compute.NewDatum(scalar.NewInt64Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[float32]: - return compute.NewDatum(scalar.NewFloat32Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[float64]: - 
return compute.NewDatum(scalar.NewFloat64Scalar(v.Value)), nil - case *expr.PrimitiveLiteral[string]: - return compute.NewDatum(scalar.NewStringScalar(v.Value)), nil - case *expr.PrimitiveLiteral[types.Timestamp]: - return compute.NewDatum(scalar.NewTimestampScalar(arrow.Timestamp(v.Value), &arrow.TimestampType{Unit: arrow.Microsecond})), nil - case *expr.PrimitiveLiteral[types.TimestampTz]: - return compute.NewDatum(scalar.NewTimestampScalar(arrow.Timestamp(v.Value), - &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: TimestampTzTimezone})), nil - case *expr.PrimitiveLiteral[types.Date]: - return compute.NewDatum(scalar.NewDate32Scalar(arrow.Date32(v.Value))), nil - case *expr.PrimitiveLiteral[types.Time]: - return compute.NewDatum(scalar.NewTime64Scalar(arrow.Time64(v.Value), &arrow.Time64Type{Unit: arrow.Microsecond})), nil - case *expr.PrimitiveLiteral[types.FixedChar]: - length := int(v.Type.(*types.FixedCharType).Length) - return compute.NewDatum(scalar.NewExtensionScalar( - scalar.NewFixedSizeBinaryScalar(memory.NewBufferBytes([]byte(v.Value)), - &arrow.FixedSizeBinaryType{ByteWidth: length}), fixedChar(int32(length)))), nil - case *expr.ByteSliceLiteral[[]byte]: - return compute.NewDatum(scalar.NewBinaryScalar(memory.NewBufferBytes(v.Value), arrow.BinaryTypes.Binary)), nil - case *expr.ByteSliceLiteral[types.UUID]: - return compute.NewDatum(scalar.NewExtensionScalar(scalar.NewFixedSizeBinaryScalar( - memory.NewBufferBytes(v.Value), uuid().(arrow.ExtensionType).StorageType()), uuid())), nil - case *expr.ByteSliceLiteral[types.FixedBinary]: - return compute.NewDatum(scalar.NewFixedSizeBinaryScalar(memory.NewBufferBytes(v.Value), - &arrow.FixedSizeBinaryType{ByteWidth: int(v.Type.(*types.FixedBinaryType).Length)})), nil - case *expr.NullLiteral: - dt, _, err := FromSubstraitType(v.Type, ext) - if err != nil { - return nil, err - } - return compute.NewDatum(scalar.MakeNullScalar(dt)), nil - case *expr.ListLiteral: - var elemType arrow.DataType - - values 
:= make([]scalar.Scalar, len(v.Value)) - for i, val := range v.Value { - d, err := literalToDatum(mem, val, ext) - if err != nil { - return nil, err - } - defer d.Release() - values[i] = d.(*compute.ScalarDatum).Value - if elemType != nil { - if !arrow.TypeEqual(values[i].DataType(), elemType) { - return nil, fmt.Errorf("%w: %s has a value whose type doesn't match the other list values", - arrow.ErrInvalid, v) - } - } else { - elemType = values[i].DataType() - } - } - - bldr := array.NewBuilder(memory.DefaultAllocator, elemType) - defer bldr.Release() - if err := scalar.AppendSlice(bldr, values); err != nil { - return nil, err - } - arr := bldr.NewArray() - defer arr.Release() - return compute.NewDatum(scalar.NewListScalar(arr)), nil - case *expr.MapLiteral: - dt, _, err := FromSubstraitType(v.Type, ext) - if err != nil { - return nil, err - } - - mapType, ok := dt.(*arrow.MapType) - if !ok { - return nil, fmt.Errorf("%w: map literal with non-map type", arrow.ErrInvalid) - } - - keys, values := make([]scalar.Scalar, len(v.Value)), make([]scalar.Scalar, len(v.Value)) - for i, kv := range v.Value { - k, err := literalToDatum(mem, kv.Key, ext) - if err != nil { - return nil, err - } - defer k.Release() - scalarKey := k.(*compute.ScalarDatum).Value - - v, err := literalToDatum(mem, kv.Value, ext) - if err != nil { - return nil, err - } - defer v.Release() - scalarValue := v.(*compute.ScalarDatum).Value - - if !arrow.TypeEqual(mapType.KeyType(), scalarKey.DataType()) { - return nil, fmt.Errorf("%w: key type mismatch for %s, got key with type %s", - arrow.ErrInvalid, mapType, scalarKey.DataType()) - } - if !arrow.TypeEqual(mapType.ItemType(), scalarValue.DataType()) { - return nil, fmt.Errorf("%w: value type mismatch for %s, got value with type %s", - arrow.ErrInvalid, mapType, scalarValue.DataType()) - } - - keys[i], values[i] = scalarKey, scalarValue - } - - keyBldr, valBldr := array.NewBuilder(mem, mapType.KeyType()), array.NewBuilder(mem, mapType.ItemType()) - defer 
keyBldr.Release() - defer valBldr.Release() - - if err := scalar.AppendSlice(keyBldr, keys); err != nil { - return nil, err - } - if err := scalar.AppendSlice(valBldr, values); err != nil { - return nil, err - } - - keyArr, valArr := keyBldr.NewArray(), valBldr.NewArray() - defer keyArr.Release() - defer valArr.Release() - - kvArr, err := array.NewStructArray([]arrow.Array{keyArr, valArr}, []string{"key", "value"}) - if err != nil { - return nil, err - } - defer kvArr.Release() - - return compute.NewDatumWithoutOwning(scalar.NewMapScalar(kvArr)), nil - case *expr.StructLiteral: - fields := make([]scalar.Scalar, len(v.Value)) - names := make([]string, len(v.Value)) - - for i, l := range v.Value { - lit, err := literalToDatum(mem, l, ext) - if err != nil { - return nil, err - } - fields[i] = lit.(*compute.ScalarDatum).Value - } - - s, err := scalar.NewStructScalarWithNames(fields, names) - return compute.NewDatum(s), err - case *expr.ProtoLiteral: - switch v := v.Value.(type) { - case *types.Decimal: - if len(v.Value) != arrow.Decimal128SizeBytes { - return nil, fmt.Errorf("%w: decimal literal had %d bytes (expected %d)", - arrow.ErrInvalid, len(v.Value), arrow.Decimal128SizeBytes) - } - - var val decimal128.Num - data := (*(*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&val)))[:] - copy(data, v.Value) - if endian.IsBigEndian { - // reverse the bytes - for i := len(data)/2 - 1; i >= 0; i-- { - opp := len(data) - 1 - i - data[i], data[opp] = data[opp], data[i] - } - } - - return compute.NewDatum(scalar.NewDecimal128Scalar(val, - &arrow.Decimal128Type{Precision: v.Precision, Scale: v.Scale})), nil - case *types.UserDefinedLiteral: // not yet implemented - case *types.IntervalYearToMonth: - bldr := array.NewInt32Builder(memory.DefaultAllocator) - defer bldr.Release() - typ := intervalYear() - bldr.Append(v.Years) - bldr.Append(v.Months) - arr := bldr.NewArray() - defer arr.Release() - return &compute.ScalarDatum{Value: scalar.NewExtensionScalar( - 
scalar.NewFixedSizeListScalar(arr), typ)}, nil - case *types.IntervalDayToSecond: - bldr := array.NewInt32Builder(memory.DefaultAllocator) - defer bldr.Release() - typ := intervalDay() - bldr.Append(v.Days) - bldr.Append(v.Seconds) - arr := bldr.NewArray() - defer arr.Release() - return &compute.ScalarDatum{Value: scalar.NewExtensionScalar( - scalar.NewFixedSizeListScalar(arr), typ)}, nil - case *types.VarChar: - return compute.NewDatum(scalar.NewExtensionScalar( - scalar.NewStringScalar(v.Value), varChar(int32(v.Length)))), nil - } - } - - return nil, arrow.ErrNotImplemented -} - -// ExecuteScalarExpression executes the given substrait expression using the provided datum as input. -// It will first create an exec batch using the input schema and the datum. -// The datum may have missing or incorrectly ordered columns while the input schema -// should describe the expected input schema for the expression. Missing fields will -// be replaced with null scalars and incorrectly ordered columns will be re-ordered -// according to the schema. -// -// You can provide an allocator to use through the context via compute.WithAllocator. -// -// You can provide the ExtensionIDSet to use through the context via WithExtensionIDSet. -func ExecuteScalarExpression(ctx context.Context, inputSchema *arrow.Schema, expression expr.Expression, partialInput compute.Datum) (compute.Datum, error) { - if expression == nil { - return nil, arrow.ErrInvalid - } - - batch, err := makeExecBatch(ctx, inputSchema, partialInput) - if err != nil { - return nil, err - } - defer func() { - for _, v := range batch.Values { - v.Release() - } - }() - - return executeScalarBatch(ctx, batch, expression, GetExtensionIDSet(ctx)) -} - -// ExecuteScalarSubstrait uses the provided Substrait extended expression to -// determine the expected input schema (replacing missing fields in the partial -// input datum with null scalars and re-ordering columns if necessary) and -// ExtensionIDSet to use. 
You can provide the extension registry to use -// through the context via WithExtensionRegistry, otherwise the default -// Arrow registry will be used. You can provide a memory.Allocator to use -// the same way via compute.WithAllocator. -func ExecuteScalarSubstrait(ctx context.Context, expression *expr.Extended, partialInput compute.Datum) (compute.Datum, error) { - if expression == nil { - return nil, arrow.ErrInvalid - } - - var toExecute expr.Expression - - switch len(expression.ReferredExpr) { - case 0: - return nil, fmt.Errorf("%w: no referred expression to execute", arrow.ErrInvalid) - case 1: - if toExecute = expression.ReferredExpr[0].GetExpr(); toExecute == nil { - return nil, fmt.Errorf("%w: measures not implemented", arrow.ErrNotImplemented) - } - default: - return nil, fmt.Errorf("%w: only single referred expression implemented", arrow.ErrNotImplemented) - } - - reg := GetExtensionRegistry(ctx) - set := NewExtensionSet(expr.NewExtensionRegistry(expression.Extensions, &extensions.DefaultCollection), reg) - sc, err := ToArrowSchema(expression.BaseSchema, set) - if err != nil { - return nil, err - } - - return ExecuteScalarExpression(WithExtensionIDSet(ctx, set), sc, toExecute, partialInput) -} - -func execFieldRef(ctx context.Context, e *expr.FieldReference, input compute.ExecBatch, ext ExtensionIDSet) (compute.Datum, error) { - if e.Root != expr.RootReference { - return nil, fmt.Errorf("%w: only RootReference is implemented", arrow.ErrNotImplemented) - } - - ref, ok := e.Reference.(expr.ReferenceSegment) - if !ok { - return nil, fmt.Errorf("%w: only direct references are implemented", arrow.ErrNotImplemented) - } - - expectedType, _, err := FromSubstraitType(e.GetType(), ext) - if err != nil { - return nil, err - } - - var param compute.Datum - if sref, ok := ref.(*expr.StructFieldRef); ok { - if sref.Field < 0 || sref.Field >= int32(len(input.Values)) { - return nil, arrow.ErrInvalid - } - param = input.Values[sref.Field] - ref = ref.GetChild() - } - - 
out, err := GetReferencedValue(compute.GetAllocator(ctx), ref, param, ext) - if err == compute.ErrEmpty { - out = compute.NewDatum(param) - } else if err != nil { - return nil, err - } - if !arrow.TypeEqual(out.(compute.ArrayLikeDatum).Type(), expectedType) { - return nil, fmt.Errorf("%w: referenced field %s was %s, but should have been %s", - arrow.ErrInvalid, ref, out.(compute.ArrayLikeDatum).Type(), expectedType) - } - - return out, nil -} - -func executeScalarBatch(ctx context.Context, input compute.ExecBatch, exp expr.Expression, ext ExtensionIDSet) (compute.Datum, error) { - if !exp.IsScalar() { - return nil, fmt.Errorf("%w: ExecuteScalarExpression cannot execute non-scalar expressions", - arrow.ErrInvalid) - } - - switch e := exp.(type) { - case expr.Literal: - return literalToDatum(compute.GetAllocator(ctx), e, ext) - case *expr.FieldReference: - return execFieldRef(ctx, e, input, ext) - case *expr.Cast: - if e.Input == nil { - return nil, fmt.Errorf("%w: cast without argument to cast", arrow.ErrInvalid) - } - - arg, err := executeScalarBatch(ctx, input, e.Input, ext) - if err != nil { - return nil, err - } - defer arg.Release() - - dt, _, err := FromSubstraitType(e.Type, ext) - if err != nil { - return nil, fmt.Errorf("%w: could not determine type for cast", err) - } - - var opts *compute.CastOptions - switch e.FailureBehavior { - case types.BehaviorThrowException: - opts = compute.UnsafeCastOptions(dt) - case types.BehaviorUnspecified: - return nil, fmt.Errorf("%w: cast behavior unspecified", arrow.ErrInvalid) - case types.BehaviorReturnNil: - return nil, fmt.Errorf("%w: cast behavior return nil", arrow.ErrNotImplemented) - } - return compute.CastDatum(ctx, arg, opts) - case *expr.ScalarFunction: - var ( - err error - allScalar = true - args = make([]compute.Datum, e.NArgs()) - argTypes = make([]arrow.DataType, e.NArgs()) - ) - for i := 0; i < e.NArgs(); i++ { - switch v := e.Arg(i).(type) { - case types.Enum: - args[i] = 
compute.NewDatum(scalar.NewStringScalar(string(v))) - case expr.Expression: - args[i], err = executeScalarBatch(ctx, input, v, ext) - if err != nil { - return nil, err - } - defer args[i].Release() - - if args[i].Kind() != compute.KindScalar { - allScalar = false - } - default: - return nil, arrow.ErrNotImplemented - } - - argTypes[i] = args[i].(compute.ArrayLikeDatum).Type() - } - - _, conv, ok := ext.DecodeFunction(e.FuncRef()) - if !ok { - return nil, arrow.ErrNotImplemented - } - - fname, opts, err := conv(e) - if err != nil { - return nil, err - } - - ectx := compute.GetExecCtx(ctx) - fn, ok := ectx.Registry.GetFunction(fname) - if !ok { - return nil, arrow.ErrInvalid - } - - if fn.Kind() != compute.FuncScalar { - return nil, arrow.ErrInvalid - } - - k, err := fn.DispatchBest(argTypes...) - if err != nil { - return nil, err - } - - kctx := &exec.KernelCtx{Ctx: ctx, Kernel: k} - init := k.GetInitFn() - kinitArgs := exec.KernelInitArgs{Kernel: k, Inputs: argTypes, Options: opts} - if init != nil { - kctx.State, err = init(kctx, kinitArgs) - if err != nil { - return nil, err - } - } - - executor := compute.NewScalarExecutor() - if err := executor.Init(kctx, kinitArgs); err != nil { - return nil, err - } - - batch := compute.ExecBatch{Values: args} - if allScalar { - batch.Len = 1 - } else { - batch.Len = input.Len - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - ch := make(chan compute.Datum, ectx.ExecChannelSize) - go func() { - defer close(ch) - if err = executor.Execute(ctx, &batch, ch); err != nil { - cancel() - } - }() - - result := executor.WrapResults(ctx, ch, false) - if err == nil { - debug.Assert(executor.CheckResultType(result) == nil, "invalid result type") - } - - if ctx.Err() == context.Canceled && result != nil { - result.Release() - } - - return result, nil - } - - return nil, arrow.ErrNotImplemented -} diff --git a/go/arrow/compute/exprs/exec_internal_test.go b/go/arrow/compute/exprs/exec_internal_test.go 
deleted file mode 100644 index 450db139e9357..0000000000000 --- a/go/arrow/compute/exprs/exec_internal_test.go +++ /dev/null @@ -1,114 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs - -import ( - "context" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var ( - boringArrowSchema = arrow.NewSchema([]arrow.Field{ - {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "i32_req", Type: arrow.PrimitiveTypes.Int32}, - {Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - {Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - {Name: "f32_req", Type: arrow.PrimitiveTypes.Float32}, - {Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: 
true}, - {Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true}, - }, nil) -) - -func TestMakeExecBatch(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - const numRows = 3 - var ( - ctx = compute.WithAllocator(context.Background(), mem) - i32, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 2, 3]`)) - f32, _, _ = array.FromJSON(mem, arrow.PrimitiveTypes.Float32, strings.NewReader(`[1.5, 2.25, 3.125]`)) - empty, _, _ = array.RecordFromJSON(mem, boringArrowSchema, strings.NewReader(`[]`)) - ) - defer i32.Release() - defer f32.Release() - - getField := func(n string) arrow.Field { - f, _ := boringArrowSchema.FieldsByName(n) - return f[0] - } - - tests := []struct { - name string - batch arrow.Record - }{ - {"empty", empty}, - {"subset", array.NewRecord(arrow.NewSchema([]arrow.Field{getField("i32"), getField("f32")}, nil), - []arrow.Array{i32, f32}, numRows)}, - {"flipped subset", array.NewRecord(arrow.NewSchema([]arrow.Field{getField("f32"), getField("i32")}, nil), - []arrow.Array{f32, i32}, numRows)}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - defer tt.batch.Release() - batch, err := makeExecBatch(ctx, boringArrowSchema, compute.NewDatumWithoutOwning(tt.batch)) - require.NoError(t, err) - require.Equal(t, tt.batch.NumRows(), batch.Len) - - defer func() { - for _, v := range batch.Values { - v.Release() - } - }() - - for i, field := range boringArrowSchema.Fields() { - typ := batch.Values[i].(compute.ArrayLikeDatum).Type() - assert.Truef(t, arrow.TypeEqual(typ, field.Type), - "expected: %s\ngot: %s", field.Type, typ) - - idxes := tt.batch.Schema().FieldIndices(field.Name) - if batch.Values[i].Kind() == compute.KindScalar { - assert.False(t, batch.Values[i].(*compute.ScalarDatum).Value.IsValid(), - "null 
placeholder should be injected") - assert.Len(t, idxes, 0, "should only happen when column isn't found") - } else { - col := tt.batch.Column(idxes[0]) - val := batch.Values[i].(*compute.ArrayDatum).MakeArray() - defer val.Release() - - assert.Truef(t, array.Equal(col, val), "expected: %s\ngot: %s", col, val) - } - } - }) - } -} diff --git a/go/arrow/compute/exprs/exec_test.go b/go/arrow/compute/exprs/exec_test.go deleted file mode 100644 index b74f80057a0d7..0000000000000 --- a/go/arrow/compute/exprs/exec_test.go +++ /dev/null @@ -1,461 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exprs_test - -import ( - "context" - "strings" - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/compute/exprs" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/types" -) - -var ( - extSet = exprs.NewDefaultExtensionSet() - _, u32TypeRef, _ = extSet.EncodeTypeVariation(arrow.PrimitiveTypes.Uint32) - - boringSchema = types.NamedStruct{ - Names: []string{ - "bool", "i8", "i32", "i32_req", - "u32", "i64", "f32", "f32_req", - "f64", "date32", "str", "bin"}, - Struct: types.StructType{ - Nullability: types.NullabilityRequired, - Types: []types.Type{ - &types.BooleanType{}, - &types.Int8Type{}, - &types.Int32Type{}, - &types.Int32Type{Nullability: types.NullabilityRequired}, - &types.Int32Type{ - TypeVariationRef: u32TypeRef, - }, - &types.Int64Type{}, - &types.Float32Type{}, - &types.Float32Type{Nullability: types.NullabilityRequired}, - &types.Float64Type{}, - &types.DateType{}, - &types.StringType{}, - &types.BinaryType{}, - }, - }, - } - - boringArrowSchema = arrow.NewSchema([]arrow.Field{ - {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - {Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - {Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "date32", Type: arrow.FixedWidthTypes.Date32, 
Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true}, - }, nil) -) - -func TestToArrowSchema(t *testing.T) { - expectedSchema := arrow.NewSchema([]arrow.Field{ - {Name: "bool", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}, - {Name: "i8", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, - {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - {Name: "i32_req", Type: arrow.PrimitiveTypes.Int32}, - {Name: "u32", Type: arrow.PrimitiveTypes.Uint32, Nullable: true}, - {Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, - {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, - {Name: "f32_req", Type: arrow.PrimitiveTypes.Float32}, - {Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "date32", Type: arrow.FixedWidthTypes.Date32, Nullable: true}, - {Name: "str", Type: arrow.BinaryTypes.String, Nullable: true}, - {Name: "bin", Type: arrow.BinaryTypes.Binary, Nullable: true}, - }, nil) - - sc, err := exprs.ToArrowSchema(boringSchema, extSet) - assert.NoError(t, err) - - assert.Truef(t, expectedSchema.Equal(sc), "expected: %s\ngot: %s", expectedSchema, sc) -} - -func assertEqual(t *testing.T, expected, actual any) bool { - switch e := expected.(type) { - case compute.Datum: - return assert.Truef(t, e.Equals(compute.NewDatumWithoutOwning(actual)), - "expected: %s\ngot: %s", e, actual) - case arrow.Array: - switch a := actual.(type) { - case compute.Datum: - if a.Kind() == compute.KindArray { - actual := a.(*compute.ArrayDatum).MakeArray() - defer actual.Release() - return assert.Truef(t, array.Equal(e, actual), "expected: %s\ngot: %s", - e, actual) - } - case arrow.Array: - return assert.Truef(t, array.Equal(e, a), "expected: %s\ngot: %s", - e, actual) - } - t.Errorf("expected arrow Array, got %s", actual) - return false - } - panic("unimplemented comparison") -} - -func TestComparisons(t *testing.T) { - mem := 
memory.NewCheckedAllocator(memory.DefaultAllocator) - defer mem.AssertSize(t, 0) - - var ( - ctx = compute.WithAllocator(context.Background(), mem) - zero = scalar.MakeScalar(int32(0)) - one = scalar.MakeScalar(int32(1)) - two = scalar.MakeScalar(int32(2)) - - str = scalar.MakeScalar("hello") - bin = scalar.MakeScalar([]byte("hello")) - ) - - getArgType := func(dt arrow.DataType) types.Type { - switch dt.ID() { - case arrow.INT32: - return &types.Int32Type{} - case arrow.STRING: - return &types.StringType{} - case arrow.BINARY: - return &types.BinaryType{} - } - panic("wtf") - } - - expect := func(t *testing.T, fn string, arg1, arg2 scalar.Scalar, res bool) { - baseStruct := types.NamedStruct{ - Names: []string{"arg1", "arg2"}, - Struct: types.StructType{ - Types: []types.Type{getArgType(arg1.DataType()), getArgType(arg2.DataType())}, - }, - } - - ex, err := exprs.NewScalarCall(extSet, fn, nil, - expr.MustExpr(expr.NewRootFieldRef(expr.NewStructFieldRef(0), &baseStruct.Struct)), - expr.MustExpr(expr.NewRootFieldRef(expr.NewStructFieldRef(1), &baseStruct.Struct))) - require.NoError(t, err) - - expression := &expr.Extended{ - Extensions: extSet.GetSubstraitRegistry().Set, - ReferredExpr: []expr.ExpressionReference{ - expr.NewExpressionReference([]string{"out"}, ex), - }, - BaseSchema: baseStruct, - } - - input, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{arg1, arg2}, []string{"arg1", "arg2"}) - out, err := exprs.ExecuteScalarSubstrait(ctx, expression, compute.NewDatum(input)) - require.NoError(t, err) - require.Equal(t, compute.KindScalar, out.Kind()) - - result := out.(*compute.ScalarDatum).Value - assert.Equal(t, res, result.(*scalar.Boolean).Value) - } - - expect(t, "equal", one, one, true) - expect(t, "equal", one, two, false) - expect(t, "less", one, two, true) - expect(t, "less", one, zero, false) - expect(t, "greater", one, zero, true) - expect(t, "greater", one, two, false) - - expect(t, "equal", str, bin, true) - expect(t, "equal", bin, str, true) 
-} - -func TestExecuteFieldRef(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - fromJSON := func(ty arrow.DataType, json string) arrow.Array { - arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json)) - require.NoError(t, err) - return arr - } - - scalarFromJSON := func(ty arrow.DataType, json string) scalar.Scalar { - arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json)) - require.NoError(t, err) - defer arr.Release() - s, err := scalar.GetScalar(arr, 0) - require.NoError(t, err) - return s - } - - tests := []struct { - testName string - ref compute.FieldRef - input compute.Datum - expected compute.Datum - }{ - {"basic ref", compute.FieldRefName("a"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf(arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - `[ - {"a": 6.125}, - {"a": 0.0}, - {"a": -1} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"ref one field", compute.FieldRefName("a"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - `[ - {"a": 6.125, "b": 7.5}, - {"a": 0.0, "b": 2.125}, - {"a": -1, "b": 4.0} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"second field", compute.FieldRefName("b"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - `[ - {"a": 6.125, "b": 7.5}, - {"a": 0.0, "b": 2.125}, - {"a": -1, "b": 4.0} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[7.5, 2.125, 4.0]`))}, - {"nested field by path", compute.FieldRefPath(compute.FieldPath{0, 0}), compute.NewDatumWithoutOwning(fromJSON( - 
arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), - `[ - {"a": {"b": 6.125}}, - {"a": {"b": 0.0}}, - {"a": {"b": -1}} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"nested field by name", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), - `[ - {"a": {"b": 6.125}}, - {"a": {"b": 0.0}}, - {"a": {"b": -1}} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, 0.0, -1]`))}, - {"nested field with nulls", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning(fromJSON( - arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), - `[ - {"a": {"b": 6.125}}, - {"a": null}, - {"a": {"b": null}} - ]`)), compute.NewDatumWithoutOwning(fromJSON( - arrow.PrimitiveTypes.Float64, `[6.125, null, null]`))}, - {"nested scalar", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning( - scalarFromJSON(arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), `[{"a": {"b": 64.0}}]`)), - compute.NewDatum(scalar.NewFloat64Scalar(64.0))}, - {"nested scalar with null", compute.FieldRefList("a", "b"), compute.NewDatumWithoutOwning( - scalarFromJSON(arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), `[{"a": {"b": null}}]`)), - compute.NewDatum(scalar.MakeNullScalar(arrow.PrimitiveTypes.Float64))}, - {"nested scalar null", compute.FieldRefList("a", "b"), 
compute.NewDatumWithoutOwning( - scalarFromJSON(arrow.StructOf( - arrow.Field{Name: "a", Type: arrow.StructOf( - arrow.Field{Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}), - Nullable: true}), `[{"a": null}]`)), - compute.NewDatum(scalar.MakeNullScalar(arrow.PrimitiveTypes.Float64))}, - } - - for _, tt := range tests { - t.Run(tt.testName, func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet) - dt := tt.input.(compute.ArrayLikeDatum).Type().(arrow.NestedType) - schema := arrow.NewSchema(dt.Fields(), nil) - ref, err := exprs.NewFieldRef(tt.ref, schema, extSet) - require.NoError(t, err) - assert.NotNil(t, ref) - - actual, err := exprs.ExecuteScalarExpression(ctx, schema, ref, tt.input) - require.NoError(t, err) - defer actual.Release() - - assert.Truef(t, tt.expected.Equals(actual), "expected: %s\ngot: %s", tt.expected, actual) - }) - } -} - -func TestExecuteScalarFuncCall(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - fromJSON := func(ty arrow.DataType, json string) arrow.Array { - arr, _, err := array.FromJSON(mem, ty, strings.NewReader(json)) - require.NoError(t, err) - return arr - } - - basicSchema := arrow.NewSchema([]arrow.Field{ - {Name: "a", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - {Name: "b", Type: arrow.PrimitiveTypes.Float64, Nullable: true}, - }, nil) - - nestedSchema := arrow.NewSchema([]arrow.Field{ - {Name: "a", Type: arrow.StructOf(basicSchema.Fields()...), Nullable: false}, - }, nil) - - bldr := exprs.NewExprBuilder(extSet) - - tests := []struct { - name string - ex exprs.Builder - sc *arrow.Schema - input compute.Datum - expected compute.Datum - }{ - {"add", bldr.MustCallScalar("add", nil, bldr.FieldRef("a"), - bldr.Literal(expr.NewPrimitiveLiteral(float64(3.5), false))), - basicSchema, - 
compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(basicSchema.Fields()...), - `[ - {"a": 6.125, "b": 3.375}, - {"a": 0.0, "b": 1}, - {"a": -1, "b": 4.75} - ]`)), compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64, - `[9.625, 3.5, 2.5]`))}, - {"add sub", bldr.MustCallScalar("add", nil, bldr.FieldRef("a"), - bldr.MustCallScalar("subtract", nil, - bldr.WrapLiteral(expr.NewLiteral(float64(3.5), false)), - bldr.FieldRef("b"))), - basicSchema, - compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(basicSchema.Fields()...), - `[ - {"a": 6.125, "b": 3.375}, - {"a": 0.0, "b": 1}, - {"a": -1, "b": 4.75} - ]`)), compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64, - `[6.25, 2.5, -2.25]`))}, - {"add nested", bldr.MustCallScalar("add", nil, - bldr.FieldRefList("a", "a"), bldr.FieldRefList("a", "b")), nestedSchema, - compute.NewDatumWithoutOwning(fromJSON(arrow.StructOf(nestedSchema.Fields()...), - `[ - {"a": {"a": 6.125, "b": 3.375}}, - {"a": {"a": 0.0, "b": 1}}, - {"a": {"a": -1, "b": 4.75}} - ]`)), compute.NewDatumWithoutOwning(fromJSON(arrow.PrimitiveTypes.Float64, - `[9.5, 1, 3.75]`))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - scoped := memory.NewCheckedAllocatorScope(mem) - defer scoped.CheckSize(t) - - bldr.SetInputSchema(tt.sc) - ex, err := tt.ex.BuildExpr() - require.NoError(t, err) - - ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet) - dt := tt.input.(compute.ArrayLikeDatum).Type().(arrow.NestedType) - schema := arrow.NewSchema(dt.Fields(), nil) - - actual, err := exprs.ExecuteScalarExpression(ctx, schema, ex, tt.input) - require.NoError(t, err) - defer actual.Release() - - assert.Truef(t, tt.expected.Equals(actual), "expected: %s\ngot: %s", tt.expected, actual) - }) - } -} - -func TestGenerateMask(t *testing.T) { - sc, err := boringArrowSchema.AddField(0, arrow.Field{ - Name: "in", Type: arrow.FixedWidthTypes.Boolean, Nullable: true}) - require.NoError(t, 
err) - - bldr := exprs.NewExprBuilder(extSet) - require.NoError(t, bldr.SetInputSchema(sc)) - - tests := []struct { - name string - json string - filter exprs.Builder - }{ - {"simple", `[ - {"i32": 0, "f32": -0.1, "in": true}, - {"i32": 0, "f32": 0.3, "in": true}, - {"i32": 1, "f32": 0.2, "in": false}, - {"i32": 2, "f32": -0.1, "in": false}, - {"i32": 0, "f32": 0.1, "in": true}, - {"i32": 0, "f32": null, "in": true}, - {"i32": 0, "f32": 1.0, "in": true} - ]`, bldr.MustCallScalar("equal", nil, - bldr.FieldRef("i32"), bldr.Literal(expr.NewPrimitiveLiteral(int32(0), false)))}, - {"complex", `[ - {"f64": 0.3, "f32": 0.1, "in": true}, - {"f64": -0.1, "f32": 0.3, "in": false}, - {"f64": 0.1, "f32": 0.2, "in": true}, - {"f64": 0.0, "f32": -0.1, "in": false}, - {"f64": 1.0, "f32": 0.1, "in": true}, - {"f64": -2.0, "f32": null, "in": null}, - {"f64": 3.0, "f32": 1.0, "in": true} - ]`, bldr.MustCallScalar("greater", nil, - bldr.MustCallScalar("multiply", nil, - bldr.Must(bldr.Cast(bldr.FieldRef("f32"), arrow.PrimitiveTypes.Float64)), - bldr.FieldRef("f64")), - bldr.Literal(expr.NewPrimitiveLiteral(float64(0), false)))}, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - ctx := exprs.WithExtensionIDSet(compute.WithAllocator(context.Background(), mem), extSet) - - rec, _, err := array.RecordFromJSON(mem, sc, strings.NewReader(tt.json)) - require.NoError(t, err) - defer rec.Release() - - input := compute.NewDatumWithoutOwning(rec) - expectedMask := rec.Column(0) - - mask, err := exprs.ExecuteScalarExpression(ctx, sc, - expr.MustExpr(tt.filter.BuildExpr()), input) - require.NoError(t, err) - defer mask.Release() - - assertEqual(t, expectedMask, mask) - }) - } -} diff --git a/go/arrow/compute/exprs/extension_types.go b/go/arrow/compute/exprs/extension_types.go deleted file mode 100644 index 8177675592fc9..0000000000000 --- a/go/arrow/compute/exprs/extension_types.go 
+++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package exprs - -import ( - "encoding/json" - "fmt" - "reflect" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" -) - -type simpleExtensionTypeFactory[P comparable] struct { - arrow.ExtensionBase - - params P - name string - getStorage func(P) arrow.DataType -} - -func (ef *simpleExtensionTypeFactory[P]) String() string { return "extension<" + ef.Serialize() + ">" } -func (ef *simpleExtensionTypeFactory[P]) ExtensionName() string { return ef.name } -func (ef *simpleExtensionTypeFactory[P]) Serialize() string { - s, _ := json.Marshal(ef.params) - return ef.name + string(s) -} -func (ef *simpleExtensionTypeFactory[P]) Deserialize(storage arrow.DataType, data string) (arrow.ExtensionType, error) { - if !strings.HasPrefix(data, ef.name) { - return nil, fmt.Errorf("%w: invalid deserialization of extension type %s", arrow.ErrInvalid, ef.name) - } - - data = strings.TrimPrefix(data, ef.name) - if err := json.Unmarshal([]byte(data), &ef.params); err != nil { - return nil, fmt.Errorf("%w: failed parsing parameters for extension type", err) - } - - if 
!arrow.TypeEqual(storage, ef.getStorage(ef.params)) { - return nil, fmt.Errorf("%w: invalid storage type for %s: %s (expected: %s)", - arrow.ErrInvalid, ef.name, storage, ef.getStorage(ef.params)) - } - - return &simpleExtensionTypeFactory[P]{ - name: ef.name, - params: ef.params, - getStorage: ef.getStorage, - ExtensionBase: arrow.ExtensionBase{ - Storage: storage, - }, - }, nil -} -func (ef *simpleExtensionTypeFactory[P]) ExtensionEquals(other arrow.ExtensionType) bool { - if ef.name != other.ExtensionName() { - return false - } - - rhs := other.(*simpleExtensionTypeFactory[P]) - return ef.params == rhs.params -} -func (ef *simpleExtensionTypeFactory[P]) ArrayType() reflect.Type { - return reflect.TypeOf(array.ExtensionArrayBase{}) -} - -func (ef *simpleExtensionTypeFactory[P]) CreateType(params P) arrow.DataType { - storage := ef.getStorage(params) - - return &simpleExtensionTypeFactory[P]{ - name: ef.name, - params: params, - getStorage: ef.getStorage, - ExtensionBase: arrow.ExtensionBase{ - Storage: storage, - }, - } -} - -type uuidExtParams struct{} - -var uuidType = simpleExtensionTypeFactory[uuidExtParams]{ - name: "uuid", getStorage: func(uuidExtParams) arrow.DataType { - return &arrow.FixedSizeBinaryType{ByteWidth: 16} - }} - -type fixedCharExtensionParams struct { - Length int32 `json:"length"` -} - -var fixedCharType = simpleExtensionTypeFactory[fixedCharExtensionParams]{ - name: "fixed_char", getStorage: func(p fixedCharExtensionParams) arrow.DataType { - return &arrow.FixedSizeBinaryType{ByteWidth: int(p.Length)} - }, -} - -type varCharExtensionParams struct { - Length int32 `json:"length"` -} - -var varCharType = simpleExtensionTypeFactory[varCharExtensionParams]{ - name: "varchar", getStorage: func(varCharExtensionParams) arrow.DataType { - return arrow.BinaryTypes.String - }, -} - -type intervalYearExtensionParams struct{} - -var intervalYearType = simpleExtensionTypeFactory[intervalYearExtensionParams]{ - name: "interval_year", getStorage: 
func(intervalYearExtensionParams) arrow.DataType { - return arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32) - }, -} - -type intervalDayExtensionParams struct{} - -var intervalDayType = simpleExtensionTypeFactory[intervalDayExtensionParams]{ - name: "interval_day", getStorage: func(intervalDayExtensionParams) arrow.DataType { - return arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int32) - }, -} - -func uuid() arrow.DataType { return uuidType.CreateType(uuidExtParams{}) } -func fixedChar(length int32) arrow.DataType { - return fixedCharType.CreateType(fixedCharExtensionParams{Length: length}) -} -func varChar(length int32) arrow.DataType { - return varCharType.CreateType(varCharExtensionParams{Length: length}) -} -func intervalYear() arrow.DataType { - return intervalYearType.CreateType(intervalYearExtensionParams{}) -} -func intervalDay() arrow.DataType { - return intervalDayType.CreateType(intervalDayExtensionParams{}) -} diff --git a/go/arrow/compute/exprs/field_refs.go b/go/arrow/compute/exprs/field_refs.go deleted file mode 100644 index 0e039d9e26601..0000000000000 --- a/go/arrow/compute/exprs/field_refs.go +++ /dev/null @@ -1,254 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exprs - -import ( - "fmt" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/arrow/scalar" - "github.com/substrait-io/substrait-go/expr" -) - -func getFields(typ arrow.DataType) []arrow.Field { - if nested, ok := typ.(arrow.NestedType); ok { - return nested.Fields() - } - return nil -} - -// GetRefField evaluates the substrait field reference to retrieve the -// referenced field or return an error. -func GetRefField(ref expr.ReferenceSegment, fields []arrow.Field) (*arrow.Field, error) { - if ref == nil { - return nil, compute.ErrEmpty - } - - var ( - out *arrow.Field - ) - - for ref != nil { - if len(fields) == 0 { - return nil, fmt.Errorf("%w: %s", compute.ErrNoChildren, out.Type) - } - - switch f := ref.(type) { - case *expr.StructFieldRef: - if f.Field < 0 || f.Field >= int32(len(fields)) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, f) - } - - out = &fields[f.Field] - fields = getFields(out.Type) - default: - return nil, arrow.ErrNotImplemented - } - - ref = ref.GetChild() - } - - return out, nil -} - -// GetRefSchema evaluates the provided substrait field reference against -// the schema to retrieve the referenced (potentially nested) field. -func GetRefSchema(ref expr.ReferenceSegment, schema *arrow.Schema) (*arrow.Field, error) { - return GetRefField(ref, schema.Fields()) -} - -// GetScalar returns the evaluated referenced scalar value from the provided -// scalar which must be appropriate to the type of reference. -// -// A StructFieldRef can only reference against a Struct-type scalar, a -// ListElementRef can only reference against a List or LargeList scalar, -// and a MapKeyRef will only reference against a Map scalar. 
An error is -// returned if following the reference children ends up with an invalid -// nested reference object. -func GetScalar(ref expr.ReferenceSegment, s scalar.Scalar, mem memory.Allocator, ext ExtensionIDSet) (scalar.Scalar, error) { - if ref == nil { - return nil, compute.ErrEmpty - } - - var out scalar.Scalar - for ref != nil { - switch f := ref.(type) { - case *expr.StructFieldRef: - if s.DataType().ID() != arrow.STRUCT { - return nil, fmt.Errorf("%w: attempting to reference field from non-struct scalar %s", - arrow.ErrInvalid, s) - } - - st := s.(*scalar.Struct) - if f.Field < 0 || f.Field >= int32(len(st.Value)) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) - } - - out = st.Value[f.Field] - case *expr.ListElementRef: - switch v := s.(type) { - case *scalar.List: - sc, err := scalar.GetScalar(v.Value, int(f.Offset)) - if err != nil { - return nil, err - } - out = sc - case *scalar.LargeList: - sc, err := scalar.GetScalar(v.Value, int(f.Offset)) - if err != nil { - return nil, err - } - out = sc - default: - return nil, fmt.Errorf("%w: cannot get ListElementRef from non-list scalar %s", - arrow.ErrInvalid, v) - } - case *expr.MapKeyRef: - v, ok := s.(*scalar.Map) - if !ok { - return nil, arrow.ErrInvalid - } - - dt, _, err := FromSubstraitType(f.MapKey.GetType(), ext) - if err != nil { - return nil, err - } - - if !arrow.TypeEqual(dt, v.Type.(*arrow.MapType).KeyType()) { - return nil, arrow.ErrInvalid - } - - keyvalDatum, err := literalToDatum(mem, f.MapKey, ext) - if err != nil { - return nil, err - } - - var ( - keyval = keyvalDatum.(*compute.ScalarDatum) - m = v.Value.(*array.Struct) - keys = m.Field(0) - valueScalar scalar.Scalar - ) - for i := 0; i < v.Value.Len(); i++ { - kv, err := scalar.GetScalar(keys, i) - if err != nil { - return nil, err - } - if scalar.Equals(kv, keyval.Value) { - valueScalar, err = scalar.GetScalar(m.Field(1), i) - if err != nil { - return nil, err - } - break - } - } - - if valueScalar == nil { - 
return nil, arrow.ErrNotFound - } - - out = valueScalar - } - s = out - ref = ref.GetChild() - } - - return out, nil -} - -// GetReferencedValue retrieves the referenced (potentially nested) value from -// the provided datum which may be a scalar, array, or record batch. -func GetReferencedValue(mem memory.Allocator, ref expr.ReferenceSegment, value compute.Datum, ext ExtensionIDSet) (compute.Datum, error) { - if ref == nil { - return nil, compute.ErrEmpty - } - - for ref != nil { - // process the rest of the refs for the scalars - // since arrays can go down to a scalar, but you - // won't get an array from a scalar via ref - if v, ok := value.(*compute.ScalarDatum); ok { - out, err := GetScalar(ref, v.Value, mem, ext) - if err != nil { - return nil, err - } - - return &compute.ScalarDatum{Value: out}, nil - } - - switch r := ref.(type) { - case *expr.MapKeyRef: - return nil, arrow.ErrNotImplemented - case *expr.StructFieldRef: - switch v := value.(type) { - case *compute.ArrayDatum: - if v.Type().ID() != arrow.STRUCT { - return nil, fmt.Errorf("%w: struct field ref for non struct type %s", - arrow.ErrInvalid, v.Type()) - } - - if r.Field < 0 || r.Field >= int32(len(v.Value.Children())) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) - } - - value = &compute.ArrayDatum{Value: v.Value.Children()[r.Field]} - case *compute.RecordDatum: - if r.Field < 0 || r.Field >= int32(v.Value.NumCols()) { - return nil, fmt.Errorf("%w: indices=%s", compute.ErrIndexRange, ref) - } - - value = &compute.ArrayDatum{Value: v.Value.Column(int(r.Field)).Data()} - default: - return nil, arrow.ErrNotImplemented - } - case *expr.ListElementRef: - switch v := value.(type) { - case *compute.ArrayDatum: - switch v.Type().ID() { - case arrow.LIST, arrow.LARGE_LIST, arrow.FIXED_SIZE_LIST: - arr := v.MakeArray() - defer arr.Release() - - sc, err := scalar.GetScalar(arr, int(r.Offset)) - if err != nil { - return nil, err - } - if s, ok := sc.(scalar.Releasable); ok { - 
defer s.Release() - } - - value = &compute.ScalarDatum{Value: sc} - default: - return nil, fmt.Errorf("%w: cannot reference list element in non-list array type %s", - arrow.ErrInvalid, v.Type()) - } - - default: - return nil, arrow.ErrNotImplemented - } - } - - ref = ref.GetChild() - } - - return value, nil -} diff --git a/go/arrow/compute/exprs/types.go b/go/arrow/compute/exprs/types.go deleted file mode 100644 index 594a55c9041a8..0000000000000 --- a/go/arrow/compute/exprs/types.go +++ /dev/null @@ -1,745 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package exprs - -import ( - "fmt" - "hash/maphash" - "strconv" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/substrait-io/substrait-go/expr" - "github.com/substrait-io/substrait-go/extensions" - "github.com/substrait-io/substrait-go/types" -) - -const ( - // URI for official Arrow Substrait Extension Types - ArrowExtTypesUri = "https://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml" - SubstraitDefaultURIPrefix = extensions.SubstraitDefaultURIPrefix - // URI for official Substrait Arithmetic funcs extensions - SubstraitArithmeticFuncsURI = SubstraitDefaultURIPrefix + "functions_arithmetic.yaml" - // URI for official Substrait Comparison funcs extensions - SubstraitComparisonFuncsURI = SubstraitDefaultURIPrefix + "functions_comparison.yaml" - - TimestampTzTimezone = "UTC" -) - -var hashSeed maphash.Seed - -// the default extension registry that will contain the Arrow extension -// type variations and types. 
-var DefaultExtensionIDRegistry = NewExtensionIDRegistry() - -func init() { - hashSeed = maphash.MakeSeed() - - types := []struct { - dt arrow.DataType - name string - }{ - {arrow.PrimitiveTypes.Uint8, "u8"}, - {arrow.PrimitiveTypes.Uint16, "u16"}, - {arrow.PrimitiveTypes.Uint32, "u32"}, - {arrow.PrimitiveTypes.Uint64, "u64"}, - {arrow.FixedWidthTypes.Float16, "fp16"}, - {arrow.Null, "null"}, - {arrow.FixedWidthTypes.MonthInterval, "interval_month"}, - {arrow.FixedWidthTypes.DayTimeInterval, "interval_day_milli"}, - {arrow.FixedWidthTypes.MonthDayNanoInterval, "interval_month_day_nano"}, - } - - for _, t := range types { - err := DefaultExtensionIDRegistry.RegisterType(extensions.ID{ - URI: ArrowExtTypesUri, Name: t.name}, t.dt) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"add", "subtract", "multiply", "divide", "power", "sqrt", "abs"} { - err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow( - extensions.ID{URI: SubstraitArithmeticFuncsURI, Name: fn}, - decodeOptionlessOverflowableArithmetic(fn)) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"add", "subtract", "multiply", "divide"} { - err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn, - encodeOptionlessOverflowableArithmetic(extensions.ID{ - URI: SubstraitArithmeticFuncsURI, Name: fn})) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"equal", "not_equal", "lt", "lte", "gt", "gte"} { - err := DefaultExtensionIDRegistry.AddSubstraitScalarToArrow( - extensions.ID{URI: SubstraitComparisonFuncsURI, Name: fn}, - simpleMapSubstraitToArrowFunc) - if err != nil { - panic(err) - } - } - - for _, fn := range []string{"equal", "not_equal", "less", "less_equal", "greater", "greater_equal"} { - err := DefaultExtensionIDRegistry.AddArrowToSubstrait(fn, - simpleMapArrowToSubstraitFunc(SubstraitComparisonFuncsURI)) - if err != nil { - panic(err) - } - } -} - -type overflowBehavior string - -const ( - overflowSILENT = "SILENT" - 
overflowSATURATE = "SATURATE" - overflowERROR = "ERROR" -) - -type enumParser[typ ~string] struct { - values map[typ]struct{} -} - -func (e *enumParser[typ]) parse(v string) (typ, error) { - out := typ(v) - if _, ok := e.values[out]; ok { - return out, nil - } - return "", arrow.ErrNotFound -} - -var overflowParser = enumParser[overflowBehavior]{ - values: map[overflowBehavior]struct{}{ - overflowSILENT: {}, - overflowSATURATE: {}, - overflowERROR: {}, - }, -} - -func parseOption[typ ~string](sf *expr.ScalarFunction, optionName string, parser *enumParser[typ], implemented []typ, def typ) (typ, error) { - opts := sf.GetOption(optionName) - if len(opts) == 0 { - return def, nil - } - - for _, o := range opts { - p, err := parser.parse(o) - if err != nil { - return def, arrow.ErrInvalid - } - for _, i := range implemented { - if i == p { - return p, nil - } - } - } - - return def, arrow.ErrNotImplemented -} - -type substraitToArrow = func(*expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) -type arrowToSubstrait = func(fname string) (extensions.ID, []*types.FunctionOption, error) - -var substraitToArrowFuncMap = map[string]string{ - "lt": "less", - "gt": "greater", - "lte": "less_equal", - "gte": "greater_equal", -} - -var arrowToSubstraitFuncMap = map[string]string{ - "less": "lt", - "greater": "gt", - "less_equal": "lte", - "greater_equal": "gte", -} - -func simpleMapSubstraitToArrowFunc(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) { - fname, _, _ = strings.Cut(sf.Name(), ":") - f, ok := substraitToArrowFuncMap[fname] - if ok { - fname = f - } - return -} - -func simpleMapArrowToSubstraitFunc(uri string) arrowToSubstrait { - return func(fname string) (extensions.ID, []*types.FunctionOption, error) { - f, ok := arrowToSubstraitFuncMap[fname] - if ok { - fname = f - } - return extensions.ID{URI: uri, Name: fname}, nil, nil - } -} - -func decodeOptionlessOverflowableArithmetic(n string) substraitToArrow { - 
return func(sf *expr.ScalarFunction) (fname string, opts compute.FunctionOptions, err error) { - overflow, err := parseOption(sf, "overflow", &overflowParser, []overflowBehavior{overflowSILENT, overflowERROR}, overflowSILENT) - if err != nil { - return n, nil, err - } - - switch overflow { - case overflowSILENT: - return n + "_unchecked", nil, nil - case overflowERROR: - return n, nil, nil - default: - return n, nil, arrow.ErrNotImplemented - } - } -} - -func encodeOptionlessOverflowableArithmetic(id extensions.ID) arrowToSubstrait { - return func(fname string) (extensions.ID, []*types.FunctionOption, error) { - fn, _, ok := strings.Cut(fname, ":") - if ok { - id.Name = fname - fname = fn - } - - opts := make([]*types.FunctionOption, 0, 1) - if strings.HasSuffix(fname, "_unchecked") { - opts = append(opts, &types.FunctionOption{ - Name: "overflow", Preference: []string{"SILENT"}}) - } else { - opts = append(opts, &types.FunctionOption{ - Name: "overflow", Preference: []string{"ERROR"}}) - } - - return id, opts, nil - } -} - -// NewExtensionSetDefault is a convenience function to create a new extension -// set using the Default arrow extension ID registry. -// -// See NewExtensionSet for more info. -func NewExtensionSetDefault(set expr.ExtensionRegistry) ExtensionIDSet { - return &extensionSet{ExtensionRegistry: set, reg: DefaultExtensionIDRegistry} -} - -// NewExtensionSet creates a new extension set given a substrait extension registry, -// and an Arrow <--> Substrait registry for mapping substrait extensions to -// their Arrow equivalents. This extension set can then be used to manage a -// particular set of extensions in use by an expression or plan, so when -// serializing you only need to serialize the extensions that have been -// inserted into the extension set. 
-func NewExtensionSet(set expr.ExtensionRegistry, reg *ExtensionIDRegistry) ExtensionIDSet { - return &extensionSet{ExtensionRegistry: set, reg: reg} -} - -type extensionSet struct { - expr.ExtensionRegistry - reg *ExtensionIDRegistry -} - -func (e *extensionSet) GetArrowRegistry() *ExtensionIDRegistry { return e.reg } -func (e *extensionSet) GetSubstraitRegistry() expr.ExtensionRegistry { return e.ExtensionRegistry } - -func (e *extensionSet) DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) { - id, ok := e.Set.DecodeType(anchor) - if !ok { - if id, ok = e.Set.DecodeTypeVariation(anchor); !ok { - return id, nil, false - } - } - - dt, ok := e.reg.GetTypeByID(id) - return id, dt, ok -} - -func (e *extensionSet) DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) { - id, ok := e.Set.DecodeFunc(ref) - if !ok { - return id, nil, false - } - - conv, ok := e.reg.GetSubstraitScalarToArrow(id) - if !ok { - id.Name, _, ok = strings.Cut(id.Name, ":") - if ok { - conv, ok = e.reg.GetSubstraitScalarToArrow(id) - } - } - return id, conv, ok -} - -func (e *extensionSet) EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) { - id, ok := e.reg.GetIDByType(dt) - if !ok { - return extensions.ID{}, 0, false - } - - return id, e.Set.GetTypeVariationAnchor(id), true -} - -func (e *extensionSet) EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) { - id, ok := e.reg.GetIDByType(dt) - if !ok { - return extensions.ID{}, 0, false - } - - return id, e.Set.GetTypeAnchor(id), true -} - -func (e *extensionSet) EncodeFunction(id extensions.ID) uint32 { - return e.Set.GetFuncAnchor(id) -} - -// ExtensionIDRegistry manages a set of mappings between Arrow types -// and functions and their substrait equivalents. 
-type ExtensionIDRegistry struct { - typeList []arrow.DataType - ids []extensions.ID - - substraitToIdx map[extensions.ID]int - arrowToIdx map[uint64]int - - substraitToArrowFn map[extensions.ID]substraitToArrow - arrowToSubstrait map[string]arrowToSubstrait -} - -// NewExtensionIDRegistry initializes a new registry for use. -func NewExtensionIDRegistry() *ExtensionIDRegistry { - return &ExtensionIDRegistry{ - typeList: make([]arrow.DataType, 0), - ids: make([]extensions.ID, 0), - substraitToIdx: make(map[extensions.ID]int), - arrowToIdx: make(map[uint64]int), - substraitToArrowFn: make(map[extensions.ID]substraitToArrow), - arrowToSubstrait: make(map[string]arrowToSubstrait), - } -} - -// RegisterType creates a mapping between the given extension ID and the -// provided Arrow data type. If this extension ID or arrow type are already -// registered, an arrow.ErrInvalid error will be returned. -func (e *ExtensionIDRegistry) RegisterType(id extensions.ID, dt arrow.DataType) error { - if _, ok := e.substraitToIdx[id]; ok { - return fmt.Errorf("%w: type id already registered", arrow.ErrInvalid) - } - - dthash := arrow.HashType(hashSeed, dt) - if _, ok := e.arrowToIdx[dthash]; ok { - return fmt.Errorf("%w: type already registered", arrow.ErrInvalid) - } - - idx := len(e.ids) - e.typeList = append(e.typeList, dt) - e.ids = append(e.ids, id) - e.substraitToIdx[id] = idx - e.arrowToIdx[dthash] = idx - return nil -} - -// AddSubstraitScalarToArrow creates a mapping between a given extension ID -// and a function which should return the corresponding Arrow compute function -// name along with any relevant FunctionOptions based on the ScalarFunction -// instance passed to it. -// -// Any relevant options should be parsed from the ScalarFunction's options -// and used to ensure the correct arrow compute function is used and necessary -// options are passed. 
-func (e *ExtensionIDRegistry) AddSubstraitScalarToArrow(id extensions.ID, toArrow substraitToArrow) error { - if _, ok := e.substraitToArrowFn[id]; ok { - return fmt.Errorf("%w: extension id already registered as function", arrow.ErrInvalid) - } - - e.substraitToArrowFn[id] = toArrow - return nil -} - -// AddArrowToSubstrait creates a mapping between the provided arrow compute function -// and a function which should provide the correct substrait ExtensionID and function -// options from that name. -func (e *ExtensionIDRegistry) AddArrowToSubstrait(name string, fn arrowToSubstrait) error { - if _, ok := e.arrowToSubstrait[name]; ok { - return fmt.Errorf("%w: function name '%s' already registered for conversion to substrait", arrow.ErrInvalid, name) - } - - e.arrowToSubstrait[name] = fn - return nil -} - -// GetTypeByID returns the mapped arrow data type from the provided substrait -// extension id. If no mapping exists for this substrait extension id, -// the second return value will be false. -func (e *ExtensionIDRegistry) GetTypeByID(id extensions.ID) (arrow.DataType, bool) { - idx, ok := e.substraitToIdx[id] - if !ok { - return nil, false - } - - return e.typeList[idx], true -} - -// GetIDByType is the inverse of GetTypeByID, returning the mapped substrait -// extension ID corresponding to the provided arrow data type. The second -// return is false if there is no mapping found. -func (e *ExtensionIDRegistry) GetIDByType(typ arrow.DataType) (extensions.ID, bool) { - dthash := arrow.HashType(hashSeed, typ) - idx, ok := e.arrowToIdx[dthash] - if !ok { - return extensions.ID{}, false - } - - return e.ids[idx], true -} - -// GetSubstraitScalarToArrow returns the mapped conversion function for a -// given substrait extension ID to convert a substrait ScalarFunction to -// the corresponding Arrow compute function call. False is returned as -// the second value if there is no mapping available. 
-func (e *ExtensionIDRegistry) GetSubstraitScalarToArrow(id extensions.ID) (substraitToArrow, bool) { - conv, ok := e.substraitToArrowFn[id] - if !ok { - return nil, ok - } - - return conv, true -} - -// GetArrowToSubstrait returns the mapped function to convert an arrow compute -// function to the corresponding Substrait ScalarFunction extension ID and options. -// False is returned as the second value if there is no mapping found. -func (e *ExtensionIDRegistry) GetArrowToSubstrait(name string) (conv arrowToSubstrait, ok bool) { - conv, ok = e.arrowToSubstrait[name] - if !ok { - fn, _, found := strings.Cut(name, ":") - if found { - conv, ok = e.arrowToSubstrait[fn] - } - } - return -} - -// ExtensionIDSet is an interface for managing the mapping between arrow -// and substrait types and function extensions. -type ExtensionIDSet interface { - GetArrowRegistry() *ExtensionIDRegistry - GetSubstraitRegistry() expr.ExtensionRegistry - - DecodeTypeArrow(anchor uint32) (extensions.ID, arrow.DataType, bool) - DecodeFunction(ref uint32) (extensions.ID, substraitToArrow, bool) - - EncodeType(dt arrow.DataType) (extensions.ID, uint32, bool) - EncodeTypeVariation(dt arrow.DataType) (extensions.ID, uint32, bool) -} - -// IsNullable is a convenience method to return whether or not -// a substrait type has Nullability set to NullabilityRequired or not. -func IsNullable(t types.Type) bool { - return t.GetNullability() != types.NullabilityRequired -} - -// FieldsFromSubstrait produces a list of arrow fields from a list of -// substrait types (such as the fields of a StructType) using nextName -// to determine the names for the fields. 
-func FieldsFromSubstrait(typeList []types.Type, nextName func() string, ext ExtensionIDSet) (out []arrow.Field, err error) { - out = make([]arrow.Field, len(typeList)) - for i, t := range typeList { - out[i].Name = nextName() - out[i].Nullable = IsNullable(t) - - if st, ok := t.(*types.StructType); ok { - fields, err := FieldsFromSubstrait(st.Types, nextName, ext) - if err != nil { - return nil, err - } - out[i].Type = arrow.StructOf(fields...) - } else { - out[i].Type, _, err = FromSubstraitType(t, ext) - if err != nil { - return nil, err - } - } - } - return -} - -// ToSubstraitType converts an arrow data type to a Substrait Type. Since -// arrow types don't have a nullable flag (it is in the arrow.Field) but -// Substrait types do, the nullability must be passed in here. -func ToSubstraitType(dt arrow.DataType, nullable bool, ext ExtensionIDSet) (types.Type, error) { - var nullability types.Nullability - if nullable { - nullability = types.NullabilityNullable - } else { - nullability = types.NullabilityRequired - } - - switch dt.ID() { - case arrow.BOOL: - return &types.BooleanType{Nullability: nullability}, nil - case arrow.INT8: - return &types.Int8Type{Nullability: nullability}, nil - case arrow.INT16: - return &types.Int16Type{Nullability: nullability}, nil - case arrow.INT32: - return &types.Int32Type{Nullability: nullability}, nil - case arrow.INT64: - return &types.Int64Type{Nullability: nullability}, nil - case arrow.UINT8: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int8Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.UINT16: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int16Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.UINT32: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int32Type{ - 
Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.UINT64: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int64Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.FLOAT16: - _, anchor, ok := ext.EncodeTypeVariation(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.Int16Type{ - Nullability: nullability, - TypeVariationRef: anchor, - }, nil - case arrow.FLOAT32: - return &types.Float32Type{Nullability: nullability}, nil - case arrow.FLOAT64: - return &types.Float64Type{Nullability: nullability}, nil - case arrow.STRING: - return &types.StringType{Nullability: nullability}, nil - case arrow.BINARY: - return &types.BinaryType{Nullability: nullability}, nil - case arrow.DATE32: - return &types.DateType{Nullability: nullability}, nil - case arrow.EXTENSION: - dt := dt.(arrow.ExtensionType) - switch dt.ExtensionName() { - case "uuid": - return &types.UUIDType{Nullability: nullability}, nil - case "fixed_char": - return &types.FixedCharType{ - Nullability: nullability, - Length: int32(dt.StorageType().(*arrow.FixedSizeBinaryType).ByteWidth), - }, nil - case "varchar": - return &types.VarCharType{Nullability: nullability, Length: -1}, nil - case "interval_year": - return &types.IntervalYearType{Nullability: nullability}, nil - case "interval_day": - return &types.IntervalDayType{Nullability: nullability}, nil - default: - _, anchor, ok := ext.EncodeType(dt) - if !ok { - return nil, arrow.ErrNotFound - } - return &types.UserDefinedType{ - Nullability: nullability, - TypeReference: anchor, - }, nil - } - case arrow.FIXED_SIZE_BINARY: - return &types.FixedBinaryType{Nullability: nullability, - Length: int32(dt.(*arrow.FixedSizeBinaryType).ByteWidth)}, nil - case arrow.DECIMAL128, arrow.DECIMAL256: - dt := dt.(arrow.DecimalType) - return &types.DecimalType{Nullability: nullability, - Precision: dt.GetPrecision(), Scale: dt.GetScale()}, nil - 
case arrow.STRUCT: - dt := dt.(*arrow.StructType) - fields := make([]types.Type, dt.NumFields()) - var err error - for i, f := range dt.Fields() { - fields[i], err = ToSubstraitType(f.Type, f.Nullable, ext) - if err != nil { - return nil, err - } - } - - return &types.StructType{ - Nullability: nullability, - Types: fields, - }, nil - case arrow.LIST, arrow.FIXED_SIZE_LIST, arrow.LARGE_LIST: - dt := dt.(arrow.NestedType) - elemType, err := ToSubstraitType(dt.Fields()[0].Type, dt.Fields()[0].Nullable, ext) - if err != nil { - return nil, err - } - return &types.ListType{ - Nullability: nullability, - Type: elemType, - }, nil - case arrow.MAP: - dt := dt.(*arrow.MapType) - keyType, err := ToSubstraitType(dt.KeyType(), false, ext) - if err != nil { - return nil, err - } - valueType, err := ToSubstraitType(dt.ItemType(), dt.ItemField().Nullable, ext) - if err != nil { - return nil, err - } - - return &types.MapType{ - Nullability: nullability, - Key: keyType, - Value: valueType, - }, nil - } - - return nil, arrow.ErrNotImplemented -} - -// FromSubstraitType returns the appropriate Arrow data type for the given -// substrait type, using the extension set if necessary. -// Since Substrait types contain their nullability also, the nullability -// returned along with the data type. 
-func FromSubstraitType(t types.Type, ext ExtensionIDSet) (arrow.DataType, bool, error) { - nullable := IsNullable(t) - - if t.GetTypeVariationReference() > 0 { - _, dt, ok := ext.DecodeTypeArrow(t.GetTypeVariationReference()) - if ok { - return dt, nullable, nil - } - } - - switch t := t.(type) { - case *types.BooleanType: - return arrow.FixedWidthTypes.Boolean, nullable, nil - case *types.Int8Type: - return arrow.PrimitiveTypes.Int8, nullable, nil - case *types.Int16Type: - return arrow.PrimitiveTypes.Int16, nullable, nil - case *types.Int32Type: - return arrow.PrimitiveTypes.Int32, nullable, nil - case *types.Int64Type: - return arrow.PrimitiveTypes.Int64, nullable, nil - case *types.Float32Type: - return arrow.PrimitiveTypes.Float32, nullable, nil - case *types.Float64Type: - return arrow.PrimitiveTypes.Float64, nullable, nil - case *types.StringType: - return arrow.BinaryTypes.String, nullable, nil - case *types.BinaryType: - return arrow.BinaryTypes.Binary, nullable, nil - case *types.TimestampType: - return &arrow.TimestampType{Unit: arrow.Microsecond}, nullable, nil - case *types.TimestampTzType: - return &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: TimestampTzTimezone}, - nullable, nil - case *types.DateType: - return arrow.FixedWidthTypes.Date32, nullable, nil - case *types.TimeType: - return &arrow.Time64Type{Unit: arrow.Microsecond}, nullable, nil - case *types.IntervalYearType: - return intervalYear(), nullable, nil - case *types.IntervalDayType: - return intervalDay(), nullable, nil - case *types.UUIDType: - return uuid(), nullable, nil - case *types.FixedCharType: - return fixedChar(t.Length), nullable, nil - case *types.VarCharType: - return varChar(t.Length), nullable, nil - case *types.FixedBinaryType: - return &arrow.FixedSizeBinaryType{ByteWidth: int(t.Length)}, nullable, nil - case *types.DecimalType: - return &arrow.Decimal128Type{ - Precision: t.Precision, - Scale: t.Scale, - }, nullable, nil - case *types.StructType: - i := 0 - 
fields, err := FieldsFromSubstrait(t.Types, func() string { - i++ - return strconv.Itoa(i) - }, ext) - if err != nil { - return nil, false, err - } - - return arrow.StructOf(fields...), nullable, nil - case *types.ListType: - elem, elemNullable, err := FromSubstraitType(t.Type, ext) - if err != nil { - return nil, false, err - } - return arrow.ListOfField(arrow.Field{Name: "item", Type: elem, Nullable: elemNullable}), - nullable, nil - case *types.MapType: - key, keyNullable, err := FromSubstraitType(t.Key, ext) - if err != nil { - return nil, false, err - } - if keyNullable { - return nil, false, fmt.Errorf("%w: encountered nullable key field when converting to arrow.Map", - arrow.ErrInvalid) - } - - value, valueNullable, err := FromSubstraitType(t.Value, ext) - if err != nil { - return nil, false, err - } - ret := arrow.MapOf(key, value) - ret.SetItemNullable(valueNullable) - return ret, nullable, nil - case *types.UserDefinedType: - anchor := t.TypeReference - _, dt, ok := ext.DecodeTypeArrow(anchor) - if !ok { - return nil, false, arrow.ErrNotImplemented - } - return dt, nullable, nil - } - - return nil, false, arrow.ErrNotImplemented -} diff --git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go deleted file mode 100644 index d69c7d91044c7..0000000000000 --- a/go/arrow/compute/fieldref.go +++ /dev/null @@ -1,587 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package compute - -import ( - "errors" - "fmt" - "hash/maphash" - "reflect" - "strconv" - "strings" - "unicode" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" -) - -var ( - ErrEmpty = errors.New("cannot traverse empty field path") - ErrNoChildren = errors.New("trying to get child of type with no children") - ErrIndexRange = errors.New("index out of range") - ErrMultipleMatches = errors.New("multiple matches") - ErrNoMatch = errors.New("no match") - ErrInvalid = errors.New("field ref invalid") -) - -func getFields(typ arrow.DataType) []arrow.Field { - if nested, ok := typ.(arrow.NestedType); ok { - return nested.Fields() - } - return nil -} - -type listvals interface { - ListValues() arrow.Array -} - -func getChildren(arr arrow.Array) (ret []arrow.Array) { - switch arr := arr.(type) { - case *array.Struct: - ret = make([]arrow.Array, arr.NumField()) - for i := 0; i < arr.NumField(); i++ { - ret[i] = arr.Field(i) - } - case listvals: - ret = []arrow.Array{arr.ListValues()} - } - return -} - -// FieldPath represents a path to a nested field using indices of child fields. -// For example, given the indices {5, 9, 3} the field could be retrieved with: -// schema.Field(5).Type().(*arrow.StructType).Field(9).Type().(*arrow.StructType).Field(3) -// -// Attempting to retrieve a child field using a FieldPath which is not valid for a given -// schema will get an error such as an out of range index, or an empty path. 
-// -// FieldPaths provide for drilling down to potentially nested children for convenience -// of accepting a slice of fields, a schema or a datatype (which should contain child fields). -// -// A fieldpath can also be used to retrieve a child arrow.Array or column from a record batch. -type FieldPath []int - -func (f FieldPath) String() string { - if len(f) == 0 { - return "FieldPath(empty)" - } - - var b strings.Builder - b.WriteString("FieldPath(") - for _, i := range f { - fmt.Fprint(&b, i) - b.WriteByte(' ') - } - ret := b.String() - return ret[:len(ret)-1] + ")" -} - -// Get retrieves the corresponding nested child field by drilling through the schema's -// fields as per the field path. -func (f FieldPath) Get(s *arrow.Schema) (*arrow.Field, error) { - return f.GetFieldFromSlice(s.Fields()) -} - -// GetFieldFromSlice treats the slice as the top layer of fields, so the first value -// in the field path will index into the slice, and then drill down from there. -func (f FieldPath) GetFieldFromSlice(fields []arrow.Field) (*arrow.Field, error) { - if len(f) == 0 { - return nil, ErrEmpty - } - - var ( - depth = 0 - out *arrow.Field - ) - for _, idx := range f { - if len(fields) == 0 { - return nil, fmt.Errorf("%w: %s", ErrNoChildren, out.Type) - } - - if idx < 0 || idx >= len(fields) { - return nil, fmt.Errorf("%w: indices=%s", ErrIndexRange, f[:depth+1]) - } - - out = &fields[idx] - fields = getFields(out.Type) - depth++ - } - - return out, nil -} - -func (f FieldPath) getArray(arrs []arrow.Array) (arrow.Array, error) { - if len(f) == 0 { - return nil, ErrEmpty - } - - var ( - depth = 0 - out arrow.Array - ) - for _, idx := range f { - if len(arrs) == 0 { - return nil, fmt.Errorf("%w: %s", ErrNoChildren, out.DataType()) - } - - if idx < 0 || idx >= len(arrs) { - return nil, fmt.Errorf("%w. 
indices=%s", ErrIndexRange, f[:depth+1]) - } - - out = arrs[idx] - arrs = getChildren(out) - depth++ - } - return out, nil -} - -// GetFieldFromType returns the nested field from a datatype by drilling into it's -// child fields. -func (f FieldPath) GetFieldFromType(typ arrow.DataType) (*arrow.Field, error) { - return f.GetFieldFromSlice(getFields(typ)) -} - -// GetField is equivalent to GetFieldFromType(field.Type) -func (f FieldPath) GetField(field arrow.Field) (*arrow.Field, error) { - return f.GetFieldFromType(field.Type) -} - -// GetColumn will return the correct child array by traversing the fieldpath -// going to the nested arrays of the columns in the record batch. -func (f FieldPath) GetColumn(batch arrow.Record) (arrow.Array, error) { - return f.getArray(batch.Columns()) -} - -func (f FieldPath) findAll(fields []arrow.Field) []FieldPath { - _, err := f.GetFieldFromSlice(fields) - if err == nil { - return []FieldPath{f} - } - return nil -} - -// a nameref represents a FieldRef by name of the field -type nameRef string - -func (n nameRef) String() string { - return "Name(" + string(n) + ")" -} - -func (ref nameRef) findAll(fields []arrow.Field) []FieldPath { - out := []FieldPath{} - for i, f := range fields { - if f.Name == string(ref) { - out = append(out, FieldPath{i}) - } - } - return out -} - -func (ref nameRef) hash(h *maphash.Hash) { h.WriteString(string(ref)) } - -type matches struct { - prefixes []FieldPath - refs []*arrow.Field -} - -func (m *matches) add(prefix, suffix FieldPath, fields []arrow.Field) { - f, err := suffix.GetFieldFromSlice(fields) - if err != nil { - panic(err) - } - - m.refs = append(m.refs, f) - m.prefixes = append(m.prefixes, append(prefix, suffix...)) -} - -// refList represents a list of references to use to determine which nested -// field is being referenced. 
allowing combinations of field indices and names -type refList []FieldRef - -func (r refList) String() string { - var b strings.Builder - b.WriteString("Nested(") - for _, f := range r { - fmt.Fprint(&b, f) - b.WriteByte(' ') - } - ret := b.String() - return ret[:len(ret)-1] + ")" -} - -func (ref refList) hash(h *maphash.Hash) { - for _, r := range ref { - r.hash(h) - } -} - -func (ref refList) findAll(fields []arrow.Field) []FieldPath { - if len(ref) == 0 { - return nil - } - - m := matches{} - for _, list := range ref[0].FindAll(fields) { - m.add(FieldPath{}, list, fields) - } - - for _, r := range ref[1:] { - next := matches{} - for i, f := range m.refs { - for _, match := range r.FindAllField(*f) { - next.add(m.prefixes[i], match, getFields(f.Type)) - } - } - m = next - } - return m.prefixes -} - -type refImpl interface { - fmt.Stringer - findAll(fields []arrow.Field) []FieldPath - hash(h *maphash.Hash) -} - -// FieldRef is a descriptor of a (potentially nested) field within a schema. -// -// Unlike FieldPath (which is exclusively indices of child fields), FieldRef -// may reference a field by name. It can be constructed from either -// a field index, field name, or field path. -// -// Nested fields can be referenced as well, given the schema: -// -// arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, -// {Name: "b", Type: arrow.PrimitiveTypes.Int32}, -// }) -// -// the following all indicate the nested field named "n": -// -// FieldRefPath(FieldPath{0, 0}) -// FieldRefList("a", 0) -// FieldRefList("a", "n") -// FieldRefList(0, "n") -// NewFieldRefFromDotPath(".a[0]") -// -// FieldPaths matching a FieldRef are retrieved with the FindAll* functions -// Multiple matches are possible because field names may be duplicated within -// a schema. 
For example: -// -// aIsAmbiguous := arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.PrimitiveTypes.Int32}, -// {Name: "a", Type: arrow.PrimitiveTypes.Float32}, -// }) -// matches := FieldRefName("a").FindAll(aIsAmbiguous) -// assert.Len(matches, 2) -// assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) -// assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) -type FieldRef struct { - impl refImpl -} - -// FieldRefPath constructs a FieldRef from a given FieldPath -func FieldRefPath(p FieldPath) FieldRef { - return FieldRef{impl: p} -} - -// FieldRefIndex is a convenience function to construct a FieldPath reference -// of a single index -func FieldRefIndex(i int) FieldRef { - return FieldRef{impl: FieldPath{i}} -} - -// FieldRefName constructs a FieldRef by name -func FieldRefName(n string) FieldRef { - return FieldRef{impl: nameRef(n)} -} - -// FieldRefList takes an arbitrary number of arguments which can be either -// strings or ints. This will panic if anything other than a string or int -// is passed in. -func FieldRefList(elems ...interface{}) FieldRef { - list := make(refList, len(elems)) - for i, e := range elems { - switch e := e.(type) { - case string: - list[i] = FieldRefName(e) - case int: - list[i] = FieldRefIndex(e) - } - } - return FieldRef{impl: list} -} - -// NewFieldRefFromDotPath parses a dot path into a field ref. -// -// dot_path = '.' name -// -// | '[' digit+ ']' -// | dot_path+ -// -// Examples -// -// ".alpha" => FieldRefName("alpha") -// "[2]" => FieldRefIndex(2) -// ".beta[3]" => FieldRefList("beta", 3) -// "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7) -// ".hello world" => FieldRefName("hello world") -// `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`) -// -// Note: when parsing a name, a '\' preceding any other character will be -// dropped from the resulting name. 
therefore if a name must contain the characters -// '.', '\', '[' or ']' then they must be escaped with a preceding '\'. -func NewFieldRefFromDotPath(dotpath string) (out FieldRef, err error) { - if len(dotpath) == 0 { - return out, fmt.Errorf("%w dotpath was empty", ErrInvalid) - } - - parseName := func() string { - var name string - for { - idx := strings.IndexAny(dotpath, `\[.`) - if idx == -1 { - name += dotpath - dotpath = "" - break - } - - if dotpath[idx] != '\\' { - // subscript for a new field ref - name += dotpath[:idx] - dotpath = dotpath[idx:] - break - } - - if len(dotpath) == idx+1 { - // dotpath ends with a backslash; consume it all - name += dotpath - dotpath = "" - break - } - - // append all characters before backslash, then the character which follows it - name += dotpath[:idx] + string(dotpath[idx+1]) - dotpath = dotpath[idx+2:] - } - return name - } - - children := make([]FieldRef, 0) - - for len(dotpath) > 0 { - subscript := dotpath[0] - dotpath = dotpath[1:] - switch subscript { - case '.': - // next element is a name - children = append(children, FieldRef{nameRef(parseName())}) - case '[': - subend := strings.IndexFunc(dotpath, func(r rune) bool { return !unicode.IsDigit(r) }) - if subend == -1 || dotpath[subend] != ']' { - return out, fmt.Errorf("%w: dot path '%s' contained an unterminated index", ErrInvalid, dotpath) - } - idx, _ := strconv.Atoi(dotpath[:subend]) - children = append(children, FieldRef{FieldPath{idx}}) - dotpath = dotpath[subend+1:] - default: - return out, fmt.Errorf("%w: dot path must begin with '[' or '.' got '%s'", ErrInvalid, dotpath) - } - } - - out.flatten(children) - return -} - -func (f FieldRef) hash(h *maphash.Hash) { f.impl.hash(h) } - -// Hash produces a hash of this field reference and takes in a seed so that -// it can maintain consistency across multiple places / processes /etc. 
-func (f FieldRef) Hash(seed maphash.Seed) uint64 { - h := maphash.Hash{} - h.SetSeed(seed) - f.hash(&h) - return h.Sum64() -} - -// IsName returns true if this fieldref is a name reference -func (f *FieldRef) IsName() bool { - _, ok := f.impl.(nameRef) - return ok -} - -// IsFieldPath returns true if this FieldRef uses a fieldpath -func (f *FieldRef) IsFieldPath() bool { - _, ok := f.impl.(FieldPath) - return ok -} - -// IsNested returns true if this FieldRef expects to represent -// a nested field. -func (f *FieldRef) IsNested() bool { - switch impl := f.impl.(type) { - case nameRef: - return false - case FieldPath: - return len(impl) > 1 - default: - return true - } -} - -// Name returns the name of the field this references if it is -// a Name reference, otherwise the empty string -func (f *FieldRef) Name() string { - n, _ := f.impl.(nameRef) - return string(n) -} - -// FieldPath returns the fieldpath that this FieldRef uses, otherwise -// an empty FieldPath if it's not a FieldPath reference -func (f *FieldRef) FieldPath() FieldPath { - p, _ := f.impl.(FieldPath) - return p -} - -func (f *FieldRef) Equals(other FieldRef) bool { - return reflect.DeepEqual(f.impl, other.impl) -} - -func (f *FieldRef) flatten(children []FieldRef) { - out := make([]FieldRef, 0, len(children)) - - var populate func(refImpl) - populate = func(refs refImpl) { - switch r := refs.(type) { - case nameRef: - out = append(out, FieldRef{r}) - case FieldPath: - out = append(out, FieldRef{r}) - case refList: - for _, c := range r { - populate(c.impl) - } - } - } - - populate(refList(children)) - - if len(out) == 1 { - f.impl = out[0].impl - } else { - f.impl = refList(out) - } -} - -// FindAll returns all the fieldpaths which this FieldRef matches in the given -// slice of fields. -func (f FieldRef) FindAll(fields []arrow.Field) []FieldPath { - return f.impl.findAll(fields) -} - -// FindAllField returns all the fieldpaths that this FieldRef matches against -// the type of the given field. 
-func (f FieldRef) FindAllField(field arrow.Field) []FieldPath { - return f.impl.findAll(getFields(field.Type)) -} - -// FindOneOrNone is a convenience helper that will either return 1 fieldpath, -// or an empty fieldpath, and will return an error if there are multiple matches. -func (f FieldRef) FindOneOrNone(schema *arrow.Schema) (FieldPath, error) { - matches := f.FindAll(schema.Fields()) - if len(matches) > 1 { - return nil, fmt.Errorf("%w for %s in %s", ErrMultipleMatches, f, schema) - } - if len(matches) == 0 { - return nil, nil - } - return matches[0], nil -} - -// FindOneOrNoneRecord is like FindOneOrNone but for the schema of a record, -// returning an error only if there are multiple matches. -func (f FieldRef) FindOneOrNoneRecord(root arrow.Record) (FieldPath, error) { - return f.FindOneOrNone(root.Schema()) -} - -// FindOne returns an error if the field isn't matched or if there are multiple matches -// otherwise it returns the path to the single valid match. -func (f FieldRef) FindOne(schema *arrow.Schema) (FieldPath, error) { - matches := f.FindAll(schema.Fields()) - if len(matches) == 0 { - return nil, fmt.Errorf("%w for %s in %s", ErrNoMatch, f, schema) - } - if len(matches) > 1 { - return nil, fmt.Errorf("%w for %s in %s", ErrMultipleMatches, f, schema) - } - return matches[0], nil -} - -// GetAllColumns gets all the matching column arrays from the given record that -// this FieldRef references. -func (f FieldRef) GetAllColumns(root arrow.Record) ([]arrow.Array, error) { - out := make([]arrow.Array, 0) - for _, m := range f.FindAll(root.Schema().Fields()) { - n, err := m.GetColumn(root) - if err != nil { - return nil, err - } - out = append(out, n) - } - return out, nil -} - -// GetOneField will return a pointer to a field or an error if it is not found -// or if there are multiple matches. 
-func (f FieldRef) GetOneField(schema *arrow.Schema) (*arrow.Field, error) { - match, err := f.FindOne(schema) - if err != nil { - return nil, err - } - - return match.GetFieldFromSlice(schema.Fields()) -} - -// GetOneOrNone will return a field or a nil if the field is found or not, and -// only errors if there are multiple matches. -func (f FieldRef) GetOneOrNone(schema *arrow.Schema) (*arrow.Field, error) { - match, err := f.FindOneOrNone(schema) - if err != nil { - return nil, err - } - if len(match) == 0 { - return nil, nil - } - return match.GetFieldFromSlice(schema.Fields()) -} - -// GetOneColumnOrNone returns either a nil or the referenced array if it can be -// found, erroring only if there is an ambiguous multiple matches. -func (f FieldRef) GetOneColumnOrNone(root arrow.Record) (arrow.Array, error) { - match, err := f.FindOneOrNoneRecord(root) - if err != nil { - return nil, err - } - if len(match) == 0 { - return nil, nil - } - return match.GetColumn(root) -} - -func (f FieldRef) String() string { - return "FieldRef." + f.impl.String() -} diff --git a/go/arrow/compute/fieldref_hash.go b/go/arrow/compute/fieldref_hash.go deleted file mode 100644 index 21ef88f1ecb4f..0000000000000 --- a/go/arrow/compute/fieldref_hash.go +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package compute - -import ( - "hash/maphash" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v18/arrow" -) - -func (f FieldPath) hash(h *maphash.Hash) { - raw := unsafe.Pointer(unsafe.SliceData(f)) - var byteLen int - if bits.UintSize == 32 { - byteLen = arrow.Int32Traits.BytesRequired(len(f)) - } else { - byteLen = arrow.Int64Traits.BytesRequired(len(f)) - } - - h.Write(unsafe.Slice((*byte)(raw), byteLen)) -} diff --git a/go/arrow/compute/fieldref_test.go b/go/arrow/compute/fieldref_test.go deleted file mode 100644 index ce2051f942271..0000000000000 --- a/go/arrow/compute/fieldref_test.go +++ /dev/null @@ -1,316 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package compute_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/stretchr/testify/assert" -) - -func TestFieldPathBasics(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - - s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil) - - for i := range s.Fields() { - f, err := compute.FieldPath{i}.Get(s) - assert.NoError(t, err) - assert.Equal(t, s.Field(i), *f) - } - - f, err := compute.FieldPath{}.Get(s) - assert.Nil(t, f) - assert.ErrorIs(t, err, compute.ErrEmpty) - - f, err = compute.FieldPath{s.NumFields() * 2}.Get(s) - assert.Nil(t, f) - assert.ErrorIs(t, err, compute.ErrIndexRange) -} - -func TestFieldRefBasics(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f3 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - - s := arrow.NewSchema([]arrow.Field{f0, f1, f2, f3}, nil) - - // lookup by index returns Indices{index} - for i := range s.Fields() { - assert.ElementsMatch(t, []compute.FieldPath{{i}}, compute.FieldRefIndex(i).FindAll(s.Fields())) - } - - // out of range index results in failure to match - assert.Empty(t, compute.FieldRefIndex(s.NumFields()*2).FindAll(s.Fields())) - - // lookup by name returns the indices of both matching fields - assert.Equal(t, []compute.FieldPath{{0}, {2}}, compute.FieldRefName("alpha").FindAll(s.Fields())) - assert.Equal(t, []compute.FieldPath{{1}, {3}}, 
compute.FieldRefName("beta").FindAll(s.Fields())) -} - -func TestFieldRefDotPath(t *testing.T) { - ref, err := compute.NewFieldRefFromDotPath(`.alpha`) - assert.True(t, ref.IsName()) - assert.Equal(t, "alpha", ref.Name()) - assert.False(t, ref.IsFieldPath()) - assert.False(t, ref.IsNested()) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefName("alpha"), ref) - assert.True(t, ref.Equals(compute.FieldRefName("alpha"))) - - ref, err = compute.NewFieldRefFromDotPath(`..`) - assert.Empty(t, ref.Name()) - assert.False(t, ref.IsName()) - assert.False(t, ref.IsFieldPath()) - assert.Nil(t, ref.FieldPath()) - assert.True(t, ref.IsNested()) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefList("", ""), ref) - - ref, err = compute.NewFieldRefFromDotPath(`[2]`) - assert.False(t, ref.IsName()) - assert.True(t, ref.IsFieldPath()) - assert.Equal(t, compute.FieldPath{2}, ref.FieldPath()) - assert.False(t, ref.IsNested()) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefIndex(2), ref) - - ref, err = compute.NewFieldRefFromDotPath(`.beta[3]`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefList("beta", 3), ref) - - ref, err = compute.NewFieldRefFromDotPath(`[5].gamma.delta[7]`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefList(5, "gamma", "delta", 7), ref) - - ref, err = compute.NewFieldRefFromDotPath(`.hello world`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefName("hello world"), ref) - - ref, err = compute.NewFieldRefFromDotPath(`.\[y\]\\tho\.\`) - assert.NoError(t, err) - assert.Equal(t, compute.FieldRefName(`[y]\tho.\`), ref) - - _, err = compute.NewFieldRefFromDotPath(``) - assert.ErrorIs(t, err, compute.ErrInvalid) - - _, err = compute.NewFieldRefFromDotPath(`alpha`) - assert.ErrorIs(t, err, compute.ErrInvalid) - - _, err = compute.NewFieldRefFromDotPath(`[134234`) - assert.ErrorIs(t, err, compute.ErrInvalid) - - _, err = compute.NewFieldRefFromDotPath(`[1stuf]`) - assert.ErrorIs(t, err, compute.ErrInvalid) 
-} - -func TestFieldPathNested(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "beta", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - f, err := compute.FieldPath{0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f0, *f) - - f, err = compute.FieldPath{0, 0}.Get(s) - assert.ErrorIs(t, err, compute.ErrNoChildren) - assert.Nil(t, f) - - f, err = compute.FieldPath{1, 0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f1_0, *f) - - f, err = compute.FieldPath{2, 0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f2_0, *f) - - f, err = compute.FieldPath{2, 1, 0}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f2_1_0, *f) - - f, err = compute.FieldPath{1, 0}.GetField(s.Field(2)) - assert.NoError(t, err) - assert.Equal(t, f2_1_0, *f) - - f, err = compute.FieldPath{2, 1, 1}.Get(s) - assert.NoError(t, err) - assert.Equal(t, f2_1_1, *f) -} - -func TestFindFuncs(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} - s := 
arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - assert.Equal(t, []compute.FieldPath{{1}}, compute.FieldRefName("gamma").FindAllField(f2)) - fp, err := compute.FieldRefName("alpha").FindOneOrNone(s) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Len(t, fp, 0) - fp, err = compute.FieldRefName("alpha").FindOne(s) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Len(t, fp, 0) - - fp, err = compute.FieldRefName("beta").FindOneOrNone(s) - assert.NoError(t, err) - assert.Equal(t, compute.FieldPath{2}, fp) - fp, err = compute.FieldRefName("beta").FindOne(s) - assert.NoError(t, err) - assert.Equal(t, compute.FieldPath{2}, fp) - - fp, err = compute.FieldRefName("gamma").FindOneOrNone(s) - assert.NoError(t, err) - assert.Len(t, fp, 0) - - fp, err = compute.FieldRefName("gamma").FindOne(s) - assert.ErrorIs(t, err, compute.ErrNoMatch) - assert.Nil(t, fp) -} - -func TestGetFieldFuncs(t *testing.T) { - f0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f1_0 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f1 := arrow.Field{Name: "alpha", Type: arrow.StructOf(f1_0)} - f2_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_0 := arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32} - f2_1_1 := arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32} - f2_1 := arrow.Field{Name: "gamma", Type: arrow.StructOf(f2_1_0, f2_1_1)} - f2 := arrow.Field{Name: "beta", Type: arrow.StructOf(f2_0, f2_1)} - s := arrow.NewSchema([]arrow.Field{f0, f1, f2}, nil) - - ref, err := compute.NewFieldRefFromDotPath(`[2].alpha`) - assert.NoError(t, err) - - f, err := ref.GetOneField(s) - assert.NoError(t, err) - assert.Equal(t, f2_0, *f) - f, err = ref.GetOneOrNone(s) - assert.NoError(t, err) - assert.Equal(t, f2_0, *f) - - ref = compute.FieldRefList("beta", "gamma", 2) - f, err = ref.GetOneField(s) - assert.ErrorIs(t, err, compute.ErrNoMatch) - assert.Nil(t, f) - f, err = ref.GetOneOrNone(s) - assert.NoError(t, 
err) - assert.Nil(t, f) - - f, err = compute.FieldRefName("alpha").GetOneOrNone(s) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Nil(t, f) -} - -func TestFieldRefRecord(t *testing.T) { - mem := memory.NewCheckedAllocator(memory.NewGoAllocator()) - defer mem.AssertSize(t, 0) - - alphaBldr := array.NewInt32Builder(mem) - defer alphaBldr.Release() - - betaBldr := array.NewListBuilder(mem, arrow.PrimitiveTypes.Int32) - defer betaBldr.Release() - - gammaBldr := array.NewStructBuilder(mem, arrow.StructOf( - arrow.Field{Name: "alpha", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, - arrow.Field{Name: "beta", Type: arrow.PrimitiveTypes.Int32, Nullable: true})) - defer gammaBldr.Release() - - alphaBldr.AppendValues([]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, nil) - betaBldr.AppendValues([]int32{0, 3, 7, 8, 8, 10, 13, 14, 17, 20, 22}, []bool{true, true, true, false, true, true, true, true, true, true}) - for i := 0; i < 22; i++ { - betaBldr.ValueBuilder().(*array.Int32Builder).Append(int32(i * 2)) - } - - gammaBldr.AppendValues([]bool{true, true, true, true, true, true, true, true, true, true}) - gammaBldr.FieldBuilder(0).(*array.Int32Builder).AppendValues([]int32{10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, nil) - gammaBldr.FieldBuilder(1).(*array.Int32Builder).AppendValues([]int32{-10, -20, -30, -40, -50, -60, -70, -80, -90, -100}, nil) - - alpha := alphaBldr.NewInt32Array() - defer alpha.Release() - beta := betaBldr.NewListArray() - defer beta.Release() - gamma := gammaBldr.NewStructArray() - defer gamma.Release() - - rec := array.NewRecord(arrow.NewSchema([]arrow.Field{ - {Name: "alpha", Type: alpha.DataType(), Nullable: true}, - {Name: "alpha", Type: beta.DataType(), Nullable: true}, - {Name: "alpha", Type: gamma.DataType(), Nullable: true}, - }, nil), []arrow.Array{alpha, beta, gamma}, 10) - defer rec.Release() - - arr, err := compute.FieldPath{2, 0}.GetColumn(rec) - assert.NoError(t, err) - assert.Same(t, gamma.Field(0), arr) - - arr, err = 
compute.FieldPath{}.GetColumn(rec) - assert.ErrorIs(t, err, compute.ErrEmpty) - assert.Nil(t, arr) - - arr, err = compute.FieldPath{1, 0}.GetColumn(rec) - assert.NoError(t, err) - assert.Same(t, beta.ListValues(), arr) - - arr, err = compute.FieldPath{1, 0, 0}.GetColumn(rec) - assert.ErrorIs(t, err, compute.ErrNoChildren) - assert.Nil(t, arr) - - arr, err = compute.FieldPath{2, 2}.GetColumn(rec) - assert.ErrorIs(t, err, compute.ErrIndexRange) - assert.Nil(t, arr) - - arrs, err := compute.FieldRefName("alpha").GetAllColumns(rec) - assert.NoError(t, err) - assert.Equal(t, []arrow.Array{alpha, beta, gamma}, arrs) - - arrs, err = compute.FieldRefName("delta").GetAllColumns(rec) - assert.NoError(t, err) - assert.Len(t, arrs, 0) - - arr, err = compute.FieldRefName("delta").GetOneColumnOrNone(rec) - assert.NoError(t, err) - assert.Nil(t, arr) - - arr, err = compute.FieldRefName("alpha").GetOneColumnOrNone(rec) - assert.ErrorIs(t, err, compute.ErrMultipleMatches) - assert.Nil(t, arr) - - arr, err = compute.FieldRefList("alpha", "beta").GetOneColumnOrNone(rec) - assert.NoError(t, err) - assert.Same(t, gamma.Field(1), arr) -} diff --git a/go/arrow/compute/funckind_string.go b/go/arrow/compute/funckind_string.go deleted file mode 100644 index 204e844133e53..0000000000000 --- a/go/arrow/compute/funckind_string.go +++ /dev/null @@ -1,29 +0,0 @@ -// Code generated by "stringer -type=FuncKind -linecomment"; DO NOT EDIT. - -//go:build go1.18 - -package compute - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[FuncScalar-0] - _ = x[FuncVector-1] - _ = x[FuncScalarAgg-2] - _ = x[FuncHashAgg-3] - _ = x[FuncMeta-4] -} - -const _FuncKind_name = "ScalarVectorScalarAggregateHashAggregateMeta" - -var _FuncKind_index = [...]uint8{0, 6, 12, 27, 40, 44} - -func (i FuncKind) String() string { - if i < 0 || i >= FuncKind(len(_FuncKind_index)-1) { - return "FuncKind(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _FuncKind_name[_FuncKind_index[i]:_FuncKind_index[i+1]] -} diff --git a/go/arrow/compute/functions.go b/go/arrow/compute/functions.go deleted file mode 100644 index ebade11a8e60b..0000000000000 --- a/go/arrow/compute/functions.go +++ /dev/null @@ -1,430 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -//go:build go1.18 - -package compute - -import ( - "context" - "fmt" - "strings" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute/exec" -) - -type Function interface { - Name() string - Kind() FuncKind - Arity() Arity - Doc() FunctionDoc - NumKernels() int - Execute(context.Context, FunctionOptions, ...Datum) (Datum, error) - DispatchExact(...arrow.DataType) (exec.Kernel, error) - DispatchBest(...arrow.DataType) (exec.Kernel, error) - DefaultOptions() FunctionOptions - Validate() error -} - -// Arity defines the number of required arguments for a function. -// -// Naming conventions are taken from https://en.wikipedia.org/wiki/Arity -type Arity struct { - NArgs int - IsVarArgs bool -} - -// Convenience functions to generating Arities - -func Nullary() Arity { return Arity{0, false} } -func Unary() Arity { return Arity{1, false} } -func Binary() Arity { return Arity{2, false} } -func Ternary() Arity { return Arity{3, false} } -func VarArgs(minArgs int) Arity { return Arity{minArgs, true} } - -type FunctionDoc struct { - // A one-line summary of the function, using a verb. - // - // For example, "Add two numeric arrays or scalars" - Summary string - // A detailed description of the function, meant to follow the summary. - Description string - // Symbolic names (identifiers) for the function arguments. - // - // Can be used to generate nicer function signatures. - ArgNames []string - // Name of the options struct type, if any - OptionsType string - // Whether or not options are required for function execution. - // - // If false, then either there are no options for this function, - // or there is a usable default options value. - OptionsRequired bool -} - -// EmptyFuncDoc is a reusable empty function doc definition for convenience. 
-var EmptyFuncDoc FunctionDoc - -// FuncKind is an enum representing the type of a function -type FuncKind int8 - -const ( - // A function that performs scalar data operations on whole arrays - // of data. Can generally process Array or Scalar values. The size - // of the output will be the same as the size (or broadcasted size, - // in the case of mixing Array and Scalar inputs) of the input. - FuncScalar FuncKind = iota // Scalar - // A function with array input and output whose behavior depends on - // the values of the entire arrays passed, rather than the value of - // each scalar value. - FuncVector // Vector - // A function that computes a scalar summary statistic from array input. - FuncScalarAgg // ScalarAggregate - // A function that computes grouped summary statistics from array - // input and an array of group identifiers. - FuncHashAgg // HashAggregate - // A function that dispatches to other functions and does not contain - // its own kernels. - FuncMeta // Meta -) - -func validateFunctionSummary(summary string) error { - if strings.Contains(summary, "\n") { - return fmt.Errorf("%w: summary contains a newline", arrow.ErrInvalid) - } - if summary[len(summary)-1] == '.' { - return fmt.Errorf("%w: summary ends with a point", arrow.ErrInvalid) - } - return nil -} - -func validateFunctionDescription(desc string) error { - if len(desc) != 0 && desc[len(desc)-1] == '\n' { - return fmt.Errorf("%w: description ends with a newline", arrow.ErrInvalid) - } - - const maxLineSize = 78 - for _, ln := range strings.Split(desc, "\n") { - if len(ln) > maxLineSize { - return fmt.Errorf("%w: description line length exceeds %d characters", arrow.ErrInvalid, maxLineSize) - } - } - return nil -} - -// baseFunction is the base class for compute functions. Function -// implementations should embed this baseFunction and will contain -// a collection of "kernels" which are implementations of the function -// for specific argument types. 
Selecting a viable kernel for -// executing the function is referred to as "dispatching". -type baseFunction struct { - name string - kind FuncKind - arity Arity - doc FunctionDoc - defaultOpts FunctionOptions -} - -func (b *baseFunction) Name() string { return b.name } -func (b *baseFunction) Kind() FuncKind { return b.kind } -func (b *baseFunction) Arity() Arity { return b.arity } -func (b *baseFunction) Doc() FunctionDoc { return b.doc } -func (b *baseFunction) DefaultOptions() FunctionOptions { return b.defaultOpts } -func (b *baseFunction) Validate() error { - if b.doc.Summary == "" { - return nil - } - - argCount := len(b.doc.ArgNames) - if argCount != b.arity.NArgs && !(b.arity.IsVarArgs && argCount == b.arity.NArgs+1) { - return fmt.Errorf("in function '%s': number of argument names for function doc != function arity", b.name) - } - - if err := validateFunctionSummary(b.doc.Summary); err != nil { - return err - } - return validateFunctionDescription(b.doc.Description) -} - -func checkOptions(fn Function, opts FunctionOptions) error { - if opts == nil && fn.Doc().OptionsRequired { - return fmt.Errorf("%w: function '%s' cannot be called without options", arrow.ErrInvalid, fn.Name()) - } - return nil -} - -func (b *baseFunction) checkArity(nargs int) error { - switch { - case b.arity.IsVarArgs && nargs < b.arity.NArgs: - return fmt.Errorf("%w: varargs function '%s' needs at least %d arguments, but only %d passed", - arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) - case !b.arity.IsVarArgs && nargs != b.arity.NArgs: - return fmt.Errorf("%w: function '%s' accepts %d arguments but %d passed", - arrow.ErrInvalid, b.name, b.arity.NArgs, nargs) - } - return nil -} - -// kernelType is a type constraint interface that is used for funcImpl -// generic definitions. It will be extended as other kernel types -// are defined. -// -// Currently only ScalarKernels are allowed to be used. 
-type kernelType interface { - exec.ScalarKernel | exec.VectorKernel - - // specifying the Kernel interface here allows us to utilize - // the methods of the Kernel interface on the generic - // constrained type - exec.Kernel -} - -// funcImpl is the basic implementation for any functions that use kernels -// i.e. all except for Meta functions. -type funcImpl[KT kernelType] struct { - baseFunction - - kernels []KT -} - -func (fi *funcImpl[KT]) DispatchExact(vals ...arrow.DataType) (*KT, error) { - if err := fi.checkArity(len(vals)); err != nil { - return nil, err - } - - for i := range fi.kernels { - if fi.kernels[i].GetSig().MatchesInputs(vals) { - return &fi.kernels[i], nil - } - } - - return nil, fmt.Errorf("%w: function '%s' has no kernel matching input types %s", - arrow.ErrNotImplemented, fi.name, arrow.TypesToString(vals)) -} - -func (fi *funcImpl[KT]) NumKernels() int { return len(fi.kernels) } -func (fi *funcImpl[KT]) Kernels() []*KT { - res := make([]*KT, len(fi.kernels)) - for i := range fi.kernels { - res[i] = &fi.kernels[i] - } - return res -} - -// A ScalarFunction is a function that executes element-wise operations -// on arrays or scalars, and therefore whose results generally do not -// depend on the order of the values in the arguments. Accepts and returns -// arrays that are all of the same size. These functions roughly correspond -// to the functions used in most SQL expressions. -type ScalarFunction struct { - funcImpl[exec.ScalarKernel] -} - -// NewScalarFunction constructs a new ScalarFunction object with the passed in -// name, arity and function doc. 
-func NewScalarFunction(name string, arity Arity, doc FunctionDoc) *ScalarFunction { - return &ScalarFunction{ - funcImpl: funcImpl[exec.ScalarKernel]{ - baseFunction: baseFunction{ - name: name, - arity: arity, - doc: doc, - kind: FuncScalar, - }, - }, - } -} - -func (s *ScalarFunction) SetDefaultOptions(opts FunctionOptions) { - s.defaultOpts = opts -} - -func (s *ScalarFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { - return s.funcImpl.DispatchExact(vals...) -} - -func (s *ScalarFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - return s.DispatchExact(vals...) -} - -// AddNewKernel constructs a new kernel with the provided signature -// and execution/init functions and then adds it to the function's list of -// kernels. This assumes default null handling (intersection of validity bitmaps) -func (s *ScalarFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { - if err := s.checkArity(len(inTypes)); err != nil { - return err - } - - if s.arity.IsVarArgs && len(inTypes) != 1 { - return fmt.Errorf("%w: varargs signatures must have exactly one input type", arrow.ErrInvalid) - } - - sig := &exec.KernelSignature{ - InputTypes: inTypes, - OutType: outType, - IsVarArgs: s.arity.IsVarArgs, - } - - s.kernels = append(s.kernels, exec.NewScalarKernelWithSig(sig, execFn, init)) - return nil -} - -// AddKernel adds the provided kernel to the list of kernels -// this function has. A copy of the kernel is added to the slice of kernels, -// which means that a given kernel object can be created, added and then -// reused to add other kernels. 
-func (s *ScalarFunction) AddKernel(k exec.ScalarKernel) error { - if err := s.checkArity(len(k.Signature.InputTypes)); err != nil { - return err - } - - if s.arity.IsVarArgs && !k.Signature.IsVarArgs { - return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) - } - - s.kernels = append(s.kernels, k) - return nil -} - -// Execute uses the passed in context, function options and arguments to eagerly -// execute the function using kernel dispatch, batch iteration and memory -// allocation details as defined by the kernel. -// -// If opts is nil, then the DefaultOptions() will be used. -func (s *ScalarFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, s, opts, -1, args...) -} - -type VectorFunction struct { - funcImpl[exec.VectorKernel] -} - -func NewVectorFunction(name string, arity Arity, doc FunctionDoc) *VectorFunction { - return &VectorFunction{ - funcImpl: funcImpl[exec.VectorKernel]{ - baseFunction: baseFunction{ - name: name, - arity: arity, - doc: doc, - kind: FuncVector, - }, - }, - } -} - -func (f *VectorFunction) SetDefaultOptions(opts FunctionOptions) { - f.defaultOpts = opts -} - -func (f *VectorFunction) DispatchExact(vals ...arrow.DataType) (exec.Kernel, error) { - return f.funcImpl.DispatchExact(vals...) -} - -func (f *VectorFunction) DispatchBest(vals ...arrow.DataType) (exec.Kernel, error) { - return f.DispatchExact(vals...) 
-} - -func (f *VectorFunction) AddNewKernel(inTypes []exec.InputType, outType exec.OutputType, execFn exec.ArrayKernelExec, init exec.KernelInitFn) error { - if err := f.checkArity(len(inTypes)); err != nil { - return err - } - - if f.arity.IsVarArgs && len(inTypes) != 1 { - return fmt.Errorf("%w: varags signatures must have exactly one input type", arrow.ErrInvalid) - } - - sig := &exec.KernelSignature{ - InputTypes: inTypes, - OutType: outType, - IsVarArgs: f.arity.IsVarArgs, - } - f.kernels = append(f.kernels, exec.NewVectorKernelWithSig(sig, execFn, init)) - return nil -} - -func (f *VectorFunction) AddKernel(kernel exec.VectorKernel) error { - if err := f.checkArity(len(kernel.Signature.InputTypes)); err != nil { - return err - } - - if f.arity.IsVarArgs && !kernel.Signature.IsVarArgs { - return fmt.Errorf("%w: function accepts varargs but kernel signature does not", arrow.ErrInvalid) - } - f.kernels = append(f.kernels, kernel) - return nil -} - -func (f *VectorFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - return execInternal(ctx, f, opts, -1, args...) -} - -// MetaFunctionImpl is the signature needed for implementing a MetaFunction -// which is a function that dispatches to another function instead. -type MetaFunctionImpl func(context.Context, FunctionOptions, ...Datum) (Datum, error) - -// MetaFunction is a function which dispatches to other functions, the impl -// must not be nil. -// -// For Array, ChunkedArray and Scalar datums, this may rely on the execution -// of concrete function types, but this must handle other Datum kinds on its -// own. -type MetaFunction struct { - baseFunction - impl MetaFunctionImpl -} - -// NewMetaFunction constructs a new MetaFunction which will call the provided -// impl for dispatching with the expected arity. -// -// Will panic if impl is nil. 
-func NewMetaFunction(name string, arity Arity, doc FunctionDoc, impl MetaFunctionImpl) *MetaFunction { - if impl == nil { - panic("arrow/compute: cannot construct MetaFunction with nil impl") - } - return &MetaFunction{ - baseFunction: baseFunction{ - name: name, - arity: arity, - doc: doc, - }, - impl: impl, - } -} - -func (MetaFunction) NumKernels() int { return 0 } -func (m *MetaFunction) DispatchExact(...arrow.DataType) (exec.Kernel, error) { - return nil, fmt.Errorf("%w: dispatch for metafunction", arrow.ErrNotImplemented) -} - -func (m *MetaFunction) DispatchBest(...arrow.DataType) (exec.Kernel, error) { - return nil, fmt.Errorf("%w: dispatch for metafunction", arrow.ErrNotImplemented) -} - -func (m *MetaFunction) Execute(ctx context.Context, opts FunctionOptions, args ...Datum) (Datum, error) { - if err := m.checkArity(len(args)); err != nil { - return nil, err - } - if err := checkOptions(m, opts); err != nil { - return nil, err - } - - if opts == nil { - opts = m.defaultOpts - } - - return m.impl(ctx, opts, args...) -} diff --git a/go/arrow/compute/functions_test.go b/go/arrow/compute/functions_test.go deleted file mode 100644 index 31a4cf124e845..0000000000000 --- a/go/arrow/compute/functions_test.go +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.18 - -package compute_test - -import ( - "testing" - - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/compute" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestArityBasics(t *testing.T) { - nullary := compute.Nullary() - assert.Equal(t, 0, nullary.NArgs) - assert.False(t, nullary.IsVarArgs) - - unary := compute.Unary() - assert.Equal(t, 1, unary.NArgs) - assert.False(t, unary.IsVarArgs) - - binary := compute.Binary() - assert.Equal(t, 2, binary.NArgs) - assert.False(t, binary.IsVarArgs) - - ternary := compute.Ternary() - assert.Equal(t, 3, ternary.NArgs) - assert.False(t, ternary.IsVarArgs) - - varargs := compute.VarArgs(2) - assert.Equal(t, 2, varargs.NArgs) - assert.True(t, varargs.IsVarArgs) -} - -func CheckDispatchBest(t *testing.T, funcName string, originalTypes, expected []arrow.DataType) { - fn, exists := compute.GetFunctionRegistry().GetFunction(funcName) - require.True(t, exists) - - vals := make([]arrow.DataType, len(originalTypes)) - copy(vals, originalTypes) - - actualKernel, err := fn.DispatchBest(vals...) - require.NoError(t, err) - expKernel, err := fn.DispatchExact(expected...) - require.NoError(t, err) - - assert.Same(t, expKernel, actualKernel) - assert.Equal(t, len(expected), len(vals)) - for i, v := range vals { - assert.True(t, arrow.TypeEqual(v, expected[i]), v.String(), expected[i].String()) - } -} diff --git a/go/arrow/compute/internal/kernels/Makefile b/go/arrow/compute/internal/kernels/Makefile deleted file mode 100644 index ac00bd837c0b3..0000000000000 --- a/go/arrow/compute/internal/kernels/Makefile +++ /dev/null @@ -1,110 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility -PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' - -C2GOASM=c2goasm -CC=clang-11 -CXX=clang++-11 -C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=5000 \ - -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib -ASM_FLAGS_AVX2=-mavx2 -mfma -ASM_FLAGS_SSE4=-msse4 -ASM_FLAGS_BMI2=-mbmi2 -ASM_FLAGS_POPCNT=-mpopcnt - -C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ - -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib -I../../../../internal/utils/_lib - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . 
-path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') - -.PHONEY: assembly - -INTEL_SOURCES := \ - cast_numeric_avx2_amd64.s cast_numeric_sse4_amd64.s constant_factor_avx2_amd64.s \ - constant_factor_sse4_amd64.s base_arithmetic_avx2_amd64.s base_arithmetic_sse4_amd64.s \ - scalar_comparison_avx2_amd64.s scalar_comparison_sse4_amd64.s - -# -# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. -# min_max_neon_arm64.s was generated by asm2plan9s. -# And manually formatted it as the Arm64 Plan9. -# - -assembly: $(INTEL_SOURCES) - -_lib/cast_numeric_avx2_amd64.s: _lib/cast_numeric.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/cast_numeric_sse4_amd64.s: _lib/cast_numeric.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/cast_numeric_neon.s: _lib/cast_numeric.cc - $(CXX) -std=c++17 -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/base_arithmetic_avx2_amd64.s: _lib/base_arithmetic.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/base_arithmetic_sse4_amd64.s: _lib/base_arithmetic.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/scalar_comparison_avx2_amd64.s: _lib/scalar_comparison.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/scalar_comparison_sse4_amd64.s: _lib/scalar_comparison.cc - $(CXX) -std=c++17 -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/base_arithmetic_neon.s: _lib/base_arithmetic.cc - $(CXX) -std=c++17 -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/constant_factor_avx2_amd64.s: _lib/constant_factor.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/constant_factor_sse4_amd64.s: _lib/constant_factor.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; 
$(PERL_FIXUP_ROTATE) $@ - -_lib/constant_factor_neon.s: _lib/constant_factor.c - $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -cast_numeric_avx2_amd64.s: _lib/cast_numeric_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -cast_numeric_sse4_amd64.s: _lib/cast_numeric_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -constant_factor_avx2_amd64.s: _lib/constant_factor_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -constant_factor_sse4_amd64.s: _lib/constant_factor_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -base_arithmetic_avx2_amd64.s: _lib/base_arithmetic_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -base_arithmetic_sse4_amd64.s: _lib/base_arithmetic_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -scalar_comparison_avx2_amd64.s: _lib/scalar_comparison_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -scalar_comparison_sse4_amd64.s: _lib/scalar_comparison_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -clean: - rm -f $(INTEL_SOURCES) - rm -f $(addprefix _lib/,$(INTEL_SOURCES)) diff --git a/go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc b/go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc deleted file mode 100644 index 199c9d48ac631..0000000000000 --- a/go/arrow/compute/internal/kernels/_lib/base_arithmetic.cc +++ /dev/null @@ -1,484 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include "types.h" -#include "vendored/safe-math.h" - -// Corresponds to equivalent ArithmeticOp enum in base_arithmetic.go -// for passing across which operation to perform. This allows simpler -// implementation at the cost of having to pass the extra int8 and -// perform a switch. -// -// In cases of small arrays, this is completely negligible. In cases -// of large arrays, the time saved by using SIMD here is significantly -// worth the cost. -enum class optype : int8_t { - ADD, - SUB, - MUL, - DIV, - ABSOLUTE_VALUE, - NEGATE, - SQRT, - POWER, - SIN, - COS, - TAN, - ASIN, - ACOS, - ATAN, - ATAN2, - LN, - LOG10, - LOG2, - LOG1P, - LOGB, - SIGN, - - // this impl doesn't actually perform any overflow checks as we need - // to only run overflow checks on non-null entries - ADD_CHECKED, - SUB_CHECKED, - MUL_CHECKED, - DIV_CHECKED, - ABSOLUTE_VALUE_CHECKED, - NEGATE_CHECKED, - SQRT_CHECKED, - POWER_CHECKED, - SIN_CHECKED, - COS_CHECKED, - TAN_CHECKED, - ASIN_CHECKED, - ACOS_CHECKED, - LN_CHECKED, - LOG10_CHECKED, - LOG2_CHECKED, - LOG1P_CHECKED, - LOGB_CHECKED, -}; - -struct Add { - template - static constexpr T Call(Arg0 left, Arg1 right) { - if constexpr (is_arithmetic_v) - return left + right; - } -}; - -struct Sub { - template - static constexpr T Call(Arg0 left, Arg1 right) { - if constexpr (is_arithmetic_v) - return left - right; - } -}; - -struct AddChecked { - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same::value && is_same::value, ""); - if constexpr(is_arithmetic_v) { - return left + right; - } - } -}; - - -struct SubChecked { - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same::value && is_same::value, ""); - if constexpr(is_arithmetic_v) { - return left - right; - } - } -}; - -template -using maybe_make_unsigned = conditional_t && 
!is_same_v, make_unsigned_t, T>; - -template > -constexpr Unsigned to_unsigned(T signed_) { - return static_cast(signed_); -} - -struct Multiply { - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - static_assert(is_same_v, ""); - - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same_v && is_same_v, ""); - if constexpr(is_floating_point_v) { - return left * right; - } else if constexpr(is_unsigned_v && !is_same_v) { - return left * right; - } else if constexpr(is_signed_v && !is_same_v) { - return to_unsigned(left) * to_unsigned(right); - } else if constexpr(is_same_v || is_same_v) { - // multiplication of 16 bit integer types implicitly promotes to - // signed 32 bit integer. However, some inputs may overflow (which - // triggers undefined behavior). Therefore we first cast to 32 bit - // unsigned integers where overflow is well defined. 
- return static_cast(left) * static_cast(right); - } - } -}; - -struct MultiplyChecked { - template - static constexpr T Call(Arg0 left, Arg1 right) { - static_assert(is_same_v && is_same_v, ""); - if constexpr(is_arithmetic_v) { - return left * right; - } - } -}; - -struct AbsoluteValue { - template - static constexpr T Call(Arg input) { - if constexpr(is_same_v) { - *(((int*)&input)+0) &= 0x7fffffff; - return input; - } else if constexpr(is_same_v) { - *(((int*)&input)+1) &= 0x7fffffff; - return input; - } else if constexpr(is_unsigned_v) { - return input; - } else { - const auto mask = input >> (sizeof(Arg) * CHAR_BIT - 1); - return (input + mask) ^ mask; - } - } -}; - -struct AbsoluteValueChecked { - template - static constexpr T Call(Arg input) { - if constexpr(is_same_v) { - *(((int*)&input)+0) &= 0x7fffffff; - return input; - } else if constexpr(is_same_v) { - *(((int*)&input)+1) &= 0x7fffffff; - return input; - } else if constexpr(is_unsigned_v) { - return input; - } else { - const auto mask = input >> (sizeof(Arg) * CHAR_BIT - 1); - return (input + mask) ^ mask; - } - } -}; - -struct Negate { - template - static constexpr T Call(Arg input) { - if constexpr(is_floating_point_v) { - return -input; - } else if constexpr(is_unsigned_v) { - return ~input + 1; - } else { - return -input; - } - } -}; - -struct NegateChecked { - template - static constexpr T Call(Arg input) { - static_assert(is_same_v, ""); - if constexpr(is_floating_point_v) { - return -input; - } else if constexpr(is_unsigned_v) { - return 0; - } else { - return -input; - } - } -}; - -struct Sign { - template - static constexpr T Call(Arg input) { - if constexpr(is_floating_point_v) { - return isnan(input) ? input : ((input == 0) ? 0 : (signbit(input) ? -1 : 1)); - } else if constexpr(is_unsigned_v) { - return input > 0 ? 1 : 0; - } else if constexpr(is_signed_v) { - return input > 0 ? 1 : (input ? 
-1 : 0); - } - } -}; - -template -struct arithmetic_op_arr_arr_impl { - static inline void exec(const void* in_left, const void* in_right, void* out, const int len) { - const T* left = reinterpret_cast(in_left); - const T* right = reinterpret_cast(in_right); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(left[i], right[i]); - } - } -}; - -template -struct arithmetic_op_arr_scalar_impl { - static inline void exec(const void* in_left, const void* scalar_right, void* out, const int len) { - const T* left = reinterpret_cast(in_left); - const T right = *reinterpret_cast(scalar_right); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(left[i], right); - } - } -}; - -template -struct arithmetic_op_scalar_arr_impl { - static inline void exec(const void* scalar_left, const void* in_right, void* out, const int len) { - const T left = *reinterpret_cast(scalar_left); - const T* right = reinterpret_cast(in_right); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(left, right[i]); - } - } -}; - -template -struct arithmetic_unary_op_impl { - static inline void exec(const void* arg, void* out, const int len) { - const T* input = reinterpret_cast(arg); - OutT* output = reinterpret_cast(out); - - for (int i = 0; i < len; ++i) { - output[i] = Op::template Call(input[i]); - } - } -}; - -template typename Impl> -static inline void arithmetic_op(const int type, const void* in_left, const void* in_right, void* output, const int len) { - const auto intype = static_cast(type); - - switch (intype) { - case arrtype::UINT8: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT8: - return Impl::exec(in_left, in_right, output, len); - case arrtype::UINT16: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT16: - return Impl::exec(in_left, in_right, output, len); - case 
arrtype::UINT32: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT32: - return Impl::exec(in_left, in_right, output, len); - case arrtype::UINT64: - return Impl::exec(in_left, in_right, output, len); - case arrtype::INT64: - return Impl::exec(in_left, in_right, output, len); - case arrtype::FLOAT32: - return Impl::exec(in_left, in_right, output, len); - case arrtype::FLOAT64: - return Impl::exec(in_left, in_right, output, len); - default: - break; - } -} - -template typename Impl, typename Input> -static inline void arithmetic_op(const int otype, const void* input, void* output, const int len) { - const auto outtype = static_cast(otype); - - switch (outtype) { - case arrtype::UINT8: - return Impl::exec(input, output, len); - case arrtype::INT8: - return Impl::exec(input, output, len); - case arrtype::UINT16: - return Impl::exec(input, output, len); - case arrtype::INT16: - return Impl::exec(input, output, len); - case arrtype::UINT32: - return Impl::exec(input, output, len); - case arrtype::INT32: - return Impl::exec(input, output, len); - case arrtype::UINT64: - return Impl::exec(input, output, len); - case arrtype::INT64: - return Impl::exec(input, output, len); - case arrtype::FLOAT32: - return Impl::exec(input, output, len); - case arrtype::FLOAT64: - return Impl::exec(input, output, len); - default: - break; - } -} - - -template typename Impl> -static inline void arithmetic_op(const int type, const void* input, void* output, const int len) { - const auto intype = static_cast(type); - - switch (intype) { - case arrtype::UINT8: - return Impl::exec(input, output, len); - case arrtype::INT8: - return Impl::exec(input, output, len); - case arrtype::UINT16: - return Impl::exec(input, output, len); - case arrtype::INT16: - return Impl::exec(input, output, len); - case arrtype::UINT32: - return Impl::exec(input, output, len); - case arrtype::INT32: - return Impl::exec(input, output, len); - case arrtype::UINT64: - return Impl::exec(input, 
output, len); - case arrtype::INT64: - return Impl::exec(input, output, len); - case arrtype::FLOAT32: - return Impl::exec(input, output, len); - case arrtype::FLOAT64: - return Impl::exec(input, output, len); - default: - break; - } -} - -template typename Impl> -static inline void arithmetic_op(const int itype, const int otype, const void* input, void* output, const int len) { - const auto intype = static_cast(itype); - - switch (intype) { - case arrtype::UINT8: - return arithmetic_op(otype, input, output, len); - case arrtype::INT8: - return arithmetic_op(otype, input, output, len); - case arrtype::UINT16: - return arithmetic_op(otype, input, output, len); - case arrtype::INT16: - return arithmetic_op(otype, input, output, len); - case arrtype::UINT32: - return arithmetic_op(otype, input, output, len); - case arrtype::INT32: - return arithmetic_op(otype, input, output, len); - case arrtype::UINT64: - return arithmetic_op(otype, input, output, len); - case arrtype::INT64: - return arithmetic_op(otype, input, output, len); - case arrtype::FLOAT32: - return arithmetic_op(otype, input, output, len); - case arrtype::FLOAT64: - return arithmetic_op(otype, input, output, len); - default: - break; - } -} - -template