diff --git a/go/arrow/compute/go.mod b/go/arrow/compute/go.mod index 58361f10c823d..447802c46b1d4 100644 --- a/go/arrow/compute/go.mod +++ b/go/arrow/compute/go.mod @@ -23,6 +23,7 @@ replace github.com/apache/arrow/go/v10 => ../../ require ( github.com/apache/arrow/go/v10 v10.0.0-00010101000000-000000000000 github.com/stretchr/testify v1.8.0 + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f ) @@ -42,7 +43,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect - golang.org/x/sys v0.0.0-20220804214406-8e32c043e418 // indirect + golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664 // indirect golang.org/x/tools v0.1.12 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/arrow/compute/go.sum b/go/arrow/compute/go.sum index 173afed769b90..b05bdd419c7c4 100644 --- a/go/arrow/compute/go.sum +++ b/go/arrow/compute/go.sum @@ -31,6 +31,7 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -78,14 +79,17 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= @@ -98,6 +102,9 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-sqlite3 v1.14.12/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= @@ -112,6 +119,7 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= @@ -192,6 +200,7 @@ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -199,11 +208,13 @@ golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220804214406-8e32c043e418 h1:9vYwv7OjYaky/tlAeD7C4oC9EsPTlaFl1H2jS++V+ME= -golang.org/x/sys v0.0.0-20220804214406-8e32c043e418/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664 h1:v1W7bwXHsnLLloWYTVEdvGvA7BHMeBYsPcF0GLDxIRs= +golang.org/x/sys v0.0.0-20220808155132-1c4a2a72c664/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -220,6 +231,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= @@ -277,4 +289,32 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.1.3/go.mod h1:NgwopIslSNH47DimFoV78dnkksY2EFtX0ajyb3K/las= +lukechampine.com/uint128 v1.1.1/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +lukechampine.com/uint128 v1.2.0/go.mod h1:c4eWIwlEGaxC/+H1VguhU4PHXNWDCDMUlWdIWl2j1gk= +modernc.org/cc/v3 v3.36.0/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= +modernc.org/cc/v3 v3.36.1/go.mod h1:NFUHyPn4ekoC/JHeZFfZurN6ixxawE1BnVonP/oahEI= +modernc.org/ccgo/v3 v3.0.0-20220428102840-41399a37e894/go.mod h1:eI31LL8EwEBKPpNpA4bU1/i+sKOwOrQy8D87zWUcRZc= +modernc.org/ccgo/v3 v3.0.0-20220430103911-bc99d88307be/go.mod h1:bwdAnOoaIt8Ax9YdWGjxWsdkPcZyRPHqrOvJxaKAKGw= +modernc.org/ccgo/v3 v3.16.4/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= +modernc.org/ccgo/v3 v3.16.6/go.mod h1:tGtX0gE9Jn7hdZFeU88slbTh1UtCYKusWOoCJuvkWsQ= +modernc.org/ccgo/v3 v3.16.8/go.mod h1:zNjwkizS+fIFDrDjIAgBSCLkWbJuHF+ar3QRn+Z9aws= +modernc.org/ccorpus v1.11.6/go.mod h1:2gEUTrWqdpH2pXsmTM1ZkjeSrUWDpjMu2T6m29L/ErQ= +modernc.org/httpfs v1.0.6/go.mod h1:7dosgurJGp0sPaRanU53W4xZYKh14wfzX420oZADeHM= +modernc.org/libc v0.0.0-20220428101251-2d5f3daf273b/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= +modernc.org/libc v1.16.0/go.mod h1:N4LD6DBE9cf+Dzf9buBlzVJndKr/iJHG97vGLHYnb5A= +modernc.org/libc v1.16.1/go.mod h1:JjJE0eu4yeK7tab2n4S1w8tlWd9MxXLRzheaRnAKymU= +modernc.org/libc v1.16.7/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= +modernc.org/libc v1.16.17/go.mod h1:hYIV5VZczAmGZAnG15Vdngn5HSF5cSkbvfz2B7GRuVU= +modernc.org/libc v1.16.19/go.mod h1:p7Mg4+koNjc8jkqwcoFBJx7tXkpj00G77X7A72jXPXA= +modernc.org/mathutil v1.2.2/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/mathutil v1.4.1/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.1.1/go.mod h1:/0wo5ibyrQiaoUoH7f9D8dnglAmILJ5/cxZlRECf+Nw= +modernc.org/opt v0.1.1/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0= +modernc.org/sqlite v1.18.0/go.mod h1:B9fRWZacNxJBHoCJZQr1R54zhVn3fjfl0aszflrTSxY= +modernc.org/strutil v1.1.1/go.mod h1:DE+MQQ/hjKBZS2zNInV5hhcipt5rLPWkmpbGeW5mmdw= +modernc.org/strutil v1.1.2/go.mod h1:OYajnUAcI/MX+XD/Wx7v1bbdvcQSvxgtb0gC+u3d3eg= +modernc.org/tcl v1.13.1/go.mod h1:XOLfOwzhkljL4itZkK6T72ckMgvj0BDsnKNdZVUOecw= +modernc.org/token v1.0.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= +modernc.org/z v1.5.1/go.mod h1:eWFB510QWW5Th9YGZT81s+LwvaAs3Q2yr4sP0rmLkv8= rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= diff --git a/go/arrow/compute/internal/span.go b/go/arrow/compute/internal/span.go new file mode 100644 index 0000000000000..8f4a43f3b9500 --- /dev/null +++ b/go/arrow/compute/internal/span.go @@ -0,0 +1,540 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/bitutil" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" +) + +// BufferSpan is a lightweight Buffer holder for ArraySpans that does not +// take ownership of the underlying memory.Buffer at all or could be +// used to reference raw byte slices instead. +type BufferSpan struct { + // Buf should be the byte slice representing this buffer, if this is + // nil then this bufferspan should be considered empty. + Buf []byte + // Owner should point to an underlying parent memory.Buffer if this + // memory is owned by a different, existing, buffer. Retain is not + // called on this buffer, so it must not be released as long as + // this BufferSpan refers to it. + Owner *memory.Buffer + // SelfAlloc tracks whether or not this bufferspan is the only owner + // of the Owning memory.Buffer. This happens when preallocating + // memory or if a kernel allocates it's own buffer for a result. + // In these cases, we have to know so we can properly maintain the + // refcount if this is later turned into an ArrayData object. + SelfAlloc bool +} + +// SetBuffer sets the given buffer into this BufferSpan and marks +// SelfAlloc as false. This should be called when setting a buffer +// that is externally owned/created. +func (b *BufferSpan) SetBuffer(buf *memory.Buffer) { + b.Buf = buf.Bytes() + b.Owner = buf + b.SelfAlloc = false +} + +// WrapBuffer wraps this bufferspan around a buffer and marks +// SelfAlloc as true. This should be called when setting a buffer +// that was allocated as part of an execution rather than just +// re-using an existing buffer from an input array. +func (b *BufferSpan) WrapBuffer(buf *memory.Buffer) { + b.Buf = buf.Bytes() + b.Owner = buf + b.SelfAlloc = true +} + +// ArraySpan is a light-weight, non-owning version of arrow.ArrayData +// for more efficient handling with computation and engines. We use +// explicit go Arrays to define the buffers and some scratch space +// for easily populating and shifting around pointers to memory without +// having to worry about and deal with retain/release during calculations. +type ArraySpan struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]BufferSpan + + // Scratch is a holding spot for things such as + // offsets or union type codes when converting from scalars + Scratch [2]uint64 + + Children []ArraySpan +} + +// UpdateNullCount will count the bits in the null bitmap and update the +// number of nulls if the current null count is unknown, otherwise it just +// returns the value of a.Nulls +func (a *ArraySpan) UpdateNullCount() int64 { + if a.Nulls != array.UnknownNullCount { + return a.Nulls + } + + a.Nulls = a.Len - int64(bitutil.CountSetBits(a.Buffers[0].Buf, int(a.Offset), int(a.Len))) + return a.Nulls +} + +// Dictionary returns a pointer to the array span for the dictionary which +// we will always place as the first (and only) child if it exists. +func (a *ArraySpan) Dictionary() *ArraySpan { return &a.Children[0] } + +// NumBuffers returns the number of expected buffers for this type +func (a *ArraySpan) NumBuffers() int { return getNumBuffers(a.Type) } + +// MakeData generates an arrow.ArrayData object for this ArraySpan, +// properly updating the buffer ref count if necessary. +func (a *ArraySpan) MakeData() arrow.ArrayData { + bufs := make([]*memory.Buffer, a.NumBuffers()) + for i := range bufs { + b := a.GetBuffer(i) + bufs[i] = b + if b != nil && a.Buffers[i].SelfAlloc { + // if this buffer is just a pointer to another existing buffer + // then we never bumped the refcount for that buffer. + // As a result, we won't call release here so that the call + // to array.NewData properly updates the ref counts of the buffers. + // If instead this buffer was allocated during calculation + // (such as during prealloc or by a kernel itself) + // then we need to release after we create the ArrayData so that it + // maintains the correct refcount of 1, giving the resulting + // ArrayData object ownership of this buffer. + defer b.Release() + } + } + + var ( + nulls = int(a.Nulls) + length = int(a.Len) + off = int(a.Offset) + dt = a.Type + children []arrow.ArrayData + ) + + if a.Type.ID() == arrow.NULL { + nulls = int(length) + } else if len(a.Buffers[0].Buf) == 0 { + nulls = 0 + } + + // we use a.Type for the NewData call at the end, so we can + // handle extension types by using dt to point to the storage type + // and let the proper extension type get set into the ArrayData + // object we return. + if dt.ID() == arrow.EXTENSION { + dt = dt.(arrow.ExtensionType).StorageType() + } + + if dt.ID() == arrow.DICTIONARY { + result := array.NewData(a.Type, length, bufs, nil, nulls, off) + dict := a.Dictionary().MakeData() + defer dict.Release() + result.SetDictionary(dict) + return result + } + + if len(a.Children) > 0 { + children = make([]arrow.ArrayData, len(a.Children)) + for i, c := range a.Children { + d := c.MakeData() + defer d.Release() + children[i] = d + } + } + return array.NewData(a.Type, length, bufs, children, nulls, off) +} + +// MakeArray is a convenience function for calling array.MakeFromData(a.MakeData()) +func (a *ArraySpan) MakeArray() arrow.Array { + d := a.MakeData() + defer d.Release() + return array.MakeFromData(d) +} + +// SetSlice updates the offset and length of this ArraySpan to refer to +// a specific slice of the underlying buffers. +func (a *ArraySpan) SetSlice(off, length int64) { + a.Offset, a.Len = off, length + if a.Type.ID() != arrow.NULL { + a.Nulls = array.UnknownNullCount + } else { + a.Nulls = a.Len + } +} + +// GetBuffer returns the buffer for the requested index. If this buffer +// is owned by another array/arrayspan the Owning buffer is returned, +// otherwise if this slice has no owning buffer, we call NewBufferBytes +// to wrap it as a memory.Buffer. Can also return nil if there is no +// buffer in this index. +func (a *ArraySpan) GetBuffer(idx int) *memory.Buffer { + buf := a.Buffers[idx] + switch { + case buf.Owner != nil: + return buf.Owner + case buf.Buf != nil: + return memory.NewBufferBytes(buf.Buf) + } + return nil +} + +// convenience function to resize the children slice if necessary, +// or just shrink the slice without re-allocating if there's enough +// capacity already. +func (a *ArraySpan) resizeChildren(i int) { + if cap(a.Children) >= i { + a.Children = a.Children[:i] + } else { + a.Children = make([]ArraySpan, i) + } +} + +// convenience function for populating the offsets buffer from a scalar +// value's size. +func setOffsetsForScalar[T int32 | int64](span *ArraySpan, buf []T, valueSize int64, bufidx int) { + buf[0] = 0 + buf[1] = T(valueSize) + + b := (*reflect.SliceHeader)(unsafe.Pointer(&buf)) + s := (*reflect.SliceHeader)(unsafe.Pointer(&span.Buffers[bufidx].Buf)) + s.Data = b.Data + s.Len = 2 * int(unsafe.Sizeof(T(0))) + s.Cap = s.Len + + span.Buffers[bufidx].Owner = nil + span.Buffers[bufidx].SelfAlloc = false +} + +// FillFromScalar populates this ArraySpan as if it were a 1 length array +// with the single value equal to the passed in Scalar. +func (a *ArraySpan) FillFromScalar(val scalar.Scalar) { + var ( + trueBit byte = 0x01 + falseBit byte = 0x00 + ) + + a.Type = val.DataType() + a.Len = 1 + typeID := a.Type.ID() + if val.IsValid() { + a.Nulls = 0 + } else { + a.Nulls = 1 + } + + if !arrow.IsUnion(typeID) && typeID != arrow.NULL { + if val.IsValid() { + a.Buffers[0].Buf = []byte{trueBit} + } else { + a.Buffers[0].Buf = []byte{falseBit} + } + a.Buffers[0].Owner = nil + a.Buffers[0].SelfAlloc = false + } + + switch { + case typeID == arrow.BOOL: + if val.(*scalar.Boolean).Value { + a.Buffers[1].Buf = []byte{trueBit} + } else { + a.Buffers[1].Buf = []byte{falseBit} + } + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + case arrow.IsPrimitive(typeID) || arrow.IsDecimal(typeID): + sc := val.(scalar.PrimitiveScalar) + a.Buffers[1].Buf = sc.Data() + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + case typeID == arrow.DICTIONARY: + sc := val.(scalar.PrimitiveScalar) + a.Buffers[1].Buf = sc.Data() + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + a.resizeChildren(1) + a.Children[0].SetMembers(val.(*scalar.Dictionary).Value.Dict.Data()) + case arrow.IsBaseBinary(typeID): + sc := val.(scalar.BinaryScalar) + a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:]) + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + + var dataBuffer []byte + if sc.IsValid() { + dataBuffer = sc.Data() + a.Buffers[2].Owner = sc.Buffer() + a.Buffers[2].SelfAlloc = false + } + if arrow.IsBinaryLike(typeID) { + setOffsetsForScalar(a, + unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(len(dataBuffer)), 1) + } else { + // large_binary_like + setOffsetsForScalar(a, + unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(len(dataBuffer)), 1) + } + a.Buffers[2].Buf = dataBuffer + case typeID == arrow.FIXED_SIZE_BINARY: + sc := val.(scalar.BinaryScalar) + a.Buffers[1].Buf = sc.Data() + a.Buffers[1].Owner = sc.Buffer() + a.Buffers[1].SelfAlloc = false + case arrow.IsListLike(typeID): + sc := val.(scalar.ListScalar) + valueLen := 0 + a.resizeChildren(1) + + if sc.GetList() != nil { + a.Children[0].SetMembers(sc.GetList().Data()) + valueLen = sc.GetList().Len() + } else { + // even when the value is null, we must populate + // child data to yield a valid array. ugh + fillZeroLength(sc.DataType().(arrow.NestedType).Fields()[0].Type, &a.Children[0]) + } + + switch typeID { + case arrow.LIST, arrow.MAP: + setOffsetsForScalar(a, + unsafe.Slice((*int32)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(valueLen), 1) + case arrow.LARGE_LIST: + setOffsetsForScalar(a, + unsafe.Slice((*int64)(unsafe.Pointer(&a.Scratch[0])), 2), + int64(valueLen), 1) + default: + // fixed size list has no second buffer + a.Buffers[1].Buf, a.Buffers[1].Owner = nil, nil + a.Buffers[1].SelfAlloc = false + } + case typeID == arrow.STRUCT: + sc := val.(*scalar.Struct) + a.Buffers[1].Buf = nil + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + a.resizeChildren(len(sc.Value)) + for i, v := range sc.Value { + a.Children[i].FillFromScalar(v) + } + case arrow.IsUnion(typeID): + // first buffer is kept null since unions have no validity vector + a.Buffers[0].Buf, a.Buffers[0].Owner = nil, nil + a.Buffers[0].SelfAlloc = false + + a.Buffers[1].Buf = arrow.Uint64Traits.CastToBytes(a.Scratch[:])[:1] + a.Buffers[1].Owner = nil + a.Buffers[1].SelfAlloc = false + codes := unsafe.Slice((*arrow.UnionTypeCode)(unsafe.Pointer(&a.Buffers[1].Buf[0])), 1) + + a.resizeChildren(len(a.Type.(arrow.UnionType).Fields())) + switch sc := val.(type) { + case *scalar.DenseUnion: + codes[0] = sc.TypeCode + // has offset, start 4 bytes in so it's aligned to the 32-bit boundaries + off := unsafe.Slice((*int32)(unsafe.Add(unsafe.Pointer(&a.Scratch[0]), arrow.Int32SizeBytes)), 2) + setOffsetsForScalar(a, off, 1, 2) + // we can't "see" the other arrays in the union, but we put the "active" + // union array in the right place and fill zero-length arrays for + // the others. + childIDS := a.Type.(arrow.UnionType).ChildIDs() + for i, f := range a.Type.(arrow.UnionType).Fields() { + if i == childIDS[sc.TypeCode] { + a.Children[i].FillFromScalar(sc.Value) + } else { + fillZeroLength(f.Type, &a.Children[i]) + } + } + case *scalar.SparseUnion: + codes[0] = sc.TypeCode + // sparse union scalars have a full complement of child values + // even though only one of them is relevant, so we just fill them + // in here + for i, v := range sc.Value { + a.Children[i].FillFromScalar(v) + } + } + case typeID == arrow.EXTENSION: + // pass through storage + sc := val.(*scalar.Extension) + a.FillFromScalar(sc.Value) + // restore the extension type + a.Type = val.DataType() + case typeID == arrow.NULL: + for i := range a.Buffers { + a.Buffers[i].Buf = nil + a.Buffers[i].Owner = nil + a.Buffers[i].SelfAlloc = false + } + } +} + +// SetMembers populates this ArraySpan from the given ArrayData object. +// As this is a non-owning reference, the ArrayData object must not +// be fully released while this ArraySpan is in use, otherwise any buffers +// referenced will be released too +func (a *ArraySpan) SetMembers(data arrow.ArrayData) { + a.Type = data.DataType() + a.Len = int64(data.Len()) + if a.Type.ID() == arrow.NULL { + a.Nulls = a.Len + } else { + a.Nulls = int64(data.NullN()) + } + a.Offset = int64(data.Offset()) + + for i, b := range data.Buffers() { + if b != nil { + a.Buffers[i].SetBuffer(b) + } else { + a.Buffers[i].Buf = nil + a.Buffers[i].Owner = nil + a.Buffers[i].SelfAlloc = false + } + } + + typeID := a.Type.ID() + if a.Buffers[0].Buf == nil { + switch typeID { + case arrow.NULL, arrow.SPARSE_UNION, arrow.DENSE_UNION: + default: + // should already be zero, but we make sure + a.Nulls = 0 + } + } + + for i := len(data.Buffers()); i < 3; i++ { + a.Buffers[i].Buf = nil + a.Buffers[i].Owner = nil + a.Buffers[i].SelfAlloc = false + } + + if typeID == arrow.DICTIONARY { + if cap(a.Children) >= 1 { + a.Children = a.Children[:1] + } else { + a.Children = make([]ArraySpan, 1) + } + a.Children[0].SetMembers(data.Dictionary()) + } else { + if cap(a.Children) >= len(data.Children()) { + a.Children = a.Children[:len(data.Children())] + } else { + a.Children = make([]ArraySpan, len(data.Children())) + } + for i, c := range data.Children() { + a.Children[i].SetMembers(c) + } + } +} + +// ExecValue represents a single input to an execution which could +// be either an Array (ArraySpan) or a Scalar value +type ExecValue struct { + Array ArraySpan + Scalar scalar.Scalar +} + +func (e *ExecValue) IsArray() bool { return e.Scalar == nil } +func (e *ExecValue) IsScalar() bool { return !e.IsArray() } + +func (e *ExecValue) Type() arrow.DataType { + if e.IsArray() { + return e.Array.Type + } + return e.Scalar.DataType() +} + +// ExecResult is the result of a kernel execution and should be populated +// by the execution functions and/or a kernel. For now we're just going to +// alias an ArraySpan. +type ExecResult = ArraySpan + +// ExecSpan represents a slice of inputs and is used to provide slices +// of input values to iterate over. +// +// Len is the length of the span (all elements in Values should either +// be scalar or an array with a length + offset of at least Len). +type ExecSpan struct { + Len int64 + Values []ExecValue +} + +func getNumBuffers(dt arrow.DataType) int { + switch dt.ID() { + case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST: + return 1 + case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION: + return 3 + case arrow.EXTENSION: + return getNumBuffers(dt.(arrow.ExtensionType).StorageType()) + default: + return 2 + } +} + +func fillZeroLength(dt arrow.DataType, span *ArraySpan) { + span.Scratch[0], span.Scratch[1] = 0, 0 + span.Type = dt + span.Len = 0 + numBufs := getNumBuffers(dt) + for i := 0; i < numBufs; i++ { + span.Buffers[i].Buf = arrow.Uint64Traits.CastToBytes(span.Scratch[:])[:0] + span.Buffers[i].Owner = nil + } + + for i := numBufs; i < 3; i++ { + span.Buffers[i].Buf, span.Buffers[i].Owner = nil, nil + } + + nt, ok := dt.(arrow.NestedType) + if !ok { + if len(span.Children) > 0 { + span.Children = span.Children[:0] + } + return + } + + if cap(span.Children) >= len(nt.Fields()) { + span.Children = span.Children[:len(nt.Fields())] + } else { + span.Children = make([]ArraySpan, len(nt.Fields())) + } + for i, f := range nt.Fields() { + fillZeroLength(f.Type, &span.Children[i]) + } +} + +// PromoteExecSpanScalars promotes the values of the passed in ExecSpan +// from scalars to Arrays of length 1 for each value. +func PromoteExecSpanScalars(span ExecSpan) { + for i := range span.Values { + if span.Values[i].Scalar != nil { + span.Values[i].Array.FillFromScalar(span.Values[i].Scalar) + span.Values[i].Scalar = nil + } + } +} diff --git a/go/arrow/compute/internal/span_test.go b/go/arrow/compute/internal/span_test.go new file mode 100644 index 0000000000000..1c55a7e55edbd --- /dev/null +++ b/go/arrow/compute/internal/span_test.go @@ -0,0 +1,834 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal_test + +import ( + "reflect" + "strings" + "testing" + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/compute/internal" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/endian" + "github.com/apache/arrow/go/v10/arrow/internal/testing/types" + "github.com/apache/arrow/go/v10/arrow/memory" + "github.com/apache/arrow/go/v10/arrow/scalar" + "github.com/stretchr/testify/assert" +) + +func TestBufferSpan_SetBuffer(t *testing.T) { + type fields struct { + Buf []byte + Owner *memory.Buffer + SelfAlloc bool + } + type args struct { + buf *memory.Buffer + } + foo := []byte{0xde, 0xad, 0xbe, 0xef} + own := memory.NewBufferBytes(foo) + tests := []struct { + name string + fields fields + args args + }{ + {"simple", fields{SelfAlloc: true}, args{own}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &internal.BufferSpan{ + Buf: tt.fields.Buf, + Owner: tt.fields.Owner, + SelfAlloc: tt.fields.SelfAlloc, + } + b.SetBuffer(tt.args.buf) + assert.Same(t, &foo[0], &b.Buf[0]) + assert.Same(t, own, b.Owner) + assert.False(t, b.SelfAlloc) + }) + } +} + +func TestBufferSpan_WrapBuffer(t *testing.T) { + type fields struct { + Buf []byte + Owner *memory.Buffer + SelfAlloc bool + } + type args struct { + buf *memory.Buffer + } + foo := []byte{0xde, 0xad, 0xbe, 0xef} + own := memory.NewBufferBytes(foo) + tests := []struct { + name string + fields fields + args args + }{ + {"simple", fields{SelfAlloc: false}, args{own}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + b := &internal.BufferSpan{ + Buf: tt.fields.Buf, + Owner: tt.fields.Owner, + SelfAlloc: tt.fields.SelfAlloc, + } + b.WrapBuffer(tt.args.buf) + assert.Same(t, &foo[0], &b.Buf[0]) + assert.Same(t, own, b.Owner) + assert.True(t, b.SelfAlloc) + }) + } +} + +func TestArraySpan_UpdateNullCount(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]internal.BufferSpan + Scratch [2]uint64 + Children []internal.ArraySpan + } + tests := []struct { + name string + fields fields + want int64 + }{ + {"known", fields{Nulls: 25}, 25}, + {"unknown", fields{ + Nulls: array.UnknownNullCount, + Len: 8, // 0b01101101 + Buffers: [3]internal.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 3}, + {"unknown with offset", fields{ + Nulls: array.UnknownNullCount, + Len: 4, + Offset: 2, // 0b01101101 + Buffers: [3]internal.BufferSpan{{Buf: []byte{109}}, {}, {}}}, 1}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &internal.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + if got := a.UpdateNullCount(); got != tt.want { + t.Errorf("ArraySpan.UpdateNullCount() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestArraySpan_Dictionary(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]internal.BufferSpan + Scratch [2]uint64 + Children []internal.ArraySpan + } + children := []internal.ArraySpan{{}} + tests := []struct { + name string + fields fields + want *internal.ArraySpan + }{ + {"basic", fields{Children: children}, &children[0]}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &internal.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + if got := a.Dictionary(); !reflect.DeepEqual(got, tt.want) { + t.Errorf("ArraySpan.Dictionary() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestArraySpan_NumBuffers(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]internal.BufferSpan + Scratch [2]uint64 + Children []internal.ArraySpan + } + + arrow.RegisterExtensionType(types.NewUUIDType()) + defer arrow.UnregisterExtensionType("uuid") + + tests := []struct { + name string + fields fields + want int + }{ + {"null", fields{Type: arrow.Null}, 1}, + {"struct", fields{Type: arrow.StructOf()}, 1}, + {"fixed size list", fields{Type: arrow.FixedSizeListOf(4, arrow.PrimitiveTypes.Int32)}, 1}, + {"binary", fields{Type: arrow.BinaryTypes.Binary}, 3}, + {"large binary", fields{Type: arrow.BinaryTypes.LargeBinary}, 3}, + {"string", fields{Type: arrow.BinaryTypes.String}, 3}, + {"large string", fields{Type: arrow.BinaryTypes.LargeString}, 3}, + {"extension", fields{Type: types.NewUUIDType()}, 2}, + {"int32", fields{Type: arrow.PrimitiveTypes.Int32}, 2}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &internal.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + if got := a.NumBuffers(); got != tt.want { + t.Errorf("ArraySpan.NumBuffers() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestArraySpan_MakeData(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]internal.BufferSpan + Scratch [2]uint64 + Children []internal.ArraySpan + } + + var ( + buf1 *memory.Buffer + ) + arrow.RegisterExtensionType(types.NewDictExtensionType()) + defer arrow.UnregisterExtensionType("dict-extension") + + tests := []struct { + name string + fields func(mem memory.Allocator) fields + want func(mem memory.Allocator) arrow.ArrayData + }{ + {"null type", func(mem memory.Allocator) fields { + return fields{ + Type: arrow.Null, + Len: 5, + Nulls: array.UnknownNullCount, + } + }, func(mem memory.Allocator) arrow.ArrayData { + return array.NewData(arrow.Null, 5, []*memory.Buffer{nil}, nil, 5, 0) + }}, + {"zero len", func(mem memory.Allocator) fields { + return fields{Type: arrow.PrimitiveTypes.Int32} + }, func(mem memory.Allocator) arrow.ArrayData { + return array.NewData(arrow.PrimitiveTypes.Int32, 0, []*memory.Buffer{nil, nil}, nil, 0, 0) + }}, + {"non-owning offset", func(mem memory.Allocator) fields { + ret := fields{ + Type: arrow.PrimitiveTypes.Int8, + Len: 4, + Nulls: 1, + Offset: 1, + } + buf1 = memory.NewResizableBuffer(mem) + buf1.Resize(1) + buf1.Bytes()[0] = 109 + ret.Buffers[0].SetBuffer(buf1) + ret.Buffers[1].SetBuffer(memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})) + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + // created in the above func, we release after constructing + // the NewData so the refcount is as expected + defer buf1.Release() + return array.NewData(arrow.PrimitiveTypes.Int8, 4, + []*memory.Buffer{buf1, memory.NewBufferBytes([]byte{5, 5, 5, 5, 5})}, nil, 1, 1) + }}, + {"self-alloc", func(mem memory.Allocator) fields { + ret := fields{ + Type: arrow.PrimitiveTypes.Int8, + Len: 4, + } + buf := memory.NewResizableBuffer(mem) + buf.Resize(1) + ret.Buffers[0].WrapBuffer(buf) + buf2 := memory.NewResizableBuffer(mem) + buf2.Resize(4) + ret.Buffers[1].WrapBuffer(buf2) + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + buf := memory.NewResizableBuffer(mem) + buf.Resize(1) + defer buf.Release() + buf2 := memory.NewResizableBuffer(mem) + buf2.Resize(4) + defer buf2.Release() + return array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{buf, buf2}, nil, 0, 0) + }}, + {"with children", func(mem memory.Allocator) fields { + ret := fields{ + Type: arrow.ListOf(arrow.PrimitiveTypes.Int8), + Len: 1, + Children: []internal.ArraySpan{{ + Type: arrow.PrimitiveTypes.Int8, + Len: 4, + }}, + } + var offsets [8]byte + endian.Native.PutUint32(offsets[4:], 4) + ret.Buffers[1].SetBuffer(memory.NewBufferBytes(offsets[:])) + buf := memory.NewResizableBuffer(mem) + buf.Resize(4) + buf.Bytes()[0] = 1 + buf.Bytes()[1] = 2 + buf.Bytes()[2] = 3 + buf.Bytes()[3] = 4 + + ret.Children[0].Buffers[1].WrapBuffer(buf) + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + buf := memory.NewResizableBuffer(mem) + buf.Resize(4) + buf.Bytes()[0] = 1 + buf.Bytes()[1] = 2 + buf.Bytes()[2] = 3 + buf.Bytes()[3] = 4 + defer buf.Release() + child := array.NewData(arrow.PrimitiveTypes.Int8, 4, []*memory.Buffer{nil, buf}, nil, 0, 0) + defer child.Release() + + var offsets [8]byte + endian.Native.PutUint32(offsets[4:], 4) + + return array.NewData(arrow.ListOf(arrow.PrimitiveTypes.Int8), 1, + []*memory.Buffer{nil, memory.NewBufferBytes(offsets[:])}, + []arrow.ArrayData{child}, 0, 0) + }}, + {"dict-extension-type", func(mem memory.Allocator) fields { + // dict-extension-type is dict(Index: int8, Value: string) + // so there should be an int8 in the arrayspan and + // a child of a string arrayspan in the first index of + // Children + ret := fields{ + Type: types.NewDictExtensionType(), + Len: 1, + Children: []internal.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + }}, + } + + indices := memory.NewResizableBuffer(mem) + indices.Resize(1) + indices.Bytes()[0] = 1 + ret.Buffers[1].WrapBuffer(indices) + + offsets := memory.NewResizableBuffer(mem) + offsets.Resize(3 * arrow.Int32SizeBytes) + copy(offsets.Bytes(), arrow.Int32Traits.CastToBytes([]int32{0, 5, 10})) + + values := memory.NewResizableBuffer(mem) + values.Resize(len("HelloWorld")) + copy(values.Bytes(), []byte("HelloWorld")) + + nulls := memory.NewResizableBuffer(mem) + nulls.Resize(1) + nulls.Bytes()[0] = 3 + ret.Children[0].Buffers[0].WrapBuffer(nulls) + ret.Children[0].Buffers[1].WrapBuffer(offsets) + ret.Children[0].Buffers[2].WrapBuffer(values) + + return ret + }, func(mem memory.Allocator) arrow.ArrayData { + dict, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) + defer dict.Release() + index, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int8, strings.NewReader(`[1]`)) + defer index.Release() + + out := array.NewData(types.NewDictExtensionType(), 1, []*memory.Buffer{nil, index.Data().Buffers()[1]}, nil, 0, 0) + out.SetDictionary(dict.Data()) + return out + }}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mem := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer mem.AssertSize(t, 0) + + t.Run("MakeData", func(t *testing.T) { + f := tt.fields(mem) + a := &internal.ArraySpan{ + Type: f.Type, + Len: f.Len, + Nulls: f.Nulls, + Offset: f.Offset, + Buffers: f.Buffers, + Scratch: f.Scratch, + Children: f.Children, + } + got := a.MakeData() + want := tt.want(mem) + if !reflect.DeepEqual(got, want) { + t.Errorf("ArraySpan.MakeData() = %v, want %v", got, want) + } + want.Release() + got.Release() + }) + + t.Run("MakeArray", func(t *testing.T) { + f := tt.fields(mem) + a := &internal.ArraySpan{ + Type: f.Type, + Len: f.Len, + Nulls: f.Nulls, + Offset: f.Offset, + Buffers: f.Buffers, + Scratch: f.Scratch, + Children: f.Children, + } + arr := a.MakeArray() + want := tt.want(mem) + defer want.Release() + exp := array.MakeFromData(want) + + assert.Truef(t, array.Equal(arr, exp), "expected: %s\ngot: %s", exp, arr) + + exp.Release() + arr.Release() + }) + }) + } +} + +func TestArraySpan_SetSlice(t *testing.T) { + type fields struct { + Type arrow.DataType + Len int64 + Nulls int64 + Offset int64 + Buffers [3]internal.BufferSpan + Scratch [2]uint64 + Children []internal.ArraySpan + } + type args struct { + off int64 + length int64 + } + tests := []struct { + name string + fields fields + args args + wantNulls int64 + }{ + {"null type", fields{Type: arrow.Null}, args{5, 10}, 10}, + {"not-null type", fields{Type: arrow.PrimitiveTypes.Int8}, args{5, 10}, array.UnknownNullCount}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &internal.ArraySpan{ + Type: tt.fields.Type, + Len: tt.fields.Len, + Nulls: tt.fields.Nulls, + Offset: tt.fields.Offset, + Buffers: tt.fields.Buffers, + Scratch: tt.fields.Scratch, + Children: tt.fields.Children, + } + a.SetSlice(tt.args.off, tt.args.length) + assert.Equal(t, tt.args.off, a.Offset) + assert.Equal(t, tt.args.length, a.Len) + assert.Equal(t, tt.wantNulls, a.Nulls) + }) + } +} + +func TestArraySpan_FillFromScalar(t *testing.T) { + var ( + expDecimalBuf [arrow.Decimal128SizeBytes]byte + expScratch [2]uint64 + ) + + endian.Native.PutUint64(expDecimalBuf[:], 1234) + endian.Native.PutUint32(arrow.Uint64Traits.CastToBytes(expScratch[:])[4:], 10) + + dict, _, _ := array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World"]`)) + defer dict.Release() + + tests := []struct { + name string + args scalar.Scalar + exp internal.ArraySpan + }{ + {"null-type", + scalar.MakeNullScalar(arrow.Null), + internal.ArraySpan{Type: arrow.Null, Len: 1, Nulls: 1}}, + {"bool valid", + scalar.MakeScalar(true), + internal.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: 1, + Nulls: 0, + Buffers: [3]internal.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x01}}, {}}, + }}, + {"bool valid false", + scalar.MakeScalar(false), + internal.ArraySpan{ + Type: arrow.FixedWidthTypes.Boolean, + Len: 1, + Nulls: 0, + Buffers: [3]internal.BufferSpan{{Buf: []byte{0x01}}, {Buf: []byte{0x00}}, {}}, + }}, + {"primitive null", + scalar.MakeNullScalar(arrow.PrimitiveTypes.Int32), + internal.ArraySpan{ + Type: arrow.PrimitiveTypes.Int32, + Len: 1, + Nulls: 1, + Buffers: [3]internal.BufferSpan{{Buf: []byte{0x00}}, {Buf: []byte{0, 0, 0, 0}}, {}}, + }}, + {"decimal valid", + scalar.NewDecimal128Scalar(decimal128.FromU64(1234), &arrow.Decimal128Type{Precision: 12, Scale: 2}), + internal.ArraySpan{ + Type: &arrow.Decimal128Type{Precision: 12, Scale: 2}, + Len: 1, + Nulls: 0, + Buffers: [3]internal.BufferSpan{{Buf: []byte{0x01}}, {Buf: expDecimalBuf[:]}, {}}, + }}, + {"dictionary scalar", + scalar.NewDictScalar(scalar.NewInt8Scalar(1), dict), + internal.ArraySpan{ + Type: &arrow.DictionaryType{IndexType: arrow.PrimitiveTypes.Int8, ValueType: arrow.BinaryTypes.String}, + Len: 1, + Nulls: 0, + Buffers: [3]internal.BufferSpan{{Buf: []byte{0x01}}, + {Buf: []byte{1}}, {}, + }, + Children: []internal.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]internal.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"binary scalar", + scalar.NewBinaryScalar(dict.Data().Buffers()[2], arrow.BinaryTypes.String), + internal.ArraySpan{ + Type: arrow.BinaryTypes.String, + Len: 1, + Nulls: 0, + Scratch: expScratch, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x01}}, + {Buf: arrow.Uint64Traits.CastToBytes(expScratch[:1])}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, + }, + }, + {"large binary", + scalar.NewLargeStringScalarFromBuffer(dict.Data().Buffers()[2]), + internal.ArraySpan{ + Type: arrow.BinaryTypes.LargeString, + Len: 1, + Nulls: 0, + Scratch: [2]uint64{0, 10}, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x01}}, + {Buf: arrow.Uint64Traits.CastToBytes([]uint64{0, 10})}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}}, + }}, + {"fixed size binary", + scalar.NewFixedSizeBinaryScalar(dict.Data().Buffers()[2], &arrow.FixedSizeBinaryType{ByteWidth: 10}), + internal.ArraySpan{ + Type: &arrow.FixedSizeBinaryType{ByteWidth: 10}, + Len: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x01}}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, {}, + }, + }}, + {"map scalar null value", + scalar.MakeNullScalar(arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String)), + internal.ArraySpan{ + Type: arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), + Len: 1, + Nulls: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0}}, + {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, + {}, + }, + Children: []internal.ArraySpan{{ + Type: arrow.StructOf(arrow.Field{Name: "key", Type: arrow.PrimitiveTypes.Int8}, + arrow.Field{Name: "value", Type: arrow.BinaryTypes.String, Nullable: true}), + Len: 0, + Nulls: 0, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{}}, {}, {}, + }, + Children: []internal.ArraySpan{ + { + Type: arrow.PrimitiveTypes.Int8, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {}, + }, + }, + { + Type: arrow.BinaryTypes.String, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, + }, + }, + }, + }}, + }}, + {"list scalar", + scalar.NewListScalarData(dict.Data()), + internal.ArraySpan{ + Type: arrow.ListOf(arrow.BinaryTypes.String), + Len: 1, + Scratch: [2]uint64{ + *(*uint64)(unsafe.Pointer(&[]int32{0, 2}[0])), + 0, + }, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 2})}, + }, + Children: []internal.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]internal.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"large list scalar", + scalar.NewLargeListScalarData(dict.Data()), + internal.ArraySpan{ + Type: arrow.LargeListOf(arrow.BinaryTypes.String), + Len: 1, + Scratch: [2]uint64{0, 2}, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Int64Traits.CastToBytes([]int64{0, 2})}, + }, + Children: []internal.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]internal.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"fixed size list", + scalar.NewFixedSizeListScalar(dict), + internal.ArraySpan{ + Type: arrow.FixedSizeListOf(2, arrow.BinaryTypes.String), + Len: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {}, {}, + }, + Children: []internal.ArraySpan{{ + Type: arrow.BinaryTypes.String, + Len: 2, + Buffers: [3]internal.BufferSpan{ + {Buf: dict.NullBitmapBytes(), Owner: dict.Data().Buffers()[0]}, + {Buf: dict.Data().Buffers()[1].Bytes(), Owner: dict.Data().Buffers()[1]}, + {Buf: dict.Data().Buffers()[2].Bytes(), Owner: dict.Data().Buffers()[2]}, + }, + }}, + }, + }, + {"struct scalar", + func() scalar.Scalar { + s, _ := scalar.NewStructScalarWithNames([]scalar.Scalar{ + scalar.MakeScalar(int32(5)), scalar.MakeScalar(uint8(10)), + }, []string{"int32", "uint8"}) + return s + }(), + internal.ArraySpan{ + Type: arrow.StructOf( + arrow.Field{Name: "int32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, + arrow.Field{Name: "uint8", Type: arrow.PrimitiveTypes.Uint8, Nullable: true}), + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, {}, {}, + }, + Len: 1, + Children: []internal.ArraySpan{ + { + Type: arrow.PrimitiveTypes.Int32, + Len: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Int32Traits.CastToBytes([]int32{5})}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint8, + Len: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: []byte{10}}, + {}, + }, + }, + }, + }, + }, + {"dense union scalar", + func() scalar.Scalar { + dt := arrow.UnionOf(arrow.DenseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}) + return scalar.NewDenseUnionScalar(scalar.MakeScalar(uint64(25)), 42, dt.(*arrow.DenseUnionType)) + }(), + internal.ArraySpan{ + Type: arrow.UnionOf(arrow.DenseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}), + Len: 1, + Scratch: [2]uint64{42, 1}, + Buffers: [3]internal.BufferSpan{{}, + {Buf: []byte{42}}, {Buf: arrow.Int32Traits.CastToBytes([]int32{0, 1})}, + }, + Children: []internal.ArraySpan{ + { + Type: arrow.BinaryTypes.String, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {Buf: []byte{}}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Len: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{}}, {Buf: []byte{}}, {}, + }, + }, + }, + }, + }, + {"sparse union", + func() scalar.Scalar { + dt := arrow.UnionOf(arrow.SparseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}) + return scalar.NewSparseUnionScalarFromValue(scalar.MakeScalar(uint64(25)), 1, dt.(*arrow.SparseUnionType)) + }(), + internal.ArraySpan{ + Type: arrow.UnionOf(arrow.SparseMode, []arrow.Field{ + {Name: "string", Type: arrow.BinaryTypes.String, Nullable: true}, + {Name: "number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + {Name: "other_number", Type: arrow.PrimitiveTypes.Uint64, Nullable: true}, + }, []arrow.UnionTypeCode{3, 42, 43}), + Len: 1, + Scratch: [2]uint64{42, 0}, + Buffers: [3]internal.BufferSpan{{}, + {Buf: []byte{42}}, {}, + }, + Children: []internal.ArraySpan{ + { + Type: arrow.BinaryTypes.String, + Len: 1, + Nulls: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x0}}, + {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Len: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x1}}, + {Buf: arrow.Uint64Traits.CastToBytes([]uint64{25})}, + {}, + }, + }, + { + Type: arrow.PrimitiveTypes.Uint64, + Len: 1, + Nulls: 1, + Buffers: [3]internal.BufferSpan{ + {Buf: []byte{0x0}}, {Buf: []byte{0, 0, 0, 0, 0, 0, 0, 0}}, {}, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &internal.ArraySpan{ + Nulls: array.UnknownNullCount, + Buffers: [3]internal.BufferSpan{{SelfAlloc: true, Owner: &memory.Buffer{}}, {SelfAlloc: true, Owner: &memory.Buffer{}}, {}}, + } + a.FillFromScalar(tt.args) + assert.Equal(t, tt.exp, *a) + }) + } +} diff --git a/go/arrow/compute/internal/utils.go b/go/arrow/compute/internal/utils.go new file mode 100644 index 0000000000000..480dc33b46aa5 --- /dev/null +++ b/go/arrow/compute/internal/utils.go @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "unsafe" + + "github.com/apache/arrow/go/v10/arrow" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" + "github.com/apache/arrow/go/v10/arrow/float16" + "golang.org/x/exp/constraints" +) + +// IntTypes is a type constraint for raw values represented as signed +// integer types by Arrow. We aren't just using constraints.Signed +// because we don't want to include the raw `int` type here whose size +// changes based on the architecture (int32 on 32-bit architectures and +// int64 on 64-bit architectures). +// +// This will also cover types like MonthInterval or the time types +// as their underlying types are int32 and int64 which will get covered +// by using the ~ +type IntTypes interface { + ~int8 | ~int16 | ~int32 | ~int64 +} + +// UintTypes is a type constraint for raw values represented as unsigned +// integer types by Arrow. We aren't just using constraints.Unsigned +// because we don't want to include the raw `uint` type here whose size +// changes based on the architecture (uint32 on 32-bit architectures and +// uint64 on 64-bit architectures). We also don't want to include uintptr +type UintTypes interface { + ~uint8 | ~uint16 | ~uint32 | ~uint64 +} + +// FloatTypes is a type constraint for raw values for representing +// floating point values in Arrow. This consists of constraints.Float and +// float16.Num +type FloatTypes interface { + float16.Num | constraints.Float +} + +// DecimalTypes is a type constraint for raw values representing larger +// decimal type values in Arrow, specifically decimal128 and decimal256. +type DecimalTypes interface { + decimal128.Num | decimal256.Num +} + +// FixedWidthTypes is a type constraint for raw values in Arrow that +// can be represented as FixedWidth byte slices. Specifically this is for +// using Go generics to easily re-type a byte slice to a properly-typed +// slice. Booleans are excluded here since they are represented by Arrow +// as a bitmap and thus the buffer can't be just reinterpreted as a []bool +type FixedWidthTypes interface { + IntTypes | UintTypes | + FloatTypes | DecimalTypes | + arrow.DayTimeInterval | arrow.MonthDayNanoInterval +} + +// GetSpanValues returns a properly typed slice bye reinterpreting +// the buffer at index i using unsafe.Slice. This will take into account +// the offset of the given ArraySpan. +func GetSpanValues[T FixedWidthTypes](span *ArraySpan, i int) []T { + ret := unsafe.Slice((*T)(unsafe.Pointer(&span.Buffers[i].Buf[0])), span.Offset+span.Len) + return ret[span.Offset:] +} diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go index 7bbf480872cf5..15a3c77af22c8 100644 --- a/go/arrow/datatype.go +++ b/go/arrow/datatype.go @@ -293,6 +293,24 @@ func IsBaseBinary(t Type) bool { return false } +// IsBinaryLike returns true for only BINARY and STRING +func IsBinaryLike(t Type) bool { + switch t { + case BINARY, STRING: + return true + } + return false +} + +// IsLargeBinaryLike returns true for only LARGE_BINARY and LARGE_STRING +func IsLargeBinaryLike(t Type) bool { + switch t { + case LARGE_BINARY, LARGE_STRING: + return true + } + return false +} + // IsFixedSizeBinary returns true for Decimal128/256 and FixedSizeBinary func IsFixedSizeBinary(t Type) bool { switch t { @@ -301,3 +319,39 @@ func IsFixedSizeBinary(t Type) bool { } return false } + +// IsDecimal returns true for Decimal128 and Decimal256 +func IsDecimal(t Type) bool { + switch t { + case DECIMAL128, DECIMAL256: + return true + } + return false +} + +// IsUnion returns true for Sparse and Dense Unions +func IsUnion(t Type) bool { + switch t { + case DENSE_UNION, SPARSE_UNION: + return true + } + return false +} + +// IsListLike returns true for List, LargeList, FixedSizeList, and Map +func IsListLike(t Type) bool { + switch t { + case LIST, LARGE_LIST, FIXED_SIZE_LIST, MAP: + return true + } + return false +} + +// IsNested returns true for List, LargeList, FixedSizeList, Map, Struct, and Unions +func IsNested(t Type) bool { + switch t { + case LIST, LARGE_LIST, FIXED_SIZE_LIST, MAP, STRUCT, SPARSE_UNION, DENSE_UNION: + return true + } + return false +} diff --git a/go/arrow/scalar/binary.go b/go/arrow/scalar/binary.go index aedee064d9e8b..19ff685047552 100644 --- a/go/arrow/scalar/binary.go +++ b/go/arrow/scalar/binary.go @@ -30,6 +30,7 @@ type BinaryScalar interface { Retain() Release() + Buffer() *memory.Buffer Data() []byte } @@ -46,6 +47,7 @@ func (b *Binary) Data() []byte { return b.Value.Bytes() } func (b *Binary) equals(rhs Scalar) bool { return bytes.Equal(b.Value.Bytes(), rhs.(BinaryScalar).Data()) } +func (b *Binary) Buffer() *memory.Buffer { return b.Value } func (b *Binary) String() string { if !b.Valid { return "null" diff --git a/go/arrow/scalar/nested.go b/go/arrow/scalar/nested.go index 2d106e50711d3..4a2e99bcf3826 100644 --- a/go/arrow/scalar/nested.go +++ b/go/arrow/scalar/nested.go @@ -362,6 +362,8 @@ func NewDictScalar(index Scalar, dict arrow.Array) *Dictionary { return ret } +func (s *Dictionary) Data() []byte { return s.Value.Index.(PrimitiveScalar).Data() } + func (s *Dictionary) Retain() { if r, ok := s.Value.Index.(Releasable); ok { r.Retain() diff --git a/go/arrow/scalar/parse.go b/go/arrow/scalar/parse.go index d22647435b1fd..8361362a478b9 100644 --- a/go/arrow/scalar/parse.go +++ b/go/arrow/scalar/parse.go @@ -26,6 +26,8 @@ import ( "github.com/apache/arrow/go/v10/arrow" "github.com/apache/arrow/go/v10/arrow/array" + "github.com/apache/arrow/go/v10/arrow/decimal128" + "github.com/apache/arrow/go/v10/arrow/decimal256" "github.com/apache/arrow/go/v10/arrow/float16" "github.com/apache/arrow/go/v10/arrow/memory" ) @@ -432,6 +434,19 @@ func MakeScalarParam(val interface{}, dt arrow.DataType) (Scalar, error) { } return NewMapScalar(v), nil } + case decimal128.Num: + if _, ok := dt.(*arrow.Decimal128Type); !ok { + return nil, fmt.Errorf("mismatch cannot create decimal128 scalar with incorrect data type") + } + + return NewDecimal128Scalar(v, dt), nil + case decimal256.Num: + if _, ok := dt.(*arrow.Decimal256Type); !ok { + return nil, fmt.Errorf("mismatch cannot create decimal256 scalar with incorrect data type") + } + + return NewDecimal256Scalar(v, dt), nil + } if arrow.IsInteger(dt.ID()) { diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go index 7ae8b03473480..918267dc661da 100644 --- a/go/arrow/scalar/scalar.go +++ b/go/arrow/scalar/scalar.go @@ -273,6 +273,10 @@ type Decimal128 struct { Value decimal128.Num } +func (s *Decimal128) Data() []byte { + return (*[arrow.Decimal128SizeBytes]byte)(unsafe.Pointer(&s.Value))[:] +} + func (s *Decimal128) value() interface{} { return s.Value } func (s *Decimal128) String() string { @@ -337,6 +341,10 @@ type Decimal256 struct { Value decimal256.Num } +func (s *Decimal256) Data() []byte { + return (*[arrow.Decimal256SizeBytes]byte)(unsafe.Pointer(&s.Value))[:] +} + func (s *Decimal256) value() interface{} { return s.Value } func (s *Decimal256) String() string { @@ -794,14 +802,18 @@ func MakeArrayFromScalar(sc Scalar, length int, mem memory.Allocator) (arrow.Arr data.Release() }() return array.MakeFromData(data), nil - case PrimitiveScalar: - data := finishFixedWidth(s.Data()) - defer data.Release() - return array.MakeFromData(data), nil case *Decimal128: data := finishFixedWidth(arrow.Decimal128Traits.CastToBytes([]decimal128.Num{s.Value})) defer data.Release() return array.MakeFromData(data), nil + case *Decimal256: + data := finishFixedWidth(arrow.Decimal256Traits.CastToBytes([]decimal256.Num{s.Value})) + defer data.Release() + return array.MakeFromData(data), nil + case PrimitiveScalar: + data := finishFixedWidth(s.Data()) + defer data.Release() + return array.MakeFromData(data), nil case *List: values := make([]arrow.Array, length) for i := range values { @@ -953,6 +965,10 @@ func Hash(seed maphash.Seed, s Scalar) uint64 { case *DenseUnion: // typecode is ignored when comparing equality, so don't hash it either out ^= Hash(seed, s.Value) + case *Dictionary: + if s.Value.Index.IsValid() { + out ^= Hash(seed, s.Value.Index) + } case PrimitiveScalar: h.Write(s.Data()) hash() @@ -967,10 +983,6 @@ func Hash(seed maphash.Seed, s Scalar) uint64 { out ^= Hash(seed, c) } } - case *Dictionary: - if s.Value.Index.IsValid() { - out ^= Hash(seed, s.Value.Index) - } } return out