Skip to content

Commit

Permalink
Merge pull request #551 from egor-ryashin/arrow-v12
Browse files Browse the repository at this point in the history
Migrate Arrow to v12
  • Loading branch information
xitongsys authored Aug 6, 2023
2 parents 206c501 + a75e369 commit b6d7d87
Show file tree
Hide file tree
Showing 8 changed files with 353 additions and 97 deletions.
23 changes: 12 additions & 11 deletions common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ import (
"strconv"
"strings"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"

"github.com/xitongsys/parquet-go/parquet"
)

Expand Down Expand Up @@ -530,7 +531,7 @@ func DeepCopy(src, dst interface{}) {
return
}

//Get key tag map for map
// Get key tag map for map
func GetKeyTagMap(src *Tag) *Tag {
res := NewTag()
res.InName = "Key"
Expand All @@ -548,7 +549,7 @@ func GetKeyTagMap(src *Tag) *Tag {
return res
}

//Get value tag map for map
// Get value tag map for map
func GetValueTagMap(src *Tag) *Tag {
res := NewTag()
res.InName = "Value"
Expand All @@ -566,7 +567,7 @@ func GetValueTagMap(src *Tag) *Tag {
return res
}

//Convert string to a golang variable name
// Convert string to a golang variable name
func StringToVariableName(str string) string {
ln := len(str)
if ln <= 0 {
Expand All @@ -588,7 +589,7 @@ func StringToVariableName(str string) string {
return name
}

//Convert the first letter of a string to uppercase
// Convert the first letter of a string to uppercase
func HeadToUpper(str string) string {
ln := len(str)
if ln <= 0 {
Expand Down Expand Up @@ -928,7 +929,7 @@ func (table decimalStringFuncTable) MinMaxSize(minVal interface{}, maxVal interf
return Min(table, minVal, val), Max(table, maxVal, val), int32(len(val.(string)))
}

//Get the size of a parquet value
// Get the size of a parquet value
func SizeOf(val reflect.Value) int64 {
var size int64
switch val.Type().Kind() {
Expand Down Expand Up @@ -977,17 +978,17 @@ func ReformPathStr(pathStr string) string {
return strings.ReplaceAll(pathStr, ".", "\x01")
}

//Convert path slice to string
// PathToStr joins the elements of path into a single string, placing
// PAR_GO_PATH_DELIMITER between consecutive elements.
func PathToStr(path []string) string {
	joined := strings.Join(path, PAR_GO_PATH_DELIMITER)
	return joined
}

//Convert string to path slice
// StrToPath splits str on PAR_GO_PATH_DELIMITER and returns the resulting
// path elements.
func StrToPath(str string) []string {
	elems := strings.Split(str, PAR_GO_PATH_DELIMITER)
	return elems
}

//Get the pathStr index in a path
// PathStrIndex returns the number of elements obtained by splitting str on
// PAR_GO_PATH_DELIMITER.
func PathStrIndex(str string) int {
	elems := strings.Split(str, PAR_GO_PATH_DELIMITER)
	return len(elems)
}
Expand Down Expand Up @@ -1030,7 +1031,7 @@ func TransposeTable(table [][]interface{}) [][]interface{} {
//
// If `col` contains a Null value but `field` is not marked as Nullable, this
// results in an error.
func ArrowColToParquetCol(field arrow.Field, col array.Interface) (
func ArrowColToParquetCol(field arrow.Field, col arrow.Array) (
[]interface{}, error) {
recs := make([]interface{}, col.Len())
switch field.Type.(type) {
Expand Down
6 changes: 3 additions & 3 deletions example/arrow_to_parquet.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"log"
"time"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/arrow/memory"
"github.com/apache/arrow/go/v12/arrow"
"github.com/apache/arrow/go/v12/arrow/array"
"github.com/apache/arrow/go/v12/arrow/memory"
"github.com/xitongsys/parquet-go-source/local"
"github.com/xitongsys/parquet-go/reader"
"github.com/xitongsys/parquet-go/writer"
Expand Down
12 changes: 6 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ module github.com/xitongsys/parquet-go
go 1.16

require (
github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516
github.com/apache/thrift v0.14.2
github.com/apache/arrow/go/v12 v12.0.1
github.com/apache/thrift v0.16.0
github.com/aws/aws-sdk-go v1.30.19
github.com/goccy/go-reflect v1.2.0
github.com/golang/snappy v0.0.3
github.com/klauspost/compress v1.13.1
github.com/pierrec/lz4/v4 v4.1.8
github.com/stretchr/testify v1.7.0
github.com/golang/snappy v0.0.4
github.com/klauspost/compress v1.15.9
github.com/pierrec/lz4/v4 v4.1.15
github.com/stretchr/testify v1.8.0
github.com/xitongsys/parquet-go-source v0.0.0-20200817004010-026bad9b25d0
)
302 changes: 279 additions & 23 deletions go.sum

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion schema/arrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package schema
import (
"fmt"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/v12/arrow"
"github.com/xitongsys/parquet-go/common"
"github.com/xitongsys/parquet-go/parquet"
)
Expand Down
2 changes: 1 addition & 1 deletion schema/arrow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package schema
import (
"testing"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/v12/arrow"
"github.com/stretchr/testify/assert"
)

Expand Down
13 changes: 6 additions & 7 deletions writer/arrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ package writer
import (
"fmt"

"github.com/apache/arrow/go/arrow"
"github.com/apache/arrow/go/arrow/array"
"github.com/apache/arrow/go/v12/arrow"
"github.com/xitongsys/parquet-go/common"
"github.com/xitongsys/parquet-go/layout"
"github.com/xitongsys/parquet-go/marshal"
Expand All @@ -25,10 +24,10 @@ type ArrowWriter struct {
ParquetWriter
}

//NewArrowWriter creates arrow schema parquet writer given the native
//arrow schema, parquet file writer which contains the parquet file in
//which we will write the record along with the number of parallel threads
//which will write in the file.
// NewArrowWriter creates arrow schema parquet writer given the native
// arrow schema, parquet file writer which contains the parquet file in
// which we will write the record along with the number of parallel threads
// which will write in the file.
func NewArrowWriter(arrowSchema *arrow.Schema, pfile source.ParquetFile,
np int64) (*ArrowWriter, error) {
var err error
Expand Down Expand Up @@ -61,7 +60,7 @@ func NewArrowWriter(arrowSchema *arrow.Schema, pfile source.ParquetFile,
// The function transforms the data from the record, which the go arrow library
// gives as array of columns, to array of rows which the parquet-go library
// can understand, as it does not accept data by columns, but rather by rows.
func (w *ArrowWriter) WriteArrow(record array.Record) error {
func (w *ArrowWriter) WriteArrow(record arrow.Record) error {
table := make([][]interface{}, 0)
for i, column := range record.Columns() {
columnFromRecord, err := common.ArrowColToParquetCol(
Expand Down
Loading

0 comments on commit b6d7d87

Please sign in to comment.