forked from parquet-go/parquet-go
-
Notifications
You must be signed in to change notification settings - Fork 1
/
column_path.go
111 lines (90 loc) · 2.18 KB
/
column_path.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
package parquet
import (
"strings"
)
// columnPath is an ordered list of field names; String renders it as the
// names joined by dots.
type columnPath []string

// append returns a path made of the receiver's elements followed by names.
//
// The receiver is re-sliced so that its capacity equals its length before the
// built-in append runs. Appending at least one name therefore always
// allocates a new backing array instead of writing into spare capacity that
// another path derived from the same receiver could be sharing.
func (path columnPath) append(names ...string) columnPath {
	n := len(path)
	clipped := path[:n:n]
	return append(clipped, names...)
}

// equal reports whether path and other are the same according to
// stringsAreEqual.
func (path columnPath) equal(other columnPath) bool {
	return stringsAreEqual([]string(path), []string(other))
}

// less reports whether path is ordered before other, as decided by
// stringsAreOrdered.
func (path columnPath) less(other columnPath) bool {
	return stringsAreOrdered([]string(path), []string(other))
}

// String returns the elements of the path joined with "." separators. An
// empty path yields the empty string.
func (path columnPath) String() string {
	return strings.Join([]string(path), ".")
}
// stringsAreEqual reports whether strings1 and strings2 have the same length
// and hold the same string at every index. Nil and empty slices are equal to
// each other.
func stringsAreEqual(strings1, strings2 []string) bool {
	// Start from the length check; the loop below only runs while no
	// difference has been found, so it never indexes past strings2.
	mismatch := len(strings1) != len(strings2)
	for i := 0; !mismatch && i < len(strings1); i++ {
		mismatch = strings1[i] != strings2[i]
	}
	return !mismatch
}
// stringsAreOrdered reports whether strings1 sorts strictly before strings2
// in lexicographic order.
//
// The slices are compared element by element; the first position at which
// they differ decides the result. When one slice is a prefix of the other,
// the shorter one sorts first. Equal slices (including two empty ones) are
// not ordered, so the function is a valid strict "less" for sorting.
//
// The previous implementation returned false as soon as it met a pair of
// equal elements, so two paths sharing a common prefix (e.g. "a.b" and "a.c")
// were reported as unordered in both directions.
func stringsAreOrdered(strings1, strings2 []string) bool {
	n := len(strings1)
	if n > len(strings2) {
		n = len(strings2)
	}

	for i := 0; i < n; i++ {
		// Equal elements carry no ordering information; keep scanning until
		// the first difference.
		if strings1[i] != strings2[i] {
			return strings1[i] < strings2[i]
		}
	}

	// All shared positions are equal: only a strict prefix sorts first.
	return len(strings1) < len(strings2)
}
// leafColumn is the value handed to the callback of forEachLeafColumn for
// every leaf node reached during the traversal.
type leafColumn struct {
// node is the leaf node itself.
node Node
// path is the list of field names followed from the traversal root to reach
// node.
path columnPath
// maxRepetitionLevel is the traversal's repetition counter, passed through
// makeRepetitionLevel. The counter is incremented for each visited node on
// the way down (leaf included) that is repeated and not optional.
maxRepetitionLevel byte
// maxDefinitionLevel is the traversal's definition counter, passed through
// makeDefinitionLevel. The counter is incremented for each visited node on
// the way down (leaf included) that is optional or repeated.
maxDefinitionLevel byte
// columnIndex is the position of this leaf in traversal order, passed
// through makeColumnIndex; it starts at the index the traversal was given
// (zero when started from forEachLeafColumnOf) and grows by one per leaf.
columnIndex int16
}
// forEachLeafColumnOf calls do for every leaf column found under node.
//
// It starts the recursive traversal with an empty path, column index zero and
// both level counters at zero. The final column index returned by the
// traversal is discarded.
func forEachLeafColumnOf(node Node, do func(leafColumn)) {
	var root columnPath // nil: the traversal root has no name in the path
	forEachLeafColumn(node, root, 0, 0, 0, do)
}
// forEachLeafColumn walks the tree rooted at node depth-first, in the order
// returned by Fields, and calls do once for every leaf it reaches.
//
// path is the list of field names leading to node. columnIndex is the index
// to assign to the next leaf. maxRepetitionLevel and maxDefinitionLevel are
// the level counters accumulated by the ancestors of node; node's own
// contribution is added here before it is reported or descended into.
//
// The return value is the column index to use for the next leaf after this
// subtree, i.e. columnIndex plus the number of leaves visited.
func forEachLeafColumn(node Node, path columnPath, columnIndex, maxRepetitionLevel, maxDefinitionLevel int, do func(leafColumn)) int {
	// Optional is tested first: a node reporting optional only bumps the
	// definition level, and Repeated is not consulted for it.
	if node.Optional() {
		maxDefinitionLevel++
	} else if node.Repeated() {
		maxRepetitionLevel++
		maxDefinitionLevel++
	}

	if !node.Leaf() {
		next := columnIndex
		for _, child := range node.Fields() {
			childPath := path.append(child.Name())
			next = forEachLeafColumn(child, childPath, next, maxRepetitionLevel, maxDefinitionLevel, do)
		}
		return next
	}

	leaf := leafColumn{
		node:               node,
		path:               path,
		maxRepetitionLevel: makeRepetitionLevel(maxRepetitionLevel),
		maxDefinitionLevel: makeDefinitionLevel(maxDefinitionLevel),
		columnIndex:        makeColumnIndex(columnIndex),
	}
	do(leaf)
	return columnIndex + 1
}
// lookupColumnPath resolves path starting from node by calling fieldByName
// once per path element, each time on the node found for the previous
// element.
//
// It returns the node reached after the last element, or nil as soon as a
// lookup yields nil (the remaining elements are then not looked up). An empty
// path returns node unchanged.
func lookupColumnPath(node Node, path columnPath) Node {
	for _, name := range path {
		if node == nil {
			break
		}
		node = fieldByName(node, name)
	}
	return node
}
// hasColumnPath reports whether lookupColumnPath finds a non-nil node when
// resolving path from node.
func hasColumnPath(node Node, path columnPath) bool {
	found := lookupColumnPath(node, path)
	return found != nil
}