Skip to content

Commit

Permalink
feat: add extracter package (#53)
Browse files Browse the repository at this point in the history
* feat: add jsonextracter package

* style: rename package

* refactor: reorganize export functions

* refactor: hide unnecessary export functions
  • Loading branch information
iamryanchia authored Dec 24, 2024
1 parent 8618d81 commit 054558b
Show file tree
Hide file tree
Showing 13 changed files with 1,548 additions and 0 deletions.
106 changes: 106 additions & 0 deletions extracter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Extracter

Extract specific fields from JSON-like data and **output not only the field values but also their upstream structure** (the enclosing keys that lead to each field).

A typical use case is to trim k8s objects in `TransformingInformer` to save informer memory.

Please refer to [JSONPath Support](https://kubernetes.io/docs/reference/kubectl/jsonpath/) to see JSONPath usage.

## Example

Code:

```go
package main

import (
"encoding/json"
"fmt"

"kusionstack.io/kube-utils/extracter"
)

var pod = []byte(`{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {
"labels": {
"name": "pause",
"app": "pause"
},
"name": "pause",
"namespace": "default"
},
"spec": {
"containers": [
{
"image": "registry.k8s.io/pause:3.8",
"imagePullPolicy": "IfNotPresent",
"name": "pause1"
},
{
"image": "registry.k8s.io/pause:3.8",
"imagePullPolicy": "IfNotPresent",
"name": "pause2"
}
]
}
}`)

func printJSON(data interface{}) {
bytes, _ := json.Marshal(data)
fmt.Println(string(bytes))
}

func main() {
var podData map[string]interface{}
json.Unmarshal(pod, &podData)

kindPath := "{.kind}"
kindExtracter, _ := extracter.New([]string{kindPath}, false)

kind, _ := kindExtracter.Extract(podData)
printJSON(kind)

nameImagePath := "{.spec.containers[*]['name', 'image']}"
nameImageExtracter, _ := extracter.New([]string{nameImagePath}, false)

nameImage, _ := nameImageExtracter.Extract(podData)
printJSON(nameImage)

mergeExtracter, _ := extracter.New([]string{kindPath, nameImagePath}, false)
merged, _ := mergeExtracter.Extract(podData)
printJSON(merged)
}
```

Output:

```plain
{"kind":"Pod"}
{"spec":{"containers":[{"image":"registry.k8s.io/pause:3.8","name":"pause1"},{"image":"registry.k8s.io/pause:3.8","name":"pause2"}]}}
{"kind":"Pod","spec":{"containers":[{"image":"registry.k8s.io/pause:3.8","name":"pause1"},{"image":"registry.k8s.io/pause:3.8","name":"pause2"}]}}
```

## Note

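Lists are merged by replacement, not element by element: when two paths produce the same list, the list from the later path overwrites the one from the earlier path. Therefore, if you retrieve the container name and image separately and merge them, the resulting output contains only the field from the last path (here the name) and will not contain the image.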

Code:

```go
...
namePath := "{.spec.containers[*].name}"
imagePath := "{.spec.containers[*].image}"

mergeExtracter, _ = extracter.New([]string{imagePath, namePath}, false)
merged, _ = mergeExtracter.Extract(podData)
printJSON(merged)
...
```

Output:

```plain
{"spec":{"containers":[{"name":"pause1"},{"name":"pause2"}]}}
```
96 changes: 96 additions & 0 deletions extracter/extracter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/**
* Copyright 2024 KusionStack Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package extracter

import (
"fmt"

"k8s.io/client-go/util/jsonpath"
)

// Extracter extracts selected fields from JSON-like data held in a
// map[string]interface{}.
//
// Implementations in this package are nestedFieldPathExtracter,
// jsonPathExtracter and Extracters; use New to obtain one.
type Extracter interface {
// Extract returns the selected fields of data as a map, or an error if
// extraction fails. As described in the package README, the output keeps
// the upstream structure (the enclosing keys) of each extracted field.
Extract(data map[string]interface{}) (map[string]interface{}, error)
}

// New creates an Extracter for the given JSONPath expressions.
//
// Every path is parsed with Parse and turned into one extracter:
//
//   - a path with no root nodes (for example the empty string) yields a
//     nested-field extracter with an empty field list;
//   - a path whose single root node is a list made up solely of field nodes
//     (for example `{.metadata.labels.name}`) yields a nested-field extracter,
//     which is preferred whenever possible as it has better performance;
//   - any other path yields a jsonPathExtracter.
//
// allowMissingKeys is passed through to every extracter that is created.
//
// If exactly one extracter was created it is returned directly; otherwise the
// extracters are wrapped in an *Extracters, which merges their outputs in the
// order of jsonPaths. An error is returned if any path fails to parse.
func New(jsonPaths []string, allowMissingKeys bool) (Extracter, error) {
	extracters := make([]Extracter, 0, len(jsonPaths))

	for _, p := range jsonPaths {
		parser, err := Parse(p, p)
		if err != nil {
			return nil, fmt.Errorf("error in parsing path %q: %w", p, err)
		}

		rootNodes := parser.Root.Nodes
		if len(rootNodes) == 0 {
			extracters = append(extracters, NewNestedFieldPathExtracter(nil, allowMissingKeys))
			continue
		}

		if len(rootNodes) == 1 {
			// Use a checked assertion: a root node that is not a list must not
			// panic here, it simply does not qualify for the fast path and is
			// handed to the generic JSONPath extracter below.
			if list, isList := rootNodes[0].(*jsonpath.ListNode); isList {
				fields := make([]string, 0, len(list.Nodes))
				for _, node := range list.Nodes {
					field, isField := node.(*jsonpath.FieldNode)
					if !isField {
						// One non-field node is enough to rule out the fast path.
						break
					}
					fields = append(fields, field.Value)
				}

				// Only a pure chain of field accesses can be served by the
				// nested-field extracter.
				if len(fields) == len(list.Nodes) {
					extracters = append(extracters, NewNestedFieldPathExtracter(fields, allowMissingKeys))
					continue
				}
			}
		}

		jp := &jsonPathExtracter{name: parser.Name, parser: parser, allowMissingKeys: allowMissingKeys}
		extracters = append(extracters, jp)
	}

	// A single extracter needs no merging wrapper.
	if len(extracters) == 1 {
		return extracters[0], nil
	}

	return &Extracters{extracters}, nil
}

// Extracters combines several Extracter values so that multiple fields can be
// extracted from the same data and merged into a single result. It implements
// Extracter itself.
type Extracters struct {
// extracters are invoked in slice order by Extract.
extracters []Extracter
}

// Extract runs every wrapped extracter against data, in order, and folds the
// individual outputs into one map using mergeFields.
//
// While the accumulated result is still nil, the next output is adopted as-is
// instead of being merged. The first error from any extracter aborts the run
// and is returned unchanged. With no extracters the result is nil.
func (e *Extracters) Extract(data map[string]interface{}) (map[string]interface{}, error) {
	var result map[string]interface{}

	for i := range e.extracters {
		extracted, err := e.extracters[i].Extract(data)
		if err != nil {
			return nil, err
		}

		if result != nil {
			result = mergeFields(result, extracted)
			continue
		}
		result = extracted
	}

	return result, nil
}
105 changes: 105 additions & 0 deletions extracter/extracter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/**
* Copyright 2024 KusionStack Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package extracter

import (
"encoding/json"
"reflect"
"testing"
)

// TestNew checks that New returns the expected concrete Extracter type, or an
// error, for a range of JSONPath expressions. Only the dynamic type of the
// result is compared; the fields of the returned extracter are not inspected.
func TestNew(t *testing.T) {
	type args struct {
		paths            []string
		allowMissingKeys bool
	}
	// Every case has a unique name so that a failing subtest can be identified
	// directly from the test output.
	tests := []struct {
		name    string
		args    args
		want    Extracter
		wantErr bool
	}{
		{name: "invalid path", args: args{paths: []string{`{`}, allowMissingKeys: false}, want: nil, wantErr: true},
		{name: "fieldPath extracter: empty action", args: args{paths: []string{`{}`}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "fieldPath extracter: empty path", args: args{paths: []string{``}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "fieldPath extracter: dotted fields", args: args{paths: []string{`{.metadata.labels.name}`}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "fieldPath extracter: quoted field", args: args{paths: []string{`{.metadata.labels['name']}`}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "multiple actions rejected", args: args{paths: []string{`{.metadata.labels.name}{.metadata.labels.app}`}, allowMissingKeys: false}, want: nil, wantErr: true},
		{name: "jsonPath extracter: field union", args: args{paths: []string{`{.metadata.labels['name', 'app']}`}, allowMissingKeys: false}, want: &jsonPathExtracter{}, wantErr: false},
		{name: "jsonPath extracter: wildcard", args: args{paths: []string{`{.spec.containers[*].name}`}, allowMissingKeys: false}, want: &jsonPathExtracter{}, wantErr: false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := New(tt.args.paths, tt.args.allowMissingKeys)
			if (err != nil) != tt.wantErr {
				t.Errorf("New() error = %v, wantErr %v", err, tt.wantErr)
				return
			}

			// For error cases want is nil, so this also verifies that no
			// extracter is returned alongside the error.
			if reflect.TypeOf(tt.want) != reflect.TypeOf(got) {
				t.Errorf("New() = %T, want %T", got, tt.want)
			}
		})
	}
}

// TestExtracters_Extract builds extracters from several JSONPath expressions
// with New (allowMissingKeys enabled), runs them against the podData fixture
// and compares the JSON encoding of the merged result with the expected
// string.
func TestExtracters_Extract(t *testing.T) {
	containerNamePath := `{.spec.containers[*].name}`
	containerImagePath := `{.spec.containers[*].image}`
	kindPath := "{.kind}"
	apiVersionPath := "{.apiVersion}"

	type args struct {
		paths []string
		input map[string]interface{}
	}
	tests := []struct {
		name    string
		args    args
		want    string
		wantErr bool // whether Extract is expected to fail
	}{
		{
			// Lists are replaced on merge, so only the later path (name) survives.
			name: "merge name and image", args: args{paths: []string{containerImagePath, containerNamePath}, input: podData},
			want: `{"spec":{"containers":[{"name":"pause1"},{"name":"pause2"}]}}`, wantErr: false,
		},
		{
			name: "name kind apiVersion", args: args{paths: []string{containerNamePath, kindPath, apiVersionPath}, input: podData},
			want: `{"apiVersion":"v1","kind":"Pod","spec":{"containers":[{"name":"pause1"},{"name":"pause2"}]}}`, wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Construction is a precondition here, not the behavior under test:
			// stop immediately instead of calling Extract on a nil Extracter.
			ex, err := New(tt.args.paths, true)
			if err != nil {
				t.Fatalf("New() error = %v", err)
			}

			got, err := ex.Extract(tt.args.input)
			if (err != nil) != tt.wantErr {
				t.Errorf("Extracters_Extract() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if tt.wantErr {
				// Nothing to compare when the expected error occurred.
				return
			}

			data, err := json.Marshal(got)
			if err != nil {
				t.Fatalf("json.Marshal() error = %v", err)
			}
			if string(data) != tt.want {
				t.Errorf("Extracters_Extract() = %v, want %v", string(data), tt.want)
			}
		})
	}
}
71 changes: 71 additions & 0 deletions extracter/fieldpath.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/**
* Copyright 2024 KusionStack Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package extracter

import (
"fmt"
)

// NewNestedFieldPathExtracter returns an Extracter that extracts the single
// nested field addressed by nestedField, a sequence of map keys ordered from
// the outermost key to the innermost one.
//
// allowMissingKeys is forwarded to NestedFieldNoCopy on every Extract call and
// decides whether a missing key is tolerated or reported as an error.
// The nestedField slice is stored as given; it is not copied.
func NewNestedFieldPathExtracter(nestedField []string, allowMissingKeys bool) Extracter {
return &nestedFieldPathExtracter{nestedField: nestedField, allowMissingKeys: allowMissingKeys}
}

// nestedFieldPathExtracter adapts the NestedFieldNoCopy function to the
// Extracter interface.
type nestedFieldPathExtracter struct {
// nestedField is the key path passed to NestedFieldNoCopy as its fields.
nestedField []string
// allowMissingKeys is passed to NestedFieldNoCopy unchanged.
allowMissingKeys bool
}

// Extract outputs the value found at n.nestedField in data together with its
// upstream structure. It delegates to NestedFieldNoCopy with the configured
// key path and allowMissingKeys setting and returns that call's result as-is.
func (n *nestedFieldPathExtracter) Extract(data map[string]interface{}) (map[string]interface{}, error) {
return NestedFieldNoCopy(data, n.allowMissingKeys, n.nestedField...)
}

// NestedFieldNoCopy walks data along fields (outermost key first) and returns
// a new map that contains only that path: every intermediate key maps to a
// fresh map and the final key maps to the value found in data. The leaf value
// itself is not copied.
//
// It is similar to JSONPath.Extract, but it only descends through
// map[string]interface{} values and does not support lists; in exchange it has
// better performance.
//
// Behavior:
//   - with no fields, it returns nil, nil;
//   - if a key is missing and allowMissingKeys is true, the structure built so
//     far is returned without an error;
//   - if a key is missing and allowMissingKeys is false, an error is returned;
//   - if a non-final key holds anything other than a map[string]interface{},
//     an error is returned regardless of allowMissingKeys.
func NestedFieldNoCopy(data map[string]interface{}, allowMissingKeys bool, fields ...string) (map[string]interface{}, error) {
	if len(fields) == 0 {
		return nil, nil
	}

	root := map[string]interface{}{}
	parent := root
	last := len(fields) - 1

	for depth, name := range fields {
		value, found := data[name]
		if !found {
			if allowMissingKeys {
				return root, nil
			}
			return nil, fmt.Errorf("field %q not exist", name)
		}

		// The final key receives the value from data unchanged.
		if depth == last {
			parent[name] = value
			break
		}

		// Every key before the last one must lead to another map.
		next, isMap := value.(map[string]interface{})
		if !isMap {
			return nil, fmt.Errorf("%v is of the type %T, expected map[string]interface{}", value, value)
		}
		data = next

		// Mirror this level in the output and descend into it.
		child := map[string]interface{}{}
		parent[name] = child
		parent = child
	}

	return root, nil
}
Loading

0 comments on commit 054558b

Please sign in to comment.