Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add extracter package #53

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions extracter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Extracter

Extract specific fields from JSON-like data and **output not only the field values but also their upstream structure**.

A typical use case is to trim k8s objects in `TransformingInformer` to save informer memory.

Please refer to [JSONPath Support](https://kubernetes.io/docs/reference/kubectl/jsonpath/) to see JSONPath usage.

## Example

Code:

```go
package main

import (
"encoding/json"
"fmt"

"kusionstack.io/kube-utils/extracter"
)

var pod = []byte(`{
"apiVersion": "v1",
"kind": "Pod",
"metadata": {
"labels": {
"name": "pause",
"app": "pause"
},
"name": "pause",
"namespace": "default"
},
"spec": {
"containers": [
{
"image": "registry.k8s.io/pause:3.8",
"imagePullPolicy": "IfNotPresent",
"name": "pause1"
},
{
"image": "registry.k8s.io/pause:3.8",
"imagePullPolicy": "IfNotPresent",
"name": "pause2"
}
]
}
}`)

// printJSON marshals data to JSON and prints it on its own line.
// If data cannot be marshalled, the error is printed instead of a
// silent empty line.
func printJSON(data interface{}) {
	out, err := json.Marshal(data)
	if err != nil {
		fmt.Println("marshal error:", err)
		return
	}
	fmt.Println(string(out))
}

// main decodes the sample Pod and prints the output of three extracters:
// one for the kind path, one for the container name/image path, and one
// built from both paths together.
//
// Returned errors are discarded with `_` only to keep the example short.
func main() {
	var podData map[string]interface{}
	json.Unmarshal(pod, &podData)

	// A single top-level field. The second argument of New is allowMissingKeys.
	kindPath := "{.kind}"
	kindExtracter, _ := extracter.New([]string{kindPath}, false)

	kind, _ := kindExtracter.Extract(podData)
	printJSON(kind)

	// The name and image fields of every container.
	nameImagePath := "{.spec.containers[*]['name', 'image']}"
	nameImageExtracter, _ := extracter.New([]string{nameImagePath}, false)

	nameImage, _ := nameImageExtracter.Extract(podData)
	printJSON(nameImage)

	// Several paths at once: the per-path outputs are merged into one result.
	mergeExtracter, _ := extracter.New([]string{kindPath, nameImagePath}, false)
	merged, _ := mergeExtracter.Extract(podData)
	printJSON(merged)
}
```

Output:

```plain
{"kind":"Pod"}
{"spec":{"containers":[{"image":"registry.k8s.io/pause:3.8","name":"pause1"},{"image":"registry.k8s.io/pause:3.8","name":"pause2"}]}}
{"kind":"Pod","spec":{"containers":[{"image":"registry.k8s.io/pause:3.8","name":"pause1"},{"image":"registry.k8s.io/pause:3.8","name":"pause2"}]}}
```

## Note

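Lists are merged by replacement, not element by element: when two paths produce the same list, the list from the later path replaces the one from the earlier path. Therefore, if you extract the container name and image with separate paths and merge them, only the fields from the last path survive. In the example below, the resulting output does not contain the image.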

Code:

```go
...
namePath := "{.spec.containers[*].name}"
imagePath := "{.spec.containers[*].image}"

mergeExtracter, _ = extracter.New([]string{imagePath, namePath}, false)
merged, _ = mergeExtracter.Extract(podData)
printJSON(merged)
...
```

Output:

```plain
{"spec":{"containers":[{"name":"pause1"},{"name":"pause2"}]}}
```
96 changes: 96 additions & 0 deletions extracter/extracter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/**
* Copyright 2024 KusionStack Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package extracter

import (
"fmt"

"k8s.io/client-go/util/jsonpath"
)

// Extracter extracts part of a JSON-like object.
type Extracter interface {
	// Extract takes the decoded object and returns the extracted data as a
	// map, or an error if the extraction fails.
	Extract(data map[string]interface{}) (map[string]interface{}, error)
}

// New creates an Extracter for the given JSONPath expressions.
//
// Each path is parsed with Parse. A path that parses to no root nodes, or to
// a single list made up only of field nodes (for example
// `{.metadata.labels.name}`), is served by a nested-field-path extracter, as
// it has better performance. Every other path is served by the general
// JSONPath extracter.
//
// allowMissingKeys is passed through to every extracter that is created.
//
// When exactly one extracter results it is returned directly; otherwise the
// extracters are wrapped in an Extracters, which merges their outputs in
// order. An error is returned as soon as a path fails to parse.
func New(jsonPaths []string, allowMissingKeys bool) (Extracter, error) {
	var extracters []Extracter

	for _, p := range jsonPaths {
		parser, err := Parse(p, p)
		if err != nil {
			return nil, fmt.Errorf("error in parsing path %q: %w", p, err)
		}

		rootNodes := parser.Root.Nodes
		if len(rootNodes) == 0 {
			extracters = append(extracters, NewNestedFieldPathExtracter(nil, allowMissingKeys))
			continue
		}

		if len(rootNodes) == 1 {
			// Comma-ok assertion: a root node that is not a list must not
			// panic here; it simply is not a plain field path.
			// NOTE(review): presumably text outside `{}` produces such a
			// node — confirm against Parse.
			if list, isList := rootNodes[0].(*jsonpath.ListNode); isList {
				fields := make([]string, 0, len(list.Nodes))
				for _, node := range list.Nodes {
					if node.Type() != jsonpath.NodeField {
						// One non-field node is enough to rule out the fast path.
						break
					}
					fields = append(fields, node.(*jsonpath.FieldNode).Value)
				}

				if len(fields) == len(list.Nodes) {
					fp := NewNestedFieldPathExtracter(fields, allowMissingKeys)
					extracters = append(extracters, fp)
					continue
				}
			}
		}

		jp := &jsonPathExtracter{name: parser.Name, parser: parser, allowMissingKeys: allowMissingKeys}
		extracters = append(extracters, jp)
	}

	if len(extracters) == 1 {
		return extracters[0], nil
	}

	return &Extracters{extracters}, nil
}

// Extracters bundles several extracters so that multiple fields can be
// extracted in one call and merged into a single result.
type Extracters struct {
	// extracters are invoked in slice order by Extract.
	extracters []Extracter
}

// Extract runs every wrapped extracter against data, in order, and folds
// their outputs together with mergeFields. While the accumulated result is
// still nil, an extracter's output is adopted as-is instead of being merged.
// The first error encountered aborts the run and is returned with a nil result.
func (e *Extracters) Extract(data map[string]interface{}) (map[string]interface{}, error) {
	var result map[string]interface{}

	for i := range e.extracters {
		out, err := e.extracters[i].Extract(data)
		switch {
		case err != nil:
			return nil, err
		case result == nil:
			result = out
		default:
			result = mergeFields(result, out)
		}
	}

	return result, nil
}
105 changes: 105 additions & 0 deletions extracter/extracter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/**
* Copyright 2024 KusionStack Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package extracter

import (
"encoding/json"
"reflect"
"testing"
)

// TestNew checks which concrete Extracter type New returns for a single path,
// and that paths expected to be rejected yield an error. Only the dynamic
// type of the result is compared, not its contents.
func TestNew(t *testing.T) {
	type args struct {
		paths            []string
		allowMissingKeys bool
	}
	// Every case has a unique name so a failing subtest can be identified
	// (and re-run with -run) without relying on Go's "#01" suffixes.
	tests := []struct {
		name    string
		args    args
		want    Extracter
		wantErr bool
	}{
		{name: "invalid path", args: args{paths: []string{`{`}, allowMissingKeys: false}, want: nil, wantErr: true},
		{name: "fieldPath extracter: empty braces", args: args{paths: []string{`{}`}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "fieldPath extracter: empty string", args: args{paths: []string{``}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "fieldPath extracter: dotted fields", args: args{paths: []string{`{.metadata.labels.name}`}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "fieldPath extracter: bracketed field", args: args{paths: []string{`{.metadata.labels['name']}`}, allowMissingKeys: false}, want: &nestedFieldPathExtracter{}, wantErr: false},
		{name: "multiple expressions in one path return error", args: args{paths: []string{`{.metadata.labels.name}{.metadata.labels.app}`}, allowMissingKeys: false}, want: nil, wantErr: true},
		{name: "jsonPath extracter: multi-field selection", args: args{paths: []string{`{.metadata.labels['name', 'app']}`}, allowMissingKeys: false}, want: &jsonPathExtracter{}, wantErr: false},
		{name: "jsonPath extracter: wildcard", args: args{paths: []string{`{.spec.containers[*].name}`}, allowMissingKeys: false}, want: &jsonPathExtracter{}, wantErr: false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := New(tt.args.paths, tt.args.allowMissingKeys)
			if (err != nil) != tt.wantErr {
				t.Errorf("New() error = %v, wantErr %v", err, tt.wantErr)
				return
			}

			// For error cases both sides are nil interfaces, whose
			// reflect.TypeOf is nil, so the comparison still holds.
			if reflect.TypeOf(tt.want) != reflect.TypeOf(got) {
				t.Errorf("New() = %T, want %T", got, tt.want)
			}
		})
	}
}

// TestExtracters_Extract builds an extracter from several paths (with
// allowMissingKeys set to true), applies it to podData and compares the
// JSON encoding of the merged output with the expected string.
//
// wantErr describes the outcome of Extract only; a failure of New is always
// fatal for the subtest because the paths used here are expected to be valid.
func TestExtracters_Extract(t *testing.T) {
	containerNamePath := `{.spec.containers[*].name}`
	containerImagePath := `{.spec.containers[*].image}`
	kindPath := "{.kind}"
	apiVersionPath := "{.apiVersion}"

	type args struct {
		paths []string
		input map[string]interface{}
	}
	tests := []struct {
		name    string
		args    args
		want    string
		wantErr bool
	}{
		{
			name: "merge name and image", args: args{paths: []string{containerImagePath, containerNamePath}, input: podData},
			want: `{"spec":{"containers":[{"name":"pause1"},{"name":"pause2"}]}}`, wantErr: false,
		},
		{
			name: "name kind apiVersion", args: args{paths: []string{containerNamePath, kindPath, apiVersionPath}, input: podData},
			want: `{"apiVersion":"v1","kind":"Pod","spec":{"containers":[{"name":"pause1"},{"name":"pause2"}]}}`, wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			ex, err := New(tt.args.paths, true)
			if err != nil {
				// Without an extracter there is nothing to call Extract on.
				t.Fatalf("New() error = %v", err)
			}

			got, err := ex.Extract(tt.args.input)
			if (err != nil) != tt.wantErr {
				t.Errorf("Extracters_Extract() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if err != nil {
				// Expected failure: there is no output to compare.
				return
			}

			data, err := json.Marshal(got)
			if err != nil {
				t.Fatalf("json.Marshal() error = %v", err)
			}
			if string(data) != tt.want {
				t.Errorf("Extracters_Extract() = %v, want %v", string(data), tt.want)
			}
		})
	}
}
71 changes: 71 additions & 0 deletions extracter/fieldpath.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/**
* Copyright 2024 KusionStack Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package extracter

import (
"fmt"
)

// NewNestedFieldPathExtracter returns an Extracter that looks up the key
// sequence nestedField via NestedFieldNoCopy. allowMissingKeys is stored and
// forwarded on every Extract call. The nestedField slice is kept as given,
// not copied.
func NewNestedFieldPathExtracter(nestedField []string, allowMissingKeys bool) Extracter {
	ex := new(nestedFieldPathExtracter)
	ex.nestedField = nestedField
	ex.allowMissingKeys = allowMissingKeys
	return ex
}

// nestedFieldPathExtracter wraps the NestedFieldNoCopy function as an Extracter.
type nestedFieldPathExtracter struct {
	// nestedField is the sequence of map keys passed to NestedFieldNoCopy.
	nestedField []string
	// allowMissingKeys is forwarded to NestedFieldNoCopy unchanged.
	allowMissingKeys bool
}

// Extract delegates to NestedFieldNoCopy with the configured key path and
// allowMissingKeys setting, returning the field's value together with its
// upstream structure.
func (n *nestedFieldPathExtracter) Extract(data map[string]interface{}) (map[string]interface{}, error) {
	path, lenient := n.nestedField, n.allowMissingKeys
	return NestedFieldNoCopy(data, lenient, path...)
}

// NestedFieldNoCopy is similar to JSONPath.Extract, but it only walks nested
// map[string]interface{} values and does not support lists, in exchange for
// better performance.
//
// It follows fields through data one key at a time and returns a new map
// that contains only that chain of keys, ending in the value of the last
// field. The value is stored as-is, not copied.
//
//   - With no fields, it returns (nil, nil).
//   - If a key is absent and allowMissingKeys is true, the structure built so
//     far is returned without error; otherwise an error is returned.
//   - If a value before the last field is not a map[string]interface{}, an
//     error is returned regardless of allowMissingKeys.
func NestedFieldNoCopy(data map[string]interface{}, allowMissingKeys bool, fields ...string) (map[string]interface{}, error) {
	if len(fields) == 0 {
		return nil, nil
	}

	root := make(map[string]interface{})
	dst := root // innermost map of the output built so far
	src := data // map of the input currently being read
	last := len(fields) - 1

	for depth, key := range fields {
		value, found := src[key]
		if !found {
			if !allowMissingKeys {
				return nil, fmt.Errorf("field %q not exist", key)
			}
			return root, nil
		}

		if depth == last {
			dst[key] = value
			break
		}

		// Not at the leaf yet: the value must be a map to descend into.
		child, isMap := value.(map[string]interface{})
		if !isMap {
			return nil, fmt.Errorf("%v is of the type %T, expected map[string]interface{}", value, value)
		}
		src = child

		next := make(map[string]interface{})
		dst[key] = next
		dst = next
	}

	return root, nil
}
Loading
Loading