Skip to content

Commit

Permalink
expression: JSON_SCHEMA_VALID() (#52780)
Browse files Browse the repository at this point in the history
close #52779
  • Loading branch information
dveeden authored Jun 3, 2024
1 parent 2069651 commit fc3132c
Show file tree
Hide file tree
Showing 16 changed files with 324 additions and 7 deletions.
26 changes: 26 additions & 0 deletions DEPS.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6266,6 +6266,32 @@ def go_deps():
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/prometheus/prometheus/com_github_prometheus_prometheus-v0.50.1.zip",
],
)
go_repository(
name = "com_github_qri_io_jsonpointer",
build_file_proto_mode = "disable_global",
importpath = "github.com/qri-io/jsonpointer",
sha256 = "6870d4b9fc5ac8efb9226447975fecfb07241133e23c7e661f5aac1a3088f338",
strip_prefix = "github.com/qri-io/jsonpointer@v0.1.1",
urls = [
"http://bazel-cache.pingcap.net:8080/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip",
"http://ats.apps.svc/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip",
"https://cache.hawkingrei.com/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/qri-io/jsonpointer/com_github_qri_io_jsonpointer-v0.1.1.zip",
],
)
go_repository(
name = "com_github_qri_io_jsonschema",
build_file_proto_mode = "disable_global",
importpath = "github.com/qri-io/jsonschema",
sha256 = "51305cc45fd383b24de94e2eb421ffba8d83679520c18348842c4255025c5940",
strip_prefix = "github.com/qri-io/jsonschema@v0.2.1",
urls = [
"http://bazel-cache.pingcap.net:8080/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip",
"http://ats.apps.svc/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip",
"https://cache.hawkingrei.com/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip",
"https://storage.googleapis.com/pingcapmirror/gomod/github.com/qri-io/jsonschema/com_github_qri_io_jsonschema-v0.2.1.zip",
],
)
go_repository(
name = "com_github_quasilyte_go_ruleguard",
build_file_proto_mode = "disable_global",
Expand Down
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ require (
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.53.0
github.com/prometheus/prometheus v0.50.1
github.com/qri-io/jsonschema v0.2.1
github.com/robfig/cron/v3 v3.0.1
github.com/sasha-s/go-deadlock v0.3.1
github.com/shirou/gopsutil/v3 v3.24.4
Expand Down Expand Up @@ -157,6 +158,7 @@ require (
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
github.com/pierrec/lz4/v4 v4.1.15 // indirect
github.com/qri-io/jsonpointer v0.1.1 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
)

Expand Down Expand Up @@ -307,7 +309,7 @@ require (
google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apimachinery v0.28.6 // indirect
k8s.io/klog/v2 v2.120.1 // indirect
Expand Down
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,10 @@ github.com/prometheus/procfs v0.13.0 h1:GqzLlQyfsPbaEHaQkO7tbDlriv/4o5Hudv6OXHGK
github.com/prometheus/procfs v0.13.0/go.mod h1:cd4PFCR54QLnGKPaKGA6l+cfuNXtht43ZKY6tow0Y1g=
github.com/prometheus/prometheus v0.50.1 h1:N2L+DYrxqPh4WZStU+o1p/gQlBaqFbcLBTjlp3vpdXw=
github.com/prometheus/prometheus v0.50.1/go.mod h1:FvE8dtQ1Ww63IlyKBn1V4s+zMwF9kHkVNkQBR1pM4CU=
github.com/qri-io/jsonpointer v0.1.1 h1:prVZBZLL6TW5vsSB9fFHFAMBLI4b0ri5vribQlTJiBA=
github.com/qri-io/jsonpointer v0.1.1/go.mod h1:DnJPaYgiKu56EuDp8TU5wFLdZIcAnb/uH9v37ZaMV64=
github.com/qri-io/jsonschema v0.2.1 h1:NNFoKms+kut6ABPf6xiKNM5214jzxAhDBrPHCJ97Wg0=
github.com/qri-io/jsonschema v0.2.1/go.mod h1:g7DPkiOsK1xv6T/Ao5scXRkd+yTFygcANPBaaqW+VrI=
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:N/ElC8H3+5XpJzTSTfLsJV/mx9Q9g7kxmchpfZyxgzM=
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
Expand Down Expand Up @@ -767,6 +771,9 @@ github.com/sasha-s/go-deadlock v0.3.1 h1:sqv7fDNShgjcaxkO0JNcOAlr8B9+cV5Ey/OB71e
github.com/sasha-s/go-deadlock v0.3.1/go.mod h1:F73l+cr82YSh10GxyRI6qZiCgK64VaZjwesgfQ1/iLM=
github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys=
github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500 h1:WnNuhiq+FOY3jNj6JXFT+eLN3CQ/oPIsDPRanvwsmbI=
github.com/shabbyrobe/gocovmerge v0.0.0-20190829150210-3e036491d500/go.mod h1:+njLrG5wSeoG4Ds61rFgEzKvenR2UHbjMoDHsczxly0=
github.com/shirou/gopsutil/v3 v3.21.12/go.mod h1:BToYZVTlSVlfazpDDYFnsVZLaoRG+g8ufT6fPQLdJzA=
Expand Down
1 change: 1 addition & 0 deletions pkg/executor/reload_expr_pushdown_blacklist.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ var funcName2Alias = map[string]string{
"json_merge_preserve": ast.JSONMergePreserve,
"json_pretty": ast.JSONPretty,
"json_quote": ast.JSONQuote,
"json_schema_valid": ast.JSONSchemaValid,
"json_search": ast.JSONSearch,
"json_storage_size": ast.JSONStorageSize,
"json_depth": ast.JSONDepth,
Expand Down
1 change: 1 addition & 0 deletions pkg/expression/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ go_library(
"@com_github_pingcap_errors//:errors",
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_pingcap_tipb//go-tipb",
"@com_github_qri_io_jsonschema//:jsonschema",
"@com_github_tikv_client_go_v2//oracle",
"@org_uber_go_atomic//:atomic",
"@org_uber_go_zap//:zap",
Expand Down
1 change: 1 addition & 0 deletions pkg/expression/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,7 @@ var funcs = map[string]functionClass{
ast.JSONMergePreserve: &jsonMergePreserveFunctionClass{baseFunctionClass{ast.JSONMergePreserve, 2, -1}},
ast.JSONPretty: &jsonPrettyFunctionClass{baseFunctionClass{ast.JSONPretty, 1, 1}},
ast.JSONQuote: &jsonQuoteFunctionClass{baseFunctionClass{ast.JSONQuote, 1, 1}},
ast.JSONSchemaValid: &jsonSchemaValidFunctionClass{baseFunctionClass{ast.JSONSchemaValid, 2, 2}},
ast.JSONSearch: &jsonSearchFunctionClass{baseFunctionClass{ast.JSONSearch, 3, -1}},
ast.JSONStorageFree: &jsonStorageFreeFunctionClass{baseFunctionClass{ast.JSONStorageFree, 1, 1}},
ast.JSONStorageSize: &jsonStorageSizeFunctionClass{baseFunctionClass{ast.JSONStorageSize, 1, 1}},
Expand Down
96 changes: 96 additions & 0 deletions pkg/expression/builtin_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,21 @@ package expression

import (
"bytes"
"context"
goJSON "encoding/json"
"strconv"
"strings"

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/hack"
"github.com/pingcap/tipb/go-tipb"
"github.com/qri-io/jsonschema"
)

var (
Expand All @@ -53,6 +56,7 @@ var (
_ functionClass = &jsonMergePreserveFunctionClass{}
_ functionClass = &jsonPrettyFunctionClass{}
_ functionClass = &jsonQuoteFunctionClass{}
_ functionClass = &jsonSchemaValidFunctionClass{}
_ functionClass = &jsonSearchFunctionClass{}
_ functionClass = &jsonStorageSizeFunctionClass{}
_ functionClass = &jsonDepthFunctionClass{}
Expand All @@ -77,6 +81,7 @@ var (
_ builtinFunc = &builtinJSONOverlapsSig{}
_ builtinFunc = &builtinJSONStorageSizeSig{}
_ builtinFunc = &builtinJSONDepthSig{}
_ builtinFunc = &builtinJSONSchemaValidSig{}
_ builtinFunc = &builtinJSONSearchSig{}
_ builtinFunc = &builtinJSONKeysSig{}
_ builtinFunc = &builtinJSONKeys2ArgsSig{}
Expand Down Expand Up @@ -1796,3 +1801,94 @@ func (b *builtinJSONLengthSig) evalInt(ctx EvalContext, row chunk.Row) (res int6
}
return int64(obj.GetElemCount()), false, nil
}

// jsonSchemaValidFunctionClass is the functionClass registered for
// JSON_SCHEMA_VALID(); its getFunction builds a builtinJSONSchemaValidSig.
type jsonSchemaValidFunctionClass struct {
baseFunctionClass
}

// getFunction validates the argument list and returns the signature that
// evaluates JSON_SCHEMA_VALID(). The result is typed as an integer and both
// arguments (schema, document) are evaluated as JSON.
func (c *jsonSchemaValidFunctionClass) getFunction(ctx BuildContext, args []Expression) (builtinFunc, error) {
	if verifyErr := c.verifyArgs(args); verifyErr != nil {
		return nil, verifyErr
	}

	base, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, types.ETJson, types.ETJson)
	if err != nil {
		return nil, err
	}
	return &builtinJSONSchemaValidSig{baseBuiltinFunc: base}, nil
}

// builtinJSONSchemaValidSig is the builtinFunc that evaluates
// JSON_SCHEMA_VALID(schema, document) and yields an integer result.
type builtinJSONSchemaValidSig struct {
baseBuiltinFunc

// schemaCache holds the parsed schema. evalInt only consults it when the
// schema argument's ConstLevel is at least ConstOnlyInContext; otherwise
// the schema is parsed again on every evaluation.
schemaCache builtinFuncCache[jsonschema.Schema]
}

// Clone returns a new signature that copies the base builtin function state.
// The schema cache is not carried over: the clone starts with a zero-value
// schemaCache.
func (b *builtinJSONSchemaValidSig) Clone() builtinFunc {
	cloned := new(builtinJSONSchemaValidSig)
	cloned.cloneFrom(&b.baseBuiltinFunc)
	return cloned
}

// evalInt evaluates JSON_SCHEMA_VALID(schema, document).
//
// It returns 1 when the document satisfies the schema and 0 when validation
// reports at least one error. The result is NULL when either argument is
// NULL. Errors from evaluating the arguments, from decoding the schema, or
// from the validator itself are returned to the caller.
func (b *builtinJSONSchemaValidSig) evalInt(ctx EvalContext, row chunk.Row) (res int64, isNull bool, err error) {
	// The schema is the first argument.
	rawSchema, schemaIsNull, err := b.args[0].EvalJSON(ctx, row)
	if err != nil {
		return res, false, err
	}
	if schemaIsNull {
		return res, true, nil
	}

	// decodeSchema round-trips the schema value through its JSON text form
	// to build a jsonschema.Schema.
	decodeSchema := func() (jsonschema.Schema, error) {
		var parsed jsonschema.Schema
		encoded, err := rawSchema.MarshalJSON()
		if err != nil {
			return jsonschema.Schema{}, err
		}
		if err := goJSON.Unmarshal(encoded, &parsed); err != nil {
			return jsonschema.Schema{}, err
		}
		return parsed, nil
	}

	var schema jsonschema.Schema
	if b.args[0].ConstLevel() < ConstOnlyInContext {
		// The schema may differ from row to row, so decode it every time.
		schema, err = decodeSchema()
	} else {
		// The schema is constant for this evaluation context: decode it
		// once and reuse the cached value afterwards.
		schema, err = b.schemaCache.getOrInitCache(ctx, func() (jsonschema.Schema, error) {
			failpoint.Inject("jsonSchemaValidDisableCacheRefresh", func() {
				failpoint.Return(jsonschema.Schema{}, errors.New("Cache refresh disabled by failpoint"))
			})
			return decodeSchema()
		})
	}
	if err != nil {
		return res, false, err
	}

	// The document to validate is the second argument.
	doc, docIsNull, err := b.args[1].EvalJSON(ctx, row)
	if err != nil {
		return res, false, err
	}
	if docIsNull {
		return res, true, nil
	}
	encodedDoc, err := doc.MarshalJSON()
	if err != nil {
		return res, false, err
	}

	violations, err := schema.ValidateBytes(context.Background(), encodedDoc)
	if err != nil {
		return res, false, err
	}
	if len(violations) == 0 {
		res = 1
	}
	return res, false, nil
}
98 changes: 98 additions & 0 deletions pkg/expression/builtin_json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"fmt"
"testing"

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/parser/terror"
Expand Down Expand Up @@ -1342,3 +1343,100 @@ func TestJSONMergePatch(t *testing.T) {
}
}
}

// TestJSONSchemaValid runs JSON_SCHEMA_VALID over a table of (schema,
// document) pairs and checks the NULL / 0 / 1 result for each.
func TestJSONSchemaValid(t *testing.T) {
	ctx := createContext(t)
	fc := funcs[ast.JSONSchemaValid]
	cases := []struct {
		Input    any
		Expected any
	}{
		// nulls
		{[]any{nil, `{}`}, nil},
		{[]any{`{}`, nil}, nil},
		{[]any{nil, nil}, nil},

		// empty
		{[]any{`{}`, `{}`}, 1},

		// required
		{[]any{`{"required": ["a","b"]}`, `{"a": 5}`}, 0},
		{[]any{`{"required": ["a","b"]}`, `{"a": 5, "b": 6}`}, 1},

		// type
		{[]any{`{"type": ["string"]}`, `{}`}, 0},
		{[]any{`{"type": ["string"]}`, `"foobar"`}, 1},
		{[]any{`{"type": ["object"]}`, `{}`}, 1},
		{[]any{`{"type": ["object"]}`, `"foobar"`}, 0},

		// properties, type
		{[]any{`{"properties": {"a": {"type": "number"}}}`, `{}`}, 1},
		{[]any{`{"properties": {"a": {"type": "number"}}}`, `{"a": "foobar"}`}, 0},
		{[]any{`{"properties": {"a": {"type": "number"}}}`, `{"a": 5}`}, 1},

		// properties, minimum
		{[]any{`{"properties": {"a": {"type": "number", "minimum": 6}}}`, `{"a": 5}`}, 0},

		// properties, pattern
		{[]any{`{"properties": {"a": {"type": "string", "pattern": "^a"}}}`, `{"a": "abc"}`}, 1},
		{[]any{`{"properties": {"a": {"type": "string", "pattern": "^a"}}}`, `{"a": "cba"}`}, 0},
	}

	for _, tt := range tblToDtbl(cases) {
		sig, err := fc.getFunction(ctx, datumsToConstants(tt["Input"]))
		require.NoError(t, err)
		got, err := evalBuiltinFunc(sig, ctx, chunk.Row{})
		require.NoError(t, err)

		want := tt["Expected"][0]
		if want.IsNull() {
			require.True(t, got.IsNull())
			continue
		}

		msg := fmt.Sprintf("JSON_SCHEMA_VALID(%s,%s) = %d (expected: %d)",
			tt["Input"][0].GetString(),
			tt["Input"][1].GetString(),
			got.GetInt64(),
			want.GetInt64(),
		)
		testutil.DatumEqual(t, want, got, msg)
	}
}

// TestJSONSchemaValidCache is to test if the cached schema is used
// TestJSONSchemaValidCache checks that a constant schema is parsed once and
// then served from the signature's cache.
//
// The jsonSchemaValidDisableCacheRefresh failpoint makes the cache
// initialisation callback fail. A signature that has already been evaluated
// must therefore keep working (cache hit), while a freshly built signature
// must return an error (cache miss, so the callback runs).
func TestJSONSchemaValidCache(t *testing.T) {
	const refreshFailpoint = "github.com/pingcap/tidb/pkg/expression/jsonSchemaValidDisableCacheRefresh"

	ctx := createContext(t)
	fc := funcs[ast.JSONSchemaValid]
	tbl := []struct {
		Input    any
		Expected any
	}{
		{[]any{`{}`, `{}`}, 1},
	}
	dtbl := tblToDtbl(tbl)

	for _, tt := range dtbl {
		// Each case runs in its own function so the deferred Disable fires at
		// the end of the case, including when a require assertion aborts the
		// test. Otherwise the failpoint would stay enabled for the next case
		// and for other tests in the package.
		func() {
			// Get the function and eval once, ensuring it is cached.
			f, err := fc.getFunction(ctx, datumsToConstants(tt["Input"]))
			require.NoError(t, err)
			_, err = evalBuiltinFunc(f, ctx, chunk.Row{})
			require.NoError(t, err)

			// Make any further cache initialisation fail.
			require.NoError(t, failpoint.Enable(refreshFailpoint, `return(true)`))
			defer func() {
				// Report with Errorf rather than require: this may run
				// while the test is already unwinding from a failure.
				if disableErr := failpoint.Disable(refreshFailpoint); disableErr != nil {
					t.Errorf("disabling failpoint %s: %v", refreshFailpoint, disableErr)
				}
			}()

			// This eval should use the cache and not call the function.
			_, err = evalBuiltinFunc(f, ctx, chunk.Row{})
			require.NoError(t, err)

			// Now get a new, empty cache by getting the function again.
			f, err = fc.getFunction(ctx, datumsToConstants(tt["Input"]))
			require.NoError(t, err)

			// Empty cache, so the init callback runs and must return an error.
			_, err = evalBuiltinFunc(f, ctx, chunk.Row{})
			require.Error(t, err)
		}()
	}
}
1 change: 1 addition & 0 deletions pkg/expression/function_traits.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ var booleanFunctions = map[string]struct{}{
ast.IsIPv4Compat: {},
ast.IsIPv4Mapped: {},
ast.IsIPv6: {},
ast.JSONSchemaValid: {},
ast.JSONValid: {},
ast.RegexpLike: {},
}
1 change: 1 addition & 0 deletions pkg/parser/ast/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ const (
JSONMergePreserve = "json_merge_preserve"
JSONPretty = "json_pretty"
JSONQuote = "json_quote"
JSONSchemaValid = "json_schema_valid"
JSONSearch = "json_search"
JSONStorageFree = "json_storage_free"
JSONStorageSize = "json_storage_size"
Expand Down
Loading

0 comments on commit fc3132c

Please sign in to comment.