Skip to content

Commit

Permalink
Merge #35742
Browse files Browse the repository at this point in the history
35742: sql: improve inverted index validation performance r=lucy-zhang a=lucy-zhang

To compute the expected number of keys in an inverted index, we currently
generate every key and count the number of unique keys, since array elements
can produce duplicate keys. This PR avoids some of these allocations by doing a
simple count for every non-array JSON type. For a 25GB table of random JSON
values generated by the workload, I got a ~15% speedup on the `select
sum(crdb_internal.json_num_index_entries(v)) from json.j` query.

Release note: None

Co-authored-by: Lucy Zhang <[email protected]>
  • Loading branch information
craig[bot] and lucy-zhang committed May 13, 2019
2 parents 75c0f74 + a1fc446 commit 30dfd78
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 13 deletions.
12 changes: 12 additions & 0 deletions pkg/util/json/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,18 @@ func (j *jsonEncoded) encodeInvertedIndexKeys(b []byte) ([][]byte, error) {
return decoded.encodeInvertedIndexKeys(b)
}

// numInvertedIndexEntries implements the JSON interface.
//
// A scalar or an empty container is reported as a single entry without being
// decoded. Any other value is decoded and the count is delegated to the
// decoded representation.
func (j *jsonEncoded) numInvertedIndexEntries() (int, error) {
	if !j.isScalar() && j.containerLen != 0 {
		full, err := j.decode()
		if err != nil {
			return 0, err
		}
		return full.numInvertedIndexEntries()
	}
	return 1, nil
}

func (j *jsonEncoded) allPaths() ([]JSON, error) {
decoded, err := j.decode()
if err != nil {
Expand Down
68 changes: 55 additions & 13 deletions pkg/util/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ type JSON interface {
// one per path through the receiver.
encodeInvertedIndexKeys(b []byte) ([][]byte, error)

// numInvertedIndexEntries returns the number of distinct inverted index
// entries for the receiver. It is the per-type implementation that
// NumInvertedIndexEntries delegates to.
numInvertedIndexEntries() (int, error)

// allPaths returns a slice of new JSON documents, each a path to a leaf
// through the receiver. Note that leaves include the empty object and array
// in addition to scalars.
Expand Down Expand Up @@ -763,23 +765,63 @@ func (j jsonObject) encodeInvertedIndexKeys(b []byte) ([][]byte, error) {
// array are encoded identically in the inverted index, the total number of
// distinct index entries may be less than the total number of paths.
func NumInvertedIndexEntries(j JSON) (int, error) {
// TODO (lucy): Figure out how to avoid allocating every path
keys, err := EncodeInvertedIndexKeys(make([]byte, 0), j)
if err != nil {
return 0, err
return j.numInvertedIndexEntries()
}

// numInvertedIndexEntries implements the JSON interface. A null always counts
// as exactly one entry.
func (jsonNull) numInvertedIndexEntries() (int, error) { return 1, nil }

// numInvertedIndexEntries implements the JSON interface. The true literal
// always counts as exactly one entry.
func (jsonTrue) numInvertedIndexEntries() (int, error) { return 1, nil }

// numInvertedIndexEntries implements the JSON interface. The false literal
// always counts as exactly one entry.
func (jsonFalse) numInvertedIndexEntries() (int, error) { return 1, nil }

// numInvertedIndexEntries implements the JSON interface. A string always
// counts as exactly one entry, whatever its contents.
func (jsonString) numInvertedIndexEntries() (int, error) { return 1, nil }

// numInvertedIndexEntries implements the JSON interface. A number always
// counts as exactly one entry, whatever its value.
func (jsonNumber) numInvertedIndexEntries() (int, error) { return 1, nil }
// numInvertedIndexEntries implements the JSON interface.
//
// An empty array counts as one entry and a single-element array has as many
// entries as its element. For longer arrays every key is encoded and only the
// distinct ones are counted, because separate elements can produce the same
// key.
func (j jsonArray) numInvertedIndexEntries() (int, error) {
	if len(j) == 0 {
		return 1, nil
	}
	if len(j) == 1 {
		return j[0].numInvertedIndexEntries()
	}

	encoded, err := j.encodeInvertedIndexKeys(make([]byte, 0))
	if err != nil {
		return 0, err
	}

	// Sorting places equal keys next to each other, so a key is new exactly
	// when it differs from its predecessor.
	sort.Slice(encoded, func(a, b int) bool {
		return bytes.Compare(encoded[a], encoded[b]) < 0
	})
	distinct := 0
	for idx, key := range encoded {
		if idx == 0 || !bytes.Equal(encoded[idx-1], key) {
			distinct++
		}
	}
	return distinct, nil
}

// Count distinct keys
sort.Slice(keys, func(i int, j int) bool {
return bytes.Compare(keys[i], keys[j]) < 0
})
n := 0
for i := 0; i < len(keys); i++ {
if i == 0 || bytes.Compare(keys[i-1], keys[i]) < 0 {
n++
// numInvertedIndexEntries implements the JSON interface.
//
// An empty object counts as one entry; otherwise the result is the sum of the
// entry counts of the object's values. The first error reported by a value is
// returned with a zero count.
func (j jsonObject) numInvertedIndexEntries() (int, error) {
	if len(j) == 0 {
		return 1, nil
	}
	total := 0
	for _, kv := range j {
		n, err := kv.v.numInvertedIndexEntries()
		if err != nil {
			return 0, err
		}
		total += n
	}
	return total, nil
}

// AllPaths returns a slice of new JSON documents, each a path to a leaf
Expand Down
9 changes: 9 additions & 0 deletions pkg/util/json/json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,7 @@ func TestNumInvertedIndexEntries(t *testing.T) {
{`[[[]]]`, 1},
{`[[{}]]`, 1},
{`[{}, []]`, 2},
{`[1]`, 1},
{`[1, 2]`, 2},
{`[1, [1]]`, 2},
{`[1, 2, 1, 2]`, 2},
Expand Down Expand Up @@ -1923,6 +1924,14 @@ func BenchmarkFetchKey(b *testing.B) {
}
}

// BenchmarkJSONNumInvertedIndexEntries measures NumInvertedIndexEntries on the
// sampleJSON document. Building the document is excluded from the timing.
func BenchmarkJSONNumInvertedIndexEntries(b *testing.B) {
	j := jsonTestShorthand(sampleJSON)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// Fail loudly instead of silently timing an error path.
		if _, err := NumInvertedIndexEntries(j); err != nil {
			b.Fatal(err)
		}
	}
}

func TestJSONRemovePath(t *testing.T) {
queryTests := map[string][]struct {
path []string
Expand Down

0 comments on commit 30dfd78

Please sign in to comment.