Skip to content

Commit

Permalink
opt: index accelerate && (overlaps) expressions for array inverted in…
Browse files Browse the repository at this point in the history
…dexes

Previously, we did not support index acceleration for queries involving
array overlaps (&&).

This change adds support for using the inverted index with && expressions on
Array columns. When there is an inverted index available, a scan will be done on
the Array column using the spans found from the constant value.

Fixes: cockroachdb#75477

Release note (performance improvement): Expressions using the overlaps (&&)
operator for arrays now support index-acceleration for faster execution in
some cases.

Release justification: low risk, high benefit changes to existing
functionality.
  • Loading branch information
RajivTS committed Apr 9, 2022
1 parent 5d94c57 commit a8386f0
Show file tree
Hide file tree
Showing 8 changed files with 689 additions and 37 deletions.
103 changes: 103 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -1492,3 +1492,106 @@ CREATE TABLE public.table_desc_inverted_index (
FAMILY f1 (id, last_accessed),
FAMILY f2 (testdata)
)
subtest overlaps_with_array

statement ok
DROP TABLE cb

statement ok
CREATE TABLE cb (
id INT PRIMARY KEY,
numbers INT[],
words STRING[],
primes INT[],
INVERTED INDEX n (numbers),
INVERTED INDEX w (words),
INVERTED INDEX p (primes)
)

statement ok
INSERT INTO cb VALUES
(0, ARRAY[], ARRAY[], ARRAY[]),
(1, ARRAY[0], ARRAY[NULL, ''], ARRAY[2]),
(2, ARRAY[1], ARRAY['cat'], ARRAY[3]),
(3, ARRAY[0,1], ARRAY['mouse'], ARRAY[2,3]),
(4, ARRAY[NULL], ARRAY['cat', 'mouse'], ARRAY[2]),
(5, ARRAY[0,1,2], ARRAY['cat', NULL, 'mouse'], ARRAY[2,3,5]),
(6, ARRAY[3,4,5], ARRAY['rat'], ARRAY[2,3]),
(7, ARRAY[1,2,1], ARRAY['rat', NULL, ''], ARRAY[2,3]),
(8, ARRAY[0,1,NULL], ARRAY[''], ARRAY[7]),
(9, NULL, NULL, NULL)

query T
SELECT numbers FROM cb WHERE numbers && ARRAY[]::INT[] ORDER BY numbers
----

query T
SELECT numbers FROM cb WHERE numbers && ARRAY[NULL]::INT[] ORDER BY numbers
----

query T
SELECT numbers FROM cb@n WHERE numbers && ARRAY[1,NULL]::INT[] ORDER BY numbers
----
{0,1}
{0,1,NULL}
{0,1,2}
{1}
{1,2,1}

query T
SELECT numbers FROM cb@n WHERE numbers && ARRAY[0,1,2] ORDER BY numbers
----
{0}
{0,1}
{0,1,NULL}
{0,1,2}
{1}
{1,2,1}

query T
SELECT numbers FROM cb@n WHERE numbers && ARRAY[5,4,3] ORDER BY numbers
----
{3,4,5}

query T
SELECT words FROM cb WHERE words && ARRAY[]::STRING[] ORDER BY words
----

query T
SELECT words FROM cb@w WHERE words && ARRAY['']::STRING[] ORDER BY words
----
{NULL,""}
{""}
{rat,NULL,""}

query T
SELECT words FROM cb WHERE words && ARRAY[NULL]::STRING[] ORDER BY words
----

query T
SELECT words FROM cb@w WHERE words && ARRAY['cat'] ORDER BY words
----
{cat}
{cat,NULL,mouse}
{cat,mouse}

query T
SELECT words FROM cb@w WHERE words && ARRAY[NULL, 'cat', 'mouse'] ORDER BY words
----
{cat}
{cat,NULL,mouse}
{cat,mouse}
{mouse}

query T
SELECT words FROM cb@w WHERE words && ARRAY[NULL, 'rat'] ORDER BY words
----
{rat}
{rat,NULL,""}

query T
SELECT primes FROM cb WHERE primes && numbers ORDER BY primes
----
{2,3}
{2,3}
{2,3,5}
114 changes: 104 additions & 10 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ CREATE TABLE e (
a INT PRIMARY KEY,
b INT[],
FAMILY (a,b),
INVERTED INDEX(b)
INVERTED INDEX bIdx (b)
)

statement ok
Expand Down Expand Up @@ -1049,7 +1049,7 @@ vectorized: true
└── • scan
columns: (a)
estimated row count: 111 (missing stats)
table: e@e_b_idx
table: e@bidx
spans: /1-/2

query T
Expand Down Expand Up @@ -1134,10 +1134,10 @@ vectorized: true
└── • zigzag join
columns: (a, b_inverted_key, a, b_inverted_key)
left table: e@e_b_idx
left table: e@bidx
left columns: (a, b_inverted_key)
left fixed values: 1 column
right table: e@e_b_idx
right table: e@bidx
right columns: (a, b_inverted_key)
right fixed values: 1 column

Expand All @@ -1161,10 +1161,10 @@ vectorized: true
└── • zigzag join
columns: (a, b_inverted_key, a, b_inverted_key)
left table: e@e_b_idx
left table: e@bidx
left columns: (a, b_inverted_key)
left fixed values: 1 column
right table: e@e_b_idx
right table: e@bidx
right columns: (a, b_inverted_key)
right fixed values: 1 column

Expand All @@ -1189,7 +1189,7 @@ vectorized: true
└── • scan
columns: (a)
estimated row count: 111 (missing stats)
table: e@e_b_idx
table: e@bidx
spans: /[]-/"D"

query T
Expand Down Expand Up @@ -1221,7 +1221,7 @@ vectorized: true
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: e@e_b_idx
table: e@bidx
spans: /[]-/"D" /0-/3

query T
Expand All @@ -1244,7 +1244,7 @@ vectorized: true
└── • scan
columns: (a)
estimated row count: 111 (missing stats)
table: e@e_b_idx
table: e@bidx
spans: /[]-/"D"

query T
Expand Down Expand Up @@ -1276,7 +1276,7 @@ vectorized: true
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: e@e_b_idx
table: e@bidx
spans: /[]-/"D" /0-/2

query T
Expand Down Expand Up @@ -1422,6 +1422,100 @@ vectorized: true
table: d@foo_inv
spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd

# Test that queries with overlaps && operator use the inverted index
# for non-empty non-null array
# TODO: Add normalization rule to convert to no-op since predicate
# is a contradiction
query T
EXPLAIN (VERBOSE) SELECT * FROM e WHERE b && ARRAY[]::INT[]
----
distribution: local
vectorized: true
·
• filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: b && ARRAY[]
└── • scan
columns: (a, b)
estimated row count: 1,000 (missing stats)
table: e@e_pkey
spans: FULL SCAN

query T
EXPLAIN (VERBOSE) SELECT * FROM e@bIdx WHERE b && ARRAY[0,1,2]
----
distribution: local
vectorized: true
·
• index join
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ table: e@e_pkey
│ key columns: a
└── • project
│ columns: (a)
│ estimated row count: 111 (missing stats)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ inverted column: b_inverted_key
│ num spans: 1
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: e@bidx
spans: /0-/3

# TODO: Add normalization rule to convert to no-op since predicate
# is a contradiction
query T
EXPLAIN (VERBOSE) SELECT * FROM e WHERE b && ARRAY[NULL]::INT[]
----
distribution: local
vectorized: true
·
• filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: b && ARRAY[NULL]
└── • scan
columns: (a, b)
estimated row count: 1,000 (missing stats)
table: e@e_pkey
spans: FULL SCAN

query T
EXPLAIN (VERBOSE) SELECT * FROM e@bIdx WHERE b && ARRAY[0,1,NULL]
----
distribution: local
vectorized: true
·
• index join
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ table: e@e_pkey
│ key columns: a
└── • project
│ columns: (a)
│ estimated row count: 111 (missing stats)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ inverted column: b_inverted_key
│ num spans: 1
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: e@bidx
spans: /0-/2

# Ensure that an inverted index with a composite primary key still encodes
# the primary key data in the composite value.
statement ok
Expand Down
44 changes: 44 additions & 0 deletions pkg/sql/opt/invertedidx/json_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,21 @@ func getInvertedExprForJSONOrArrayIndexForContainedBy(
return invertedExpr
}

// getInvertedExprForArrayIndexForOverlaps gets an inverted.Expression
// that constrains an Array index according to the given constant.
// This results in a span expression representing the union of all paths
// through the Array. This function is only used when checking if an
// indexed Array column overlaps (&&) with a constant.
func getInvertedExprForArrayIndexForOverlaps(
evalCtx *tree.EvalContext, d tree.Datum,
) inverted.Expression {
invertedExpr, err := rowenc.EncodeOverlapsInvertedIndexSpans(evalCtx, d)
if err != nil {
panic(err)
}
return invertedExpr
}

type jsonOrArrayInvertedExpr struct {
tree.ComparisonExpr

Expand Down Expand Up @@ -230,6 +245,8 @@ func NewJSONOrArrayDatumsToInvertedExpr(
invertedExpr = getInvertedExprForJSONOrArrayIndexForContainedBy(evalCtx, d)
case treecmp.Contains:
invertedExpr = getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, d)
case treecmp.Overlaps:
invertedExpr = getInvertedExprForArrayIndexForOverlaps(evalCtx, d)
default:
return nil, fmt.Errorf("%s cannot be index-accelerated", t)
}
Expand Down Expand Up @@ -346,6 +363,8 @@ func (j *jsonOrArrayFilterPlanner) extractInvertedFilterConditionFromLeaf(
if fetch, ok := t.Left.(*memo.FetchValExpr); ok {
invertedExpr = j.extractJSONFetchValEqCondition(evalCtx, fetch, t.Right)
}
case *memo.OverlapsExpr:
invertedExpr = j.extractArrayOverlapsCondition(evalCtx, t.Left, t.Right)
}

if invertedExpr == nil {
Expand All @@ -364,6 +383,31 @@ func (j *jsonOrArrayFilterPlanner) extractInvertedFilterConditionFromLeaf(
return invertedExpr, remainingFilters, nil
}

// extractArrayOverlapsCondition extracts an InvertedExpression
// representing an inverted filter over the planner's inverted index, based
// on the given left and right expression arguments. Returns an empty
// InvertedExpression if no inverted filter could be extracted.
func (j *jsonOrArrayFilterPlanner) extractArrayOverlapsCondition(
evalCtx *tree.EvalContext, left, right opt.ScalarExpr,
) inverted.Expression {
var constantVal opt.ScalarExpr
if isIndexColumn(j.tabID, j.index, left, j.computedColumns) && memo.CanExtractConstDatum(right) {
// When the first argument is a variable or expression corresponding to the
// index column and the second argument is a constant, we can generate an
// inverted expression with the constant value picked from right.
constantVal = right
} else if isIndexColumn(j.tabID, j.index, right, j.computedColumns) && memo.CanExtractConstDatum(left) {
// When the second argument is a variable or expression corresponding to
// the index column and the first argument is a constant, we can generate an
// inverted expression with the constant value picked from left.
constantVal = left
} else {
// If none of the conditions are met, we cannot create an InvertedExpression.
return inverted.NonInvertedColExpression{}
}
return getInvertedExprForArrayIndexForOverlaps(evalCtx, memo.ExtractConstDatum(constantVal))
}

// extractJSONOrArrayContainsCondition extracts an InvertedExpression
// representing an inverted filter over the planner's inverted index, based
// on the given left and right expression arguments. Returns an empty
Expand Down
Loading

0 comments on commit a8386f0

Please sign in to comment.