Skip to content

Commit

Permalink
expression: pushdown substring/char_length to tikv (#30191)
Browse files: browse the repository at this point in the history
close #31125
  • Loading branch information
guo-shaoge authored Dec 30, 2021
1 parent 9b9a300 commit 7f85091
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 47 deletions.
10 changes: 5 additions & 5 deletions cmd/explaintest/r/index_merge.result
Original file line number Diff line number Diff line change
Expand Up @@ -722,11 +722,11 @@ c1 c2 c3 c4 c5
explain select /*+ use_index_merge(t1) */ * from t1 where (c1 < 10 or c2 < 10) and substring(c3, 1, 1) = '1' order by 1;
id estRows task access object operator info
Sort_5 4433.77 root test.t1.c1
└─Selection_12 4433.77 root eq(substring(cast(test.t1.c3, var_string(20)), 1, 1), "1")
└─IndexMerge_11 5542.21 root
├─IndexRangeScan_8(Build) 3323.33 cop[tikv] table:t1, index:c1(c1) range:[-inf,10), keep order:false, stats:pseudo
├─IndexRangeScan_9(Build) 3323.33 cop[tikv] table:t1, index:c2(c2) range:[-inf,10), keep order:false, stats:pseudo
└─TableRowIDScan_10(Probe) 5542.21 cop[tikv] table:t1 keep order:false, stats:pseudo
└─IndexMerge_12 4433.77 root
├─IndexRangeScan_8(Build) 3323.33 cop[tikv] table:t1, index:c1(c1) range:[-inf,10), keep order:false, stats:pseudo
├─IndexRangeScan_9(Build) 3323.33 cop[tikv] table:t1, index:c2(c2) range:[-inf,10), keep order:false, stats:pseudo
└─Selection_11(Probe) 4433.77 cop[tikv] eq(substring(cast(test.t1.c3, var_string(20)), 1, 1), "1")
└─TableRowIDScan_10 5542.21 cop[tikv] table:t1 keep order:false, stats:pseudo
select /*+ use_index_merge(t1) */ * from t1 where (c1 < 10 or c2 < 10) and substring(c3, 1, 1) = '1' order by 1;
c1 c2 c3 c4 c5
1 1 1 1 1
Expand Down
15 changes: 7 additions & 8 deletions cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -1294,14 +1294,13 @@ cntrycode
order by
cntrycode;
id estRows task access object operator info
Sort 1.00 root Column#27
└─Projection 1.00 root Column#27, Column#28, Column#29
└─HashAgg 1.00 root group by:Column#33, funcs:count(1)->Column#28, funcs:sum(Column#31)->Column#29, funcs:firstrow(Column#32)->Column#27
└─Projection 0.00 root tpch.customer.c_acctbal, substring(tpch.customer.c_phone, 1, 2)->Column#32, substring(tpch.customer.c_phone, 1, 2)->Column#33
Sort 1.00 root Column#31
└─Projection 1.00 root Column#31, Column#32, Column#33
└─HashAgg 1.00 root group by:Column#37, funcs:count(1)->Column#32, funcs:sum(Column#35)->Column#33, funcs:firstrow(Column#36)->Column#31
└─Projection 0.00 root tpch.customer.c_acctbal, substring(tpch.customer.c_phone, 1, 2)->Column#36, substring(tpch.customer.c_phone, 1, 2)->Column#37
└─HashJoin 0.00 root anti semi join, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
├─TableReader(Build) 75000000.00 root data:TableFullScan
│ └─TableFullScan 75000000.00 cop[tikv] table:orders keep order:false
└─Selection(Probe) 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21")
└─TableReader 0.00 root data:Selection
└─Selection 0.00 cop[tikv] gt(tpch.customer.c_acctbal, NULL)
└─TableFullScan 7500000.00 cop[tikv] table:customer keep order:false
└─TableReader(Probe) 0.00 root data:Selection
└─Selection 0.00 cop[tikv] gt(tpch.customer.c_acctbal, NULL), in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21")
└─TableFullScan 7500000.00 cop[tikv] table:customer keep order:false
23 changes: 22 additions & 1 deletion expression/expr_to_pb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1078,14 +1078,15 @@ func TestExprPushDownToTiKV(t *testing.T) {
exprs := make([]Expression, 0)

//jsonColumn := genColumn(mysql.TypeJSON, 1)
//intColumn := genColumn(mysql.TypeLonglong, 2)
intColumn := genColumn(mysql.TypeLonglong, 2)
//realColumn := genColumn(mysql.TypeDouble, 3)
//decimalColumn := genColumn(mysql.TypeNewDecimal, 4)
stringColumn := genColumn(mysql.TypeString, 5)
//datetimeColumn := genColumn(mysql.TypeDatetime, 6)
binaryStringColumn := genColumn(mysql.TypeString, 7)
binaryStringColumn.RetType.Collate = charset.CollationBin

// Test exprs that cannot be pushed.
function, err := NewFunction(mock.NewContext(), ast.InetAton, types.NewFieldType(mysql.TypeString), stringColumn)
require.NoError(t, err)
exprs = append(exprs, function)
Expand Down Expand Up @@ -1121,6 +1122,26 @@ func TestExprPushDownToTiKV(t *testing.T) {
pushed, remained := PushDownExprs(sc, exprs, client, kv.TiKV)
require.Len(t, pushed, 0)
require.Len(t, remained, len(exprs))

// Test exprs that can be pushed.
exprs = exprs[:0]
pushed = pushed[:0]
remained = remained[:0]

substringRelated := []string{ast.Substr, ast.Substring, ast.Mid}
for _, exprName := range substringRelated {
function, err = NewFunction(mock.NewContext(), exprName, types.NewFieldType(mysql.TypeString), stringColumn, intColumn, intColumn)
require.NoError(t, err)
exprs = append(exprs, function)
}

function, err = NewFunction(mock.NewContext(), ast.CharLength, types.NewFieldType(mysql.TypeString), stringColumn)
require.NoError(t, err)
exprs = append(exprs, function)

pushed, remained = PushDownExprs(sc, exprs, client, kv.TiKV)
require.Len(t, pushed, len(exprs))
require.Len(t, remained, 0)
}

func TestExprOnlyPushDownToTiKV(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion expression/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@ func scalarExprSupportedByTiKV(sf *ScalarFunction) bool {
// string functions.
ast.Length, ast.BitLength, ast.Concat, ast.ConcatWS /*ast.Locate,*/, ast.Replace, ast.ASCII, ast.Hex,
ast.Reverse, ast.LTrim, ast.RTrim /*ast.Left,*/, ast.Strcmp, ast.Space, ast.Elt, ast.Field,
InternalFuncFromBinary, InternalFuncToBinary,
InternalFuncFromBinary, InternalFuncToBinary, ast.Mid, ast.Substring, ast.Substr, ast.CharLength,

// json functions.
ast.JSONType, ast.JSONExtract, ast.JSONObject, ast.JSONArray, ast.JSONMerge, ast.JSONSet,
Expand Down
32 changes: 16 additions & 16 deletions expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3472,36 +3472,36 @@ func TestExprPushdown(t *testing.T) {
"(4,'511111','611',7,8,9),(5,'611111','711',8,9,10)")

// case 1, index scan without double read, some filters cannot be pushed to the cop task
rows := tk.MustQuery("explain format = 'brief' select col2, col1 from t use index(key1) where col2 like '5%' and substr(col1, 1, 1) = '4'").Rows()
rows := tk.MustQuery("explain format = 'brief' select col2, col1 from t use index(key1) where col2 like '5%' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Rows()
require.Equal(t, "root", fmt.Sprintf("%v", rows[1][2]))
require.Equal(t, "eq(substr(test.t.col1, 1, 1), \"4\")", fmt.Sprintf("%v", rows[1][4]))
require.Equal(t, "eq(from_base64(to_base64(substr(test.t.col1, 1, 1))), \"4\")", fmt.Sprintf("%v", rows[1][4]))
require.Equal(t, "cop[tikv]", fmt.Sprintf("%v", rows[3][2]))
require.Equal(t, "like(test.t.col2, \"5%\", 92)", fmt.Sprintf("%v", rows[3][4]))
tk.MustQuery("select col2, col1 from t use index(key1) where col2 like '5%' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("511 411111"))
tk.MustQuery("select count(col2) from t use index(key1) where col2 like '5%' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("1"))
tk.MustQuery("select col2, col1 from t use index(key1) where col2 like '5%' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("511 411111"))
tk.MustQuery("select count(col2) from t use index(key1) where col2 like '5%' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("1"))

// case 2, index scan without double read, none of the filters can be pushed to cop task
rows = tk.MustQuery("explain format = 'brief' select col1, col2 from t use index(key2) where substr(col2, 1, 1) = '5' and substr(col1, 1, 1) = '4'").Rows()
rows = tk.MustQuery("explain format = 'brief' select col1, col2 from t use index(key2) where from_base64(to_base64(substr(col2, 1, 1))) = '5' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Rows()
require.Equal(t, "root", fmt.Sprintf("%v", rows[0][2]))
require.Equal(t, "eq(substr(test.t.col1, 1, 1), \"4\"), eq(substr(test.t.col2, 1, 1), \"5\")", fmt.Sprintf("%v", rows[0][4]))
tk.MustQuery("select col1, col2 from t use index(key2) where substr(col2, 1, 1) = '5' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("411111 511"))
tk.MustQuery("select count(col1) from t use index(key2) where substr(col2, 1, 1) = '5' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("1"))
require.Equal(t, "eq(from_base64(to_base64(substr(test.t.col1, 1, 1))), \"4\"), eq(from_base64(to_base64(substr(test.t.col2, 1, 1))), \"5\")", fmt.Sprintf("%v", rows[0][4]))
tk.MustQuery("select col1, col2 from t use index(key2) where from_base64(to_base64(substr(col2, 1, 1))) = '5' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("411111 511"))
tk.MustQuery("select count(col1) from t use index(key2) where from_base64(to_base64(substr(col2, 1, 1))) = '5' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("1"))

// case 3, index scan with double read, some filters cannot be pushed to the cop task
rows = tk.MustQuery("explain format = 'brief' select id from t use index(key1) where col2 like '5%' and substr(col1, 1, 1) = '4'").Rows()
rows = tk.MustQuery("explain format = 'brief' select id from t use index(key1) where col2 like '5%' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Rows()
require.Equal(t, "root", fmt.Sprintf("%v", rows[1][2]))
require.Equal(t, "eq(substr(test.t.col1, 1, 1), \"4\")", fmt.Sprintf("%v", rows[1][4]))
require.Equal(t, "eq(from_base64(to_base64(substr(test.t.col1, 1, 1))), \"4\")", fmt.Sprintf("%v", rows[1][4]))
require.Equal(t, "cop[tikv]", fmt.Sprintf("%v", rows[3][2]))
require.Equal(t, "like(test.t.col2, \"5%\", 92)", fmt.Sprintf("%v", rows[3][4]))
tk.MustQuery("select id from t use index(key1) where col2 like '5%' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("3"))
tk.MustQuery("select count(id) from t use index(key1) where col2 like '5%' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("1"))
tk.MustQuery("select id from t use index(key1) where col2 like '5%' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("3"))
tk.MustQuery("select count(id) from t use index(key1) where col2 like '5%' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("1"))

// case 4, index scan with double read, none of the filters can be pushed to cop task
rows = tk.MustQuery("explain format = 'brief' select id from t use index(key2) where substr(col2, 1, 1) = '5' and substr(col1, 1, 1) = '4'").Rows()
rows = tk.MustQuery("explain format = 'brief' select id from t use index(key2) where from_base64(to_base64(substr(col2, 1, 1))) = '5' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Rows()
require.Equal(t, "root", fmt.Sprintf("%v", rows[1][2]))
require.Equal(t, "eq(substr(test.t.col1, 1, 1), \"4\"), eq(substr(test.t.col2, 1, 1), \"5\")", fmt.Sprintf("%v", rows[1][4]))
tk.MustQuery("select id from t use index(key2) where substr(col2, 1, 1) = '5' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("3"))
tk.MustQuery("select count(id) from t use index(key2) where substr(col2, 1, 1) = '5' and substr(col1, 1, 1) = '4'").Check(testkit.Rows("1"))
require.Equal(t, "eq(from_base64(to_base64(substr(test.t.col1, 1, 1))), \"4\"), eq(from_base64(to_base64(substr(test.t.col2, 1, 1))), \"5\")", fmt.Sprintf("%v", rows[1][4]))
tk.MustQuery("select id from t use index(key2) where from_base64(to_base64(substr(col2, 1, 1))) = '5' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("3"))
tk.MustQuery("select count(id) from t use index(key2) where from_base64(to_base64(substr(col2, 1, 1))) = '5' and from_base64(to_base64(substr(col1, 1, 1))) = '4'").Check(testkit.Rows("1"))
}
func TestIssue16973(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
Expand Down
30 changes: 14 additions & 16 deletions planner/core/testdata/integration_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -2155,14 +2155,13 @@
" ├─TableReader(Build) 10000.00 root data:TableFullScan",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─StreamAgg(Probe) 1.00 root funcs:min(test.t1.c1)->Column#8, funcs:sum(0)->Column#9, funcs:count(1)->Column#10",
" └─Selection 0.01 root substring(cast(test.t1.c3, var_string(20)), 10)",
" └─IndexMerge 0.01 root ",
" ├─Selection(Build) 1.00 cop[tikv] eq(10, test.t2.c3)",
" │ └─TableRangeScan 1.00 cop[tikv] table:t1 range:[10,10], keep order:false, stats:pseudo",
" ├─Selection(Build) 8.00 cop[tikv] eq(1, test.t2.c3)",
" │ └─IndexRangeScan 10.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.01 cop[tikv] or(and(eq(test.t1.c1, 10), eq(10, test.t2.c3)), and(eq(test.t1.c2, 1), eq(1, test.t2.c3)))",
" └─TableRowIDScan 9.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
" └─IndexMerge 0.01 root ",
" ├─Selection(Build) 1.00 cop[tikv] eq(10, test.t2.c3)",
" │ └─TableRangeScan 1.00 cop[tikv] table:t1 range:[10,10], keep order:false, stats:pseudo",
" ├─Selection(Build) 8.00 cop[tikv] eq(1, test.t2.c3)",
" │ └─IndexRangeScan 10.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.01 cop[tikv] or(and(eq(test.t1.c1, 10), eq(10, test.t2.c3)), and(eq(test.t1.c2, 1), eq(1, test.t2.c3))), substring(cast(test.t1.c3, var_string(20)), 10)",
" └─TableRowIDScan 9.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Res": [
"1 1 1",
Expand Down Expand Up @@ -2199,14 +2198,13 @@
" ├─TableReader(Build) 10000.00 root data:TableFullScan",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─StreamAgg(Probe) 1.00 root funcs:min(test.t1.c1)->Column#8, funcs:sum(0)->Column#9, funcs:count(1)->Column#10",
" └─Selection 3.03 root substring(cast(test.t1.c3, var_string(20)), 10)",
" └─IndexMerge 3.78 root ",
" ├─Selection(Build) 3.33 cop[tikv] eq(test.t1.c1, test.t2.c3)",
" │ └─TableRangeScan 3333.33 cop[tikv] table:t1 range:[10,+inf], keep order:false, stats:pseudo",
" ├─Selection(Build) 8.00 cop[tikv] eq(1, test.t2.c3)",
" │ └─IndexRangeScan 10.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 3.78 cop[tikv] or(and(ge(test.t1.c1, 10), eq(test.t1.c1, test.t2.c3)), and(eq(test.t1.c2, 1), eq(1, test.t2.c3)))",
" └─TableRowIDScan 3338.67 cop[tikv] table:t1 keep order:false, stats:pseudo"
" └─IndexMerge 3.03 root ",
" ├─Selection(Build) 3.33 cop[tikv] eq(test.t1.c1, test.t2.c3)",
" │ └─TableRangeScan 3333.33 cop[tikv] table:t1 range:[10,+inf], keep order:false, stats:pseudo",
" ├─Selection(Build) 8.00 cop[tikv] eq(1, test.t2.c3)",
" │ └─IndexRangeScan 10.00 cop[tikv] table:t1, index:c2(c2) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 3.03 cop[tikv] or(and(ge(test.t1.c1, 10), eq(test.t1.c1, test.t2.c3)), and(eq(test.t1.c2, 1), eq(1, test.t2.c3))), substring(cast(test.t1.c3, var_string(20)), 10)",
" └─TableRowIDScan 3338.67 cop[tikv] table:t1 keep order:false, stats:pseudo"
],
"Res": [
"1 1 1",
Expand Down

0 comments on commit 7f85091

Please sign in to comment.