From 9d74d6404f492c5b6f38f0c42511dbbec80cc5a3 Mon Sep 17 00:00:00 2001 From: Yiding Cui Date: Mon, 29 Apr 2019 11:07:15 +0800 Subject: [PATCH] planner, executor: index join enhancement (#8471) --- cmd/explaintest/r/explain_complex.result | 6 +- .../r/explain_complex_stats.result | 6 +- cmd/explaintest/r/explain_easy.result | 2 +- cmd/explaintest/r/index_join.result | 4 +- cmd/explaintest/r/topn_push_down.result | 8 +- cmd/explaintest/r/tpch.result | 20 +- executor/builder.go | 71 ++- executor/executor_pkg_test.go | 20 +- executor/index_lookup_join.go | 72 ++- executor/index_lookup_join_test.go | 6 +- planner/core/cbo_test.go | 4 +- planner/core/exhaust_physical_plans.go | 506 ++++++++++++++---- planner/core/exhaust_physical_plans_test.go | 238 ++++++++ planner/core/explain.go | 4 +- planner/core/find_best_task.go | 3 +- planner/core/physical_plans.go | 9 +- planner/core/resolve_indices.go | 13 + planner/core/rule_partition_processor.go | 3 +- statistics/selectivity.go | 3 +- util/ranger/detacher.go | 14 +- util/ranger/ranger.go | 20 +- util/ranger/ranger_test.go | 3 +- 22 files changed, 831 insertions(+), 204 deletions(-) create mode 100644 planner/core/exhaust_physical_plans_test.go diff --git a/cmd/explaintest/r/explain_complex.result b/cmd/explaintest/r/explain_complex.result index f8b87a8ded5fe..e23a148310563 100644 --- a/cmd/explaintest/r/explain_complex.result +++ b/cmd/explaintest/r/explain_complex.result @@ -120,7 +120,7 @@ Projection_13 1.00 root gad.id, test.dd.id, gad.aid, gad.cm, test.dd.dic, test.d └─HashAgg_19 1.00 root group by:gad.aid, test.dd.dic, funcs:firstrow(gad.id), firstrow(gad.aid), firstrow(gad.cm), firstrow(gad.p1), firstrow(gad.p2), firstrow(gad.p3), firstrow(gad.p4), firstrow(gad.p5), firstrow(gad.p6_md5), firstrow(gad.p7_md5), firstrow(gad.ext), firstrow(gad.t), firstrow(test.dd.id), firstrow(test.dd.dic), firstrow(test.dd.ip), firstrow(test.dd.t) └─IndexJoin_24 0.00 root inner join, inner:IndexLookUp_23, outer key:gad.aid, inner key:test.dd.aid, other cond:eq(test.dd.ip, gad.ip), gt(test.dd.t, gad.t) ├─IndexLookUp_23 0.00 root - │ ├─IndexScan_20 10.00 cop table:dd, index:aid, dic, range: decided by [gad.aid gad.ip], keep order:false, stats:pseudo + │ ├─IndexScan_20 10.00 cop table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, gad.aid)], keep order:false, stats:pseudo │ └─Selection_22 0.00 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) │ └─TableScan_21 10.00 cop table:dd, keep order:false, stats:pseudo └─IndexLookUp_33 3.33 root @@ -137,7 +137,7 @@ Projection_10 0.00 root gad.id, sdk.id, gad.aid, gad.cm, sdk.dic, sdk.ip, sdk.t, │ └─Selection_26 0.00 cop eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), not(isnull(gad.dic)) │ └─TableScan_25 3333.33 cop table:st, keep order:false, stats:pseudo └─IndexLookUp_17 0.00 root - ├─IndexScan_14 10.00 cop table:sdk, index:aid, dic, range: decided by [gad.aid gad.dic], keep order:false, stats:pseudo + ├─IndexScan_14 10.00 cop table:sdk, index:aid, dic, range: decided by [eq(sdk.aid, gad.aid)], keep order:false, stats:pseudo └─Selection_16 0.00 cop eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479), not(isnull(sdk.mac)), not(isnull(sdk.t)) └─TableScan_15 10.00 cop table:dd, keep order:false, stats:pseudo explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, 
p6_md5, p7_md5; @@ -157,7 +157,7 @@ Projection_10 0.00 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, rr. │ └─Selection_40 0.00 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) │ └─TableScan_39 10000.00 cop table:dt, range:[0,+inf], keep order:false, stats:pseudo └─IndexLookUp_18 3.33 root - ├─IndexScan_15 10.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false, stats:pseudo + ├─IndexScan_15 10.00 cop table:rr, index:aid, dic, range: decided by [eq(rr.aid, dt.aid) eq(rr.dic, dt.dic)], keep order:false, stats:pseudo └─Selection_17 3.33 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) └─TableScan_16 10.00 cop table:rr, keep order:false, stats:pseudo explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr; diff --git a/cmd/explaintest/r/explain_complex_stats.result b/cmd/explaintest/r/explain_complex_stats.result index 830fcaba61da5..5db97abb7f63e 100644 --- a/cmd/explaintest/r/explain_complex_stats.result +++ b/cmd/explaintest/r/explain_complex_stats.result @@ -133,7 +133,7 @@ Projection_13 424.00 root gad.id, test.dd.id, gad.aid, gad.cm, test.dd.dic, test │ └─Selection_28 424.00 cop eq(gad.bm, 0), eq(gad.pt, "android"), gt(gad.t, 1478143908), not(isnull(gad.ip)) │ └─TableScan_27 1999.00 cop table:gad, range:[0,+inf], keep order:false └─IndexLookUp_23 455.80 root - ├─IndexScan_20 1.00 cop table:dd, index:aid, dic, range: decided by [gad.aid gad.ip], keep order:false + ├─IndexScan_20 1.00 cop table:dd, index:aid, dic, range: decided by [eq(test.dd.aid, gad.aid)], keep order:false └─Selection_22 455.80 cop eq(test.dd.bm, 0), eq(test.dd.pt, "android"), gt(test.dd.t, 1478143908), not(isnull(test.dd.ip)), not(isnull(test.dd.t)) └─TableScan_21 1.00 cop table:dd, keep order:false explain select gad.id as gid,sdk.id as sid,gad.aid as aid,gad.cm as cm,sdk.dic as dic,sdk.ip as ip, sdk.t as t, gad.p1 as p1, gad.p2 as p2, gad.p3 as p3, gad.p4 as p4, gad.p5 as p5, gad.p6_md5 as p6, gad.p7_md5 as p7, gad.ext as ext from st gad join dd sdk on gad.aid = sdk.aid and gad.dic = sdk.mac and gad.t < sdk.t where gad.t > 1477971479 and gad.bm = 0 and gad.pt = 'ios' and gad.dit = 'mac' and sdk.t > 1477971479 and sdk.bm = 0 and sdk.pt = 'ios' limit 3000; @@ -145,7 +145,7 @@ Projection_10 170.34 root gad.id, sdk.id, gad.aid, gad.cm, sdk.dic, sdk.ip, sdk. 
│ └─Selection_22 170.34 cop eq(gad.bm, 0), eq(gad.dit, "mac"), eq(gad.pt, "ios"), gt(gad.t, 1477971479), not(isnull(gad.dic)) │ └─TableScan_21 1999.00 cop table:gad, range:[0,+inf], keep order:false └─IndexLookUp_17 509.04 root - ├─IndexScan_14 1.00 cop table:sdk, index:aid, dic, range: decided by [gad.aid gad.dic], keep order:false + ├─IndexScan_14 1.00 cop table:sdk, index:aid, dic, range: decided by [eq(sdk.aid, gad.aid)], keep order:false └─Selection_16 509.04 cop eq(sdk.bm, 0), eq(sdk.pt, "ios"), gt(sdk.t, 1477971479), not(isnull(sdk.mac)), not(isnull(sdk.t)) └─TableScan_15 1.00 cop table:dd, keep order:false explain SELECT cm, p1, p2, p3, p4, p5, p6_md5, p7_md5, count(1) as click_pv, count(DISTINCT ip) as click_ip FROM st WHERE (t between 1478188800 and 1478275200) and aid='cn.sbkcq' and pt='android' GROUP BY cm, p1, p2, p3, p4, p5, p6_md5, p7_md5; @@ -165,7 +165,7 @@ Projection_10 428.32 root dt.id, dt.aid, dt.pt, dt.dic, dt.cm, rr.gid, rr.acd, r │ └─Selection_40 428.32 cop eq(dt.bm, 0), eq(dt.pt, "ios"), gt(dt.t, 1478185592), not(isnull(dt.dic)) │ └─TableScan_39 2000.00 cop table:dt, range:[0,+inf], keep order:false └─IndexLookUp_18 970.00 root - ├─IndexScan_15 1.00 cop table:rr, index:aid, dic, range: decided by [dt.aid dt.dic], keep order:false + ├─IndexScan_15 1.00 cop table:rr, index:aid, dic, range: decided by [eq(rr.aid, dt.aid) eq(rr.dic, dt.dic)], keep order:false └─Selection_17 970.00 cop eq(rr.pt, "ios"), gt(rr.t, 1478185592) └─TableScan_16 1.00 cop table:rr, keep order:false explain select pc,cr,count(DISTINCT uid) as pay_users,count(oid) as pay_times,sum(am) as am from pp where ps=2 and ppt>=1478188800 and ppt<1478275200 and pi in ('510017','520017') and uid in ('18089709','18090780') group by pc,cr; diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index 3a64d978d4b8d..1497fb03490de 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -47,7 +47,7 @@ IndexJoin_12 4166.67 root left outer join, inner:IndexLookUp_11, outer key:test. 
│ └─TableScan_23 3333.33 cop table:t1, range:(1,+inf], keep order:false, stats:pseudo └─IndexLookUp_11 0.00 root ├─Selection_10 0.00 cop not(isnull(test.t2.c1)) - │ └─IndexScan_8 10.00 cop table:t2, index:c1, range: decided by [test.t1.c2], keep order:false, stats:pseudo + │ └─IndexScan_8 10.00 cop table:t2, index:c1, range: decided by [eq(test.t2.c1, test.t1.c2)], keep order:false, stats:pseudo └─TableScan_9 0.00 cop table:t2, keep order:false, stats:pseudo explain update t1 set t1.c2 = 2 where t1.c1 = 1; id count task operator info diff --git a/cmd/explaintest/r/index_join.result b/cmd/explaintest/r/index_join.result index 07d177671c292..6d5555bc8993e 100644 --- a/cmd/explaintest/r/index_join.result +++ b/cmd/explaintest/r/index_join.result @@ -9,7 +9,7 @@ id count task operator info IndexJoin_16 5.00 root inner join, inner:IndexLookUp_15, outer key:test.t2.a, inner key:test.t1.a ├─IndexLookUp_15 0.00 root │ ├─Selection_14 0.00 cop not(isnull(test.t1.a)) -│ │ └─IndexScan_12 5.00 cop table:t1, index:a, range: decided by [test.t2.a], keep order:false +│ │ └─IndexScan_12 5.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false │ └─TableScan_13 0.00 cop table:t1, keep order:false, stats:pseudo └─TableReader_19 1.00 root data:Selection_18 └─Selection_18 1.00 cop not(isnull(test.t2.a)) @@ -23,5 +23,5 @@ Projection_6 5.00 root test.t1.a, test.t1.b, test.t2.a, test.t2.b │ └─TableScan_28 1.00 cop table:t2, range:[-inf,+inf], keep order:false └─IndexLookUp_11 0.00 root ├─Selection_10 0.00 cop not(isnull(test.t1.a)) - │ └─IndexScan_8 5.00 cop table:t1, index:a, range: decided by [test.t2.a], keep order:false + │ └─IndexScan_8 5.00 cop table:t1, index:a, range: decided by [eq(test.t1.a, test.t2.a)], keep order:false └─TableScan_9 0.00 cop table:t1, keep order:false, stats:pseudo diff --git a/cmd/explaintest/r/topn_push_down.result b/cmd/explaintest/r/topn_push_down.result index 132df50fc2f19..e8ba90678635a 100644 --- a/cmd/explaintest/r/topn_push_down.result +++ b/cmd/explaintest/r/topn_push_down.result @@ -178,12 +178,12 @@ Projection_13 0.00 root te.expect_time │ │ └─Selection_73 0.00 cop eq(tr.brand_identy, 32314), eq(tr.domain_type, 2) │ │ └─TableScan_71 0.00 cop table:tr, keep order:false, stats:pseudo │ └─IndexLookUp_35 250.00 root - │ ├─IndexScan_32 10.00 cop table:te, index:trade_id, range: decided by [tr.id], keep order:false, stats:pseudo + │ ├─IndexScan_32 10.00 cop table:te, index:trade_id, range: decided by [eq(te.trade_id, tr.id)], keep order:false, stats:pseudo │ └─Selection_34 250.00 cop ge(te.expect_time, 2018-04-23 00:00:00.000000), le(te.expect_time, 2018-04-23 23:59:59.000000) │ └─TableScan_33 10.00 cop table:te, keep order:false, stats:pseudo └─IndexReader_91 0.00 root index:Selection_90 └─Selection_90 0.00 cop not(isnull(p.relate_id)) - └─IndexScan_89 10.00 cop table:p, index:relate_id, range: decided by [tr.id], keep order:false, stats:pseudo + └─IndexScan_89 10.00 cop table:p, index:relate_id, range: decided by [eq(p.relate_id, tr.id)], keep order:false, stats:pseudo desc select 1 as a from dual order by a limit 1; id count task operator info Projection_6 1.00 root 1 @@ -226,7 +226,7 @@ Limit_11 5.00 root offset:0, count:5 ├─TableReader_17 4.00 root data:TableScan_16 │ └─TableScan_16 4.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo └─IndexReader_14 10.00 root index:IndexScan_13 - └─IndexScan_13 10.00 cop table:t2, index:a, range: decided by [t1.a], keep order:false, stats:pseudo + └─IndexScan_13 10.00 cop table:t2, 
index:a, range: decided by [eq(t2.a, t1.a)], keep order:false, stats:pseudo explain select /*+ TIDB_INLJ(t2) */ * from t t1 left join t t2 on t1.a = t2.a where t2.a is null limit 5; id count task operator info Limit_12 5.00 root offset:0, count:5 @@ -235,7 +235,7 @@ Limit_12 5.00 root offset:0, count:5 ├─TableReader_19 4.00 root data:TableScan_18 │ └─TableScan_18 4.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo └─IndexReader_16 10.00 root index:IndexScan_15 - └─IndexScan_15 10.00 cop table:t2, index:a, range: decided by [t1.a], keep order:false, stats:pseudo + └─IndexScan_15 10.00 cop table:t2, index:a, range: decided by [eq(t2.a, t1.a)], keep order:false, stats:pseudo explain select /*+ TIDB_SMJ(t1, t2) */ * from t t1 join t t2 on t1.a = t2.a limit 5; id count task operator info Limit_11 5.00 root offset:0, count:5 diff --git a/cmd/explaintest/r/tpch.result b/cmd/explaintest/r/tpch.result index 5ee157d771853..a083977411ea6 100644 --- a/cmd/explaintest/r/tpch.result +++ b/cmd/explaintest/r/tpch.result @@ -261,7 +261,7 @@ Projection_14 10.00 root tpch.lineitem.l_orderkey, 7_col_0, tpch.orders.o_orderd │ └─Selection_51 36870000.00 cop lt(tpch.orders.o_orderdate, 1995-03-13 00:00:00.000000) │ └─TableScan_50 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false └─IndexLookUp_28 162945114.27 root - ├─IndexScan_25 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + ├─IndexScan_25 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false └─Selection_27 162945114.27 cop gt(tpch.lineitem.l_shipdate, 1995-03-13 00:00:00.000000) └─TableScan_26 1.00 cop table:lineitem, keep order:false /* @@ -302,7 +302,7 @@ Sort_10 1.00 root tpch.orders.o_orderpriority:asc │ └─Selection_32 2925937.50 cop ge(tpch.orders.o_orderdate, 1995-01-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1995-04-01) │ └─TableScan_31 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false └─IndexLookUp_20 240004648.80 root - ├─IndexScan_17 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + ├─IndexScan_17 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false └─Selection_19 240004648.80 cop lt(tpch.lineitem.l_commitdate, tpch.lineitem.l_receiptdate) └─TableScan_18 1.00 cop table:lineitem, keep order:false /* @@ -538,7 +538,7 @@ Sort_29 718.01 root all_nations.o_year:asc │ │ │ │ └─Selection_77 22382008.93 cop ge(tpch.orders.o_orderdate, 1995-01-01 00:00:00.000000), le(tpch.orders.o_orderdate, 1996-12-31 00:00:00.000000) │ │ │ │ └─TableScan_76 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false │ │ │ └─IndexLookUp_55 1.00 root - │ │ │ ├─IndexScan_53 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + │ │ │ ├─IndexScan_53 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false │ │ │ └─TableScan_54 1.00 cop table:lineitem, keep order:false │ │ └─TableReader_83 61674.00 root data:Selection_82 │ │ └─Selection_82 61674.00 cop eq(tpch.part.p_type, "SMALL PLATED COPPER") @@ -612,7 +612,7 @@ Sort_25 2406.00 root profit.nation:asc, profit.o_year:desc │ └─TableReader_40 1.00 root data:TableScan_39 │ └─TableScan_39 1.00 
cop table:orders, range: decided by [tpch.lineitem.l_orderkey], keep order:false └─IndexLookUp_34 1.00 root - ├─IndexScan_32 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.lineitem.l_suppkey tpch.lineitem.l_partkey], keep order:false + ├─IndexScan_32 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [eq(tpch.partsupp.ps_partkey, tpch.lineitem.l_partkey) eq(tpch.partsupp.ps_suppkey, tpch.lineitem.l_suppkey)], keep order:false └─TableScan_33 1.00 cop table:partsupp, keep order:false /* Q10 Returned Item Reporting Query @@ -673,7 +673,7 @@ Projection_17 20.00 root tpch.customer.c_custkey, tpch.customer.c_name, 9_col_0, │ └─Selection_47 3017307.69 cop ge(tpch.orders.o_orderdate, 1993-08-01 00:00:00.000000), lt(tpch.orders.o_orderdate, 1993-11-01) │ └─TableScan_46 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false └─IndexLookUp_31 73916005.00 root - ├─IndexScan_28 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + ├─IndexScan_28 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false └─Selection_30 73916005.00 cop eq(tpch.lineitem.l_returnflag, "R") └─TableScan_29 1.00 cop table:lineitem, keep order:false /* @@ -936,7 +936,7 @@ Sort_13 3863988.24 root supplier_cnt:desc, tpch.part.p_brand:asc, tpch.part.p_ty │ │ └─Selection_40 1200618.43 cop in(tpch.part.p_size, 48, 19, 12, 4, 41, 7, 21, 39), ne(tpch.part.p_brand, "Brand#34"), not(like(tpch.part.p_type, "LARGE BRUSHED%", 92)) │ │ └─TableScan_39 10000000.00 cop table:part, range:[-inf,+inf], keep order:false │ └─IndexReader_26 1.00 root index:IndexScan_25 - │ └─IndexScan_25 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false + │ └─IndexScan_25 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [eq(tpch.partsupp.ps_partkey, tpch.part.p_partkey)], keep order:false └─TableReader_46 400000.00 root data:Selection_45 └─Selection_45 400000.00 cop like(tpch.supplier.s_comment, "%Customer%Complaints%", 92) └─TableScan_44 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false @@ -1042,7 +1042,7 @@ Projection_24 100.00 root tpch.customer.c_name, tpch.customer.c_custkey, tpch.or │ └─HashAgg_53 74063872.00 cop group by:tpch.lineitem.l_orderkey, funcs:sum(tpch.lineitem.l_quantity), firstrow(tpch.lineitem.l_orderkey) │ └─TableScan_58 300005811.00 cop table:lineitem, range:[-inf,+inf], keep order:false └─IndexLookUp_37 1.00 root - ├─IndexScan_35 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false + ├─IndexScan_35 1.00 cop table:lineitem, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(tpch.lineitem.l_orderkey, tpch.orders.o_orderkey)], keep order:false └─TableScan_36 1.00 cop table:lineitem, keep order:false /* Q19 Discounted Revenue Query @@ -1165,7 +1165,7 @@ Sort_28 20000.00 root tpch.supplier.s_name:asc │ │ └─Selection_74 80007.93 cop like(tpch.part.p_name, "green%", 92) │ │ └─TableScan_73 10000000.00 cop table:part, range:[-inf,+inf], keep order:false │ └─IndexLookUp_58 1.00 root - │ ├─IndexScan_56 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [tpch.part.p_partkey], keep order:false + │ ├─IndexScan_56 1.00 cop table:partsupp, index:PS_PARTKEY, PS_SUPPKEY, range: decided by [eq(tpch.partsupp.ps_partkey, tpch.part.p_partkey)], keep order:false 
│ └─TableScan_57 1.00 cop table:partsupp, keep order:false └─TableReader_80 44189356.65 root data:Selection_79 └─Selection_79 44189356.65 cop ge(tpch.lineitem.l_shipdate, 1993-01-01 00:00:00.000000), lt(tpch.lineitem.l_shipdate, 1994-01-01) @@ -1239,10 +1239,10 @@ Projection_25 1.00 root tpch.supplier.s_name, 17_col_0 │ │ └─Selection_60 0.80 cop eq(tpch.orders.o_orderstatus, "F") │ │ └─TableScan_59 1.00 cop table:orders, range: decided by [l1.l_orderkey], keep order:false │ └─IndexLookUp_55 1.00 root - │ ├─IndexScan_53 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false + │ ├─IndexScan_53 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(l2.l_orderkey, l1.l_orderkey)], keep order:false │ └─TableScan_54 1.00 cop table:lineitem, keep order:false └─IndexLookUp_39 240004648.80 root - ├─IndexScan_36 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false + ├─IndexScan_36 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [eq(l3.l_orderkey, l1.l_orderkey)], keep order:false └─Selection_38 240004648.80 cop gt(l3.l_receiptdate, l3.l_commitdate) └─TableScan_37 1.00 cop table:lineitem, keep order:false /* diff --git a/executor/builder.go b/executor/builder.go index fbd9d25444607..596b64c72b48c 100644 --- a/executor/builder.go +++ b/executor/builder.go @@ -39,7 +39,6 @@ import ( "github.com/pingcap/tidb/metrics" plannercore "github.com/pingcap/tidb/planner/core" "github.com/pingcap/tidb/sessionctx" - "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/types" @@ -1674,6 +1673,7 @@ func (b *executorBuilder) buildIndexLookUpJoin(v *plannercore.PhysicalIndexJoin) isOuterJoin: v.JoinType.IsOuterJoin(), indexRanges: v.Ranges, keyOff2IdxOff: v.KeyOff2IdxOff, + lastColHelper: v.CompareFilters, } outerKeyCols := make([]int, len(v.OuterJoinKeys)) for i := 0; i < len(v.OuterJoinKeys); i++ { @@ -1912,25 +1912,25 @@ type dataReaderBuilder struct { selectResultHook // for testing } -func (builder *dataReaderBuilder) buildExecutorForIndexJoin(ctx context.Context, datums [][]types.Datum, - IndexRanges []*ranger.Range, keyOff2IdxOff []int) (Executor, error) { +func (builder *dataReaderBuilder) buildExecutorForIndexJoin(ctx context.Context, lookUpContents []*indexJoinLookUpContent, + IndexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { switch v := builder.Plan.(type) { case *plannercore.PhysicalTableReader: - return builder.buildTableReaderForIndexJoin(ctx, v, datums) + return builder.buildTableReaderForIndexJoin(ctx, v, lookUpContents) case *plannercore.PhysicalIndexReader: - return builder.buildIndexReaderForIndexJoin(ctx, v, datums, IndexRanges, keyOff2IdxOff) + return builder.buildIndexReaderForIndexJoin(ctx, v, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) case *plannercore.PhysicalIndexLookUpReader: - return builder.buildIndexLookUpReaderForIndexJoin(ctx, v, datums, IndexRanges, keyOff2IdxOff) + return builder.buildIndexLookUpReaderForIndexJoin(ctx, v, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) case *plannercore.PhysicalUnionScan: - return builder.buildUnionScanForIndexJoin(ctx, v, datums, IndexRanges, keyOff2IdxOff) + return builder.buildUnionScanForIndexJoin(ctx, v, lookUpContents, IndexRanges, keyOff2IdxOff, cwc) } return nil, errors.New("Wrong plan type for 
dataReaderBuilder") } func (builder *dataReaderBuilder) buildUnionScanForIndexJoin(ctx context.Context, v *plannercore.PhysicalUnionScan, - values [][]types.Datum, indexRanges []*ranger.Range, keyOff2IdxOff []int) (Executor, error) { + values []*indexJoinLookUpContent, indexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { childBuilder := &dataReaderBuilder{Plan: v.Children()[0], executorBuilder: builder.executorBuilder} - reader, err := childBuilder.buildExecutorForIndexJoin(ctx, values, indexRanges, keyOff2IdxOff) + reader, err := childBuilder.buildExecutorForIndexJoin(ctx, values, indexRanges, keyOff2IdxOff, cwc) if err != nil { return nil, err } @@ -1943,14 +1943,14 @@ func (builder *dataReaderBuilder) buildUnionScanForIndexJoin(ctx context.Context return us, nil } -func (builder *dataReaderBuilder) buildTableReaderForIndexJoin(ctx context.Context, v *plannercore.PhysicalTableReader, datums [][]types.Datum) (Executor, error) { +func (builder *dataReaderBuilder) buildTableReaderForIndexJoin(ctx context.Context, v *plannercore.PhysicalTableReader, lookUpContents []*indexJoinLookUpContent) (Executor, error) { e, err := buildNoRangeTableReader(builder.executorBuilder, v) if err != nil { return nil, err } - handles := make([]int64, 0, len(datums)) - for _, datum := range datums { - handles = append(handles, datum[0].GetInt64()) + handles := make([]int64, 0, len(lookUpContents)) + for _, content := range lookUpContents { + handles = append(handles, content.keys[0].GetInt64()) } return builder.buildTableReaderFromHandles(ctx, e, handles) } @@ -1984,12 +1984,12 @@ func (builder *dataReaderBuilder) buildTableReaderFromHandles(ctx context.Contex } func (builder *dataReaderBuilder) buildIndexReaderForIndexJoin(ctx context.Context, v *plannercore.PhysicalIndexReader, - values [][]types.Datum, indexRanges []*ranger.Range, keyOff2IdxOff []int) (Executor, error) { + lookUpContents []*indexJoinLookUpContent, indexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { e, err := buildNoRangeIndexReader(builder.executorBuilder, v) if err != nil { return nil, err } - kvRanges, err := buildKvRangesForIndexJoin(e.ctx.GetSessionVars().StmtCtx, e.physicalTableID, e.index.ID, values, indexRanges, keyOff2IdxOff) + kvRanges, err := buildKvRangesForIndexJoin(e.ctx, e.physicalTableID, e.index.ID, lookUpContents, indexRanges, keyOff2IdxOff, cwc) if err != nil { return nil, err } @@ -1998,12 +1998,12 @@ func (builder *dataReaderBuilder) buildIndexReaderForIndexJoin(ctx context.Conte } func (builder *dataReaderBuilder) buildIndexLookUpReaderForIndexJoin(ctx context.Context, v *plannercore.PhysicalIndexLookUpReader, - values [][]types.Datum, indexRanges []*ranger.Range, keyOff2IdxOff []int) (Executor, error) { + lookUpContents []*indexJoinLookUpContent, indexRanges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) (Executor, error) { e, err := buildNoRangeIndexLookUpReader(builder.executorBuilder, v) if err != nil { return nil, err } - e.kvRanges, err = buildKvRangesForIndexJoin(e.ctx.GetSessionVars().StmtCtx, getPhysicalTableID(e.table), e.index.ID, values, indexRanges, keyOff2IdxOff) + e.kvRanges, err = buildKvRangesForIndexJoin(e.ctx, getPhysicalTableID(e.table), e.index.ID, lookUpContents, indexRanges, keyOff2IdxOff, cwc) if err != nil { return nil, err } @@ -2012,17 +2012,40 @@ func (builder *dataReaderBuilder) buildIndexLookUpReaderForIndexJoin(ctx context } // 
buildKvRangesForIndexJoin builds kv ranges for index join when the inner plan is index scan plan. -func buildKvRangesForIndexJoin(sc *stmtctx.StatementContext, tableID, indexID int64, keyDatums [][]types.Datum, indexRanges []*ranger.Range, keyOff2IdxOff []int) ([]kv.KeyRange, error) { - kvRanges := make([]kv.KeyRange, 0, len(indexRanges)*len(keyDatums)) - for _, val := range keyDatums { - for _, ran := range indexRanges { +func buildKvRangesForIndexJoin(ctx sessionctx.Context, tableID, indexID int64, lookUpContents []*indexJoinLookUpContent, + ranges []*ranger.Range, keyOff2IdxOff []int, cwc *plannercore.ColWithCmpFuncManager) ([]kv.KeyRange, error) { + kvRanges := make([]kv.KeyRange, 0, len(ranges)*len(lookUpContents)) + lastPos := len(ranges[0].LowVal) - 1 + sc := ctx.GetSessionVars().StmtCtx + for _, content := range lookUpContents { + for _, ran := range ranges { for keyOff, idxOff := range keyOff2IdxOff { - ran.LowVal[idxOff] = val[keyOff] - ran.HighVal[idxOff] = val[keyOff] + ran.LowVal[idxOff] = content.keys[keyOff] + ran.HighVal[idxOff] = content.keys[keyOff] } } + if cwc != nil { + nextColRanges, err := cwc.BuildRangesByRow(ctx, content.row) + if err != nil { + return nil, err + } + for _, nextColRan := range nextColRanges { + for _, ran := range ranges { + ran.LowVal[lastPos] = nextColRan.LowVal[0] + ran.HighVal[lastPos] = nextColRan.HighVal[0] + ran.LowExclude = nextColRan.LowExclude + ran.HighExclude = nextColRan.HighExclude + } + tmpKvRanges, err := distsql.IndexRangesToKVRanges(sc, tableID, indexID, ranges, nil) + if err != nil { + return nil, errors.Trace(err) + } + kvRanges = append(kvRanges, tmpKvRanges...) + } + continue + } - tmpKvRanges, err := distsql.IndexRangesToKVRanges(sc, tableID, indexID, indexRanges, nil) + tmpKvRanges, err := distsql.IndexRangesToKVRanges(sc, tableID, indexID, ranges, nil) if err != nil { return nil, err } diff --git a/executor/executor_pkg_test.go b/executor/executor_pkg_test.go index c2c274455c7e2..b237236e9b31b 100644 --- a/executor/executor_pkg_test.go +++ b/executor/executor_pkg_test.go @@ -15,7 +15,6 @@ package executor import ( "context" - "time" . 
"github.com/pingcap/check" "github.com/pingcap/parser/ast" @@ -23,7 +22,6 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" - "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util" "github.com/pingcap/tidb/util/chunk" @@ -137,7 +135,7 @@ func buildSchema(names []string, ftypes []byte) *expression.Schema { return schema } -func (s *testExecSuite) TestBuildKvRangesForIndexJoin(c *C) { +func (s *testExecSuite) TestBuildKvRangesForIndexJoinWithoutCwc(c *C) { indexRanges := make([]*ranger.Range, 0, 6) indexRanges = append(indexRanges, generateIndexRange(1, 1, 1, 1, 1)) indexRanges = append(indexRanges, generateIndexRange(1, 1, 2, 1, 1)) @@ -146,16 +144,16 @@ func (s *testExecSuite) TestBuildKvRangesForIndexJoin(c *C) { indexRanges = append(indexRanges, generateIndexRange(2, 1, 1, 1, 1)) indexRanges = append(indexRanges, generateIndexRange(2, 1, 2, 1, 1)) - joinKeyRows := make([][]types.Datum, 0, 5) - joinKeyRows = append(joinKeyRows, generateDatumSlice(1, 1)) - joinKeyRows = append(joinKeyRows, generateDatumSlice(1, 2)) - joinKeyRows = append(joinKeyRows, generateDatumSlice(2, 1)) - joinKeyRows = append(joinKeyRows, generateDatumSlice(2, 2)) - joinKeyRows = append(joinKeyRows, generateDatumSlice(2, 3)) + joinKeyRows := make([]*indexJoinLookUpContent, 0, 5) + joinKeyRows = append(joinKeyRows, &indexJoinLookUpContent{keys: generateDatumSlice(1, 1)}) + joinKeyRows = append(joinKeyRows, &indexJoinLookUpContent{keys: generateDatumSlice(1, 2)}) + joinKeyRows = append(joinKeyRows, &indexJoinLookUpContent{keys: generateDatumSlice(2, 1)}) + joinKeyRows = append(joinKeyRows, &indexJoinLookUpContent{keys: generateDatumSlice(2, 2)}) + joinKeyRows = append(joinKeyRows, &indexJoinLookUpContent{keys: generateDatumSlice(2, 3)}) keyOff2IdxOff := []int{1, 3} - sc := &stmtctx.StatementContext{TimeZone: time.Local} - kvRanges, err := buildKvRangesForIndexJoin(sc, 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff) + ctx := mock.NewContext() + kvRanges, err := buildKvRangesForIndexJoin(ctx, 0, 0, joinKeyRows, indexRanges, keyOff2IdxOff, nil) c.Assert(err, IsNil) // Check the kvRanges is in order. for i, kvRange := range kvRanges { diff --git a/executor/index_lookup_join.go b/executor/index_lookup_join.go index e49baf3c6290a..ac30d73745219 100644 --- a/executor/index_lookup_join.go +++ b/executor/index_lookup_join.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/parser/mysql" "github.com/pingcap/parser/terror" "github.com/pingcap/tidb/expression" + plannercore "github.com/pingcap/tidb/planner/core" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/types" @@ -73,6 +74,9 @@ type IndexLookUpJoin struct { keyOff2IdxOff []int innerPtrBytes [][]byte + // lastColHelper store the information for last col if there's complicated filter like col > x_col and col < x_col + 100. + lastColHelper *plannercore.ColWithCmpFuncManager + memTracker *memory.Tracker // track memory usage. } @@ -132,8 +136,9 @@ type innerWorker struct { ctx sessionctx.Context executorChk *chunk.Chunk - indexRanges []*ranger.Range - keyOff2IdxOff []int + indexRanges []*ranger.Range + nextColCompareFilters *plannercore.ColWithCmpFuncManager + keyOff2IdxOff []int } // Open implements the Executor interface. 
@@ -209,13 +214,14 @@ func (e *IndexLookUpJoin) newInnerWorker(taskCh chan *lookUpJoinTask) *innerWork copiedRanges = append(copiedRanges, ran.Clone()) } iw := &innerWorker{ - innerCtx: e.innerCtx, - outerCtx: e.outerCtx, - taskCh: taskCh, - ctx: e.ctx, - executorChk: chunk.NewChunkWithCapacity(e.innerCtx.rowTypes, e.maxChunkSize), - indexRanges: copiedRanges, - keyOff2IdxOff: e.keyOff2IdxOff, + innerCtx: e.innerCtx, + outerCtx: e.outerCtx, + taskCh: taskCh, + ctx: e.ctx, + executorChk: chunk.NewChunkWithCapacity(e.innerCtx.rowTypes, e.maxChunkSize), + indexRanges: copiedRanges, + keyOff2IdxOff: e.keyOff2IdxOff, + nextColCompareFilters: e.lastColHelper, } return iw } @@ -447,13 +453,18 @@ func (iw *innerWorker) run(ctx context.Context, wg *sync.WaitGroup) { } } +type indexJoinLookUpContent struct { + keys []types.Datum + row chunk.Row +} + func (iw *innerWorker) handleTask(ctx context.Context, task *lookUpJoinTask) error { - dLookUpKeys, err := iw.constructDatumLookupKeys(task) + lookUpContents, err := iw.constructLookupContent(task) if err != nil { return err } - dLookUpKeys = iw.sortAndDedupDatumLookUpKeys(dLookUpKeys) - err = iw.fetchInnerResults(ctx, task, dLookUpKeys) + lookUpContents = iw.sortAndDedupLookUpContents(lookUpContents) + err = iw.fetchInnerResults(ctx, task, lookUpContents) if err != nil { return err } @@ -464,8 +475,8 @@ func (iw *innerWorker) handleTask(ctx context.Context, task *lookUpJoinTask) err return nil } -func (iw *innerWorker) constructDatumLookupKeys(task *lookUpJoinTask) ([][]types.Datum, error) { - dLookUpKeys := make([][]types.Datum, 0, task.outerResult.NumRows()) +func (iw *innerWorker) constructLookupContent(task *lookUpJoinTask) ([]*indexJoinLookUpContent, error) { + lookUpContents := make([]*indexJoinLookUpContent, 0, task.outerResult.NumRows()) keyBuf := make([]byte, 0, 64) for i := 0; i < task.outerResult.NumRows(); i++ { dLookUpKey, err := iw.constructDatumLookupKey(task, i) @@ -484,11 +495,11 @@ func (iw *innerWorker) constructDatumLookupKeys(task *lookUpJoinTask) ([][]types } // Store the encoded lookup key in chunk, so we can use it to lookup the matched inners directly. 
task.encodedLookUpKeys.AppendBytes(0, keyBuf) - dLookUpKeys = append(dLookUpKeys, dLookUpKey) + lookUpContents = append(lookUpContents, &indexJoinLookUpContent{keys: dLookUpKey, row: task.outerResult.GetRow(i)}) } task.memTracker.Consume(task.encodedLookUpKeys.MemoryUsage()) - return dLookUpKeys, nil + return lookUpContents, nil } func (iw *innerWorker) constructDatumLookupKey(task *lookUpJoinTask, rowIdx int) ([]types.Datum, error) { @@ -525,20 +536,23 @@ func (iw *innerWorker) constructDatumLookupKey(task *lookUpJoinTask, rowIdx int) return dLookupKey, nil } -func (iw *innerWorker) sortAndDedupDatumLookUpKeys(dLookUpKeys [][]types.Datum) [][]types.Datum { - if len(dLookUpKeys) < 2 { - return dLookUpKeys +func (iw *innerWorker) sortAndDedupLookUpContents(lookUpContents []*indexJoinLookUpContent) []*indexJoinLookUpContent { + if len(lookUpContents) < 2 { + return lookUpContents } sc := iw.ctx.GetSessionVars().StmtCtx - sort.Slice(dLookUpKeys, func(i, j int) bool { - cmp := compareRow(sc, dLookUpKeys[i], dLookUpKeys[j]) - return cmp < 0 + sort.Slice(lookUpContents, func(i, j int) bool { + cmp := compareRow(sc, lookUpContents[i].keys, lookUpContents[j].keys) + if cmp != 0 || iw.nextColCompareFilters == nil { + return cmp < 0 + } + return iw.nextColCompareFilters.CompareRow(lookUpContents[i].row, lookUpContents[j].row) < 0 }) - deDupedLookupKeys := dLookUpKeys[:1] - for i := 1; i < len(dLookUpKeys); i++ { - cmp := compareRow(sc, dLookUpKeys[i], dLookUpKeys[i-1]) - if cmp != 0 { - deDupedLookupKeys = append(deDupedLookupKeys, dLookUpKeys[i]) + deDupedLookupKeys := lookUpContents[:1] + for i := 1; i < len(lookUpContents); i++ { + cmp := compareRow(sc, lookUpContents[i].keys, lookUpContents[i-1].keys) + if cmp != 0 || (iw.nextColCompareFilters != nil && iw.nextColCompareFilters.CompareRow(lookUpContents[i].row, lookUpContents[i-1].row) != 0) { + deDupedLookupKeys = append(deDupedLookupKeys, lookUpContents[i]) } } return deDupedLookupKeys @@ -558,8 +572,8 @@ func compareRow(sc *stmtctx.StatementContext, left, right []types.Datum) int { return 0 } -func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTask, dLookUpKeys [][]types.Datum) error { - innerExec, err := iw.readerBuilder.buildExecutorForIndexJoin(ctx, dLookUpKeys, iw.indexRanges, iw.keyOff2IdxOff) +func (iw *innerWorker) fetchInnerResults(ctx context.Context, task *lookUpJoinTask, lookUpContent []*indexJoinLookUpContent) error { + innerExec, err := iw.readerBuilder.buildExecutorForIndexJoin(ctx, lookUpContent, iw.indexRanges, iw.keyOff2IdxOff, iw.nextColCompareFilters) if err != nil { return err } diff --git a/executor/index_lookup_join_test.go b/executor/index_lookup_join_test.go index 2eb60cc747856..0df5fa6055edd 100644 --- a/executor/index_lookup_join_test.go +++ b/executor/index_lookup_join_test.go @@ -70,7 +70,7 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { "└─UnionScan_12 0.00 root not(isnull(test.t2.a))", " └─IndexLookUp_11 0.00 root ", " ├─Selection_10 0.00 cop not(isnull(test.t2.a))", - " │ └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + " │ └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", " └─TableScan_9 0.00 cop table:t2, keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ * from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( @@ -88,7 +88,7 @@ func (s *testSuite1) TestIndexJoinUnionScan(c *C) { " └─UnionScan_11 0.00 root 
not(isnull(test.t2.a))", " └─IndexReader_10 0.00 root index:Selection_9", " └─Selection_9 0.00 cop not(isnull(test.t2.a))", - " └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + " └─IndexScan_8 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ t1.a, t2.a from t1 join t2 on t1.a = t2.a").Check(testkit.Rows( "2 2", @@ -117,7 +117,7 @@ func (s *testSuite1) TestBatchIndexJoinUnionScan(c *C) { " └─UnionScan_26 0.00 root not(isnull(test.t2.a))", " └─IndexReader_25 0.00 root index:Selection_24", " └─Selection_24 0.00 cop not(isnull(test.t2.a))", - " └─IndexScan_23 10.00 cop table:t2, index:a, range: decided by [test.t1.a], keep order:false, stats:pseudo", + " └─IndexScan_23 10.00 cop table:t2, index:a, range: decided by [eq(test.t2.a, test.t1.a)], keep order:false, stats:pseudo", )) tk.MustQuery("select /*+ TIDB_INLJ(t1, t2)*/ count(*) from t1 join t2 on t1.a = t2.id").Check(testkit.Rows( "4", diff --git a/planner/core/cbo_test.go b/planner/core/cbo_test.go index eef47f0f053ae..3a739fb9028c5 100644 --- a/planner/core/cbo_test.go +++ b/planner/core/cbo_test.go @@ -910,7 +910,7 @@ func (s *testAnalyzeSuite) TestIssue9562(c *C) { "│ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo", "└─IndexReader_8 0.00 root index:Selection_7", " └─Selection_7 0.00 cop not(isnull(test.t2.a)), not(isnull(test.t2.c))", - " └─IndexScan_6 10.00 cop table:t2, index:a, b, c, range: decided by [test.t1.a test.t1.c], keep order:false, stats:pseudo", + " └─IndexScan_6 10.00 cop table:t2, index:a, b, c, range: decided by [eq(test.t2.a, test.t1.a) gt(test.t2.b, minus(test.t1.b, 1)) lt(test.t2.b, plus(test.t1.b, 1))], keep order:false, stats:pseudo", )) tk.MustExec("create table t(a int, b int, index idx_ab(a, b))") @@ -1036,7 +1036,7 @@ func (s *testAnalyzeSuite) TestLimitCrossEstimation(c *C) { " │ └─TopN_30 1.00 cop t1.a:asc, offset:0, count:1", " │ └─IndexScan_29 6.00 cop table:t1, index:b, range:[-inf,6], keep order:false", " └─IndexReader_57 1.04 root index:IndexScan_56", - " └─IndexScan_56 1.04 cop table:t2, index:b, range: decided by [t1.a], keep order:false", + " └─IndexScan_56 1.04 cop table:t2, index:b, range: decided by [eq(t2.b, t1.a)], keep order:false", )) // Desc TableScan. 
tk.MustExec("truncate table t") diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 6c5d08e5140a5..e8076fbf96477 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -14,20 +14,25 @@ package core import ( + "bytes" + "context" "fmt" "math" "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" - "github.com/pingcap/parser/terror" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/ranger" "github.com/pingcap/tidb/util/set" + "go.uber.org/zap" ) func (p *LogicalUnionScan) exhaustPhysicalPlans(prop *property.PhysicalProperty) []PhysicalPlan { @@ -317,10 +322,21 @@ func joinKeysMatchIndex(keys, indexCols []*expression.Column, colLengths []int) // When inner plan is TableReader, the parameter `ranges` will be nil. Because pk only have one column. So all of its range // is generated during execution time. -func (p *LogicalJoin) constructIndexJoin(prop *property.PhysicalProperty, innerJoinKeys, outerJoinKeys []*expression.Column, outerIdx int, - innerPlan PhysicalPlan, ranges []*ranger.Range, keyOff2IdxOff []int) []PhysicalPlan { +func (p *LogicalJoin) constructIndexJoin(prop *property.PhysicalProperty, outerIdx int, innerPlan PhysicalPlan, + ranges []*ranger.Range, keyOff2IdxOff []int, compareFilters *ColWithCmpFuncManager) []PhysicalPlan { joinType := p.JoinType outerSchema := p.children[outerIdx].Schema() + var ( + innerJoinKeys []*expression.Column + outerJoinKeys []*expression.Column + ) + if outerIdx == 0 { + outerJoinKeys = p.LeftJoinKeys + innerJoinKeys = p.RightJoinKeys + } else { + innerJoinKeys = p.LeftJoinKeys + outerJoinKeys = p.RightJoinKeys + } all, _ := prop.AllSameOrder() // If the order by columns are not all from outer child, index join cannot promise the order. if !prop.AllColsFromSchema(outerSchema) || !all { @@ -358,6 +374,7 @@ func (p *LogicalJoin) constructIndexJoin(prop *property.PhysicalProperty, innerJ innerPlan: innerPlan, KeyOff2IdxOff: newKeyOff, Ranges: ranges, + CompareFilters: compareFilters, }.Init(p.ctx, p.stats.ScaleByExpectCnt(prop.ExpectedCnt), chReqProps...) join.SetSchema(p.schema) return []PhysicalPlan{join} @@ -414,40 +431,77 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou innerPlan := p.constructInnerTableScan(ds, pkCol, outerJoinKeys, us) // Since the primary key means one value corresponding to exact one row, this will always be a no worse one // comparing to other index. 
- return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, nil, keyOff2IdxOff) + return p.constructIndexJoin(prop, outerIdx, innerPlan, nil, keyOff2IdxOff, nil) } } - var ( - bestIndexInfo *model.IndexInfo - rangesOfBest []*ranger.Range - maxUsedCols int - remainedOfBest []expression.Expression - keyOff2IdxOff []int - ) + helper := &indexJoinBuildHelper{join: p} for _, path := range ds.possibleAccessPaths { if path.isTablePath { continue } indexInfo := path.index - ranges, remained, tmpKeyOff2IdxOff := p.buildRangeForIndexJoin(indexInfo, ds, innerJoinKeys) - // We choose the index by the number of used columns of the range, the much the better. - // Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid. - // But obviously when the range is nil, we don't need index join. - if len(ranges) > 0 && len(ranges[0].LowVal) > maxUsedCols { - bestIndexInfo = indexInfo - maxUsedCols = len(ranges[0].LowVal) - rangesOfBest = ranges - remainedOfBest = remained - keyOff2IdxOff = tmpKeyOff2IdxOff - } - } - if bestIndexInfo != nil { - innerPlan := p.constructInnerIndexScan(ds, bestIndexInfo, remainedOfBest, outerJoinKeys, us) - return p.constructIndexJoin(prop, innerJoinKeys, outerJoinKeys, outerIdx, innerPlan, rangesOfBest, keyOff2IdxOff) + err := helper.analyzeLookUpFilters(indexInfo, ds, innerJoinKeys) + if err != nil { + logutil.Logger(context.Background()).Warn("build index join failed", zap.Error(err)) + } + } + if helper.chosenIndexInfo != nil { + keyOff2IdxOff := make([]int, len(innerJoinKeys)) + for i := range keyOff2IdxOff { + keyOff2IdxOff[i] = -1 + } + for idxOff, keyOff := range helper.idxOff2KeyOff { + if keyOff != -1 { + keyOff2IdxOff[keyOff] = idxOff + } + } + idxCols, _ := expression.IndexInfo2Cols(ds.schema.Columns, helper.chosenIndexInfo) + rangeInfo := helper.buildRangeDecidedByInformation(idxCols, outerJoinKeys) + innerPlan := p.constructInnerIndexScan(ds, helper.chosenIndexInfo, helper.chosenRemained, outerJoinKeys, us, rangeInfo) + return p.constructIndexJoin(prop, outerIdx, innerPlan, helper.chosenRanges, keyOff2IdxOff, helper.lastColManager) } return nil } +type indexJoinBuildHelper struct { + join *LogicalJoin + + chosenIndexInfo *model.IndexInfo + maxUsedCols int + chosenAccess []expression.Expression + chosenRemained []expression.Expression + idxOff2KeyOff []int + lastColManager *ColWithCmpFuncManager + chosenRanges []*ranger.Range + + curPossibleUsedKeys []*expression.Column + curNotUsedIndexCols []*expression.Column + curNotUsedColLens []int + curIdxOff2KeyOff []int +} + +func (ijHelper *indexJoinBuildHelper) buildRangeDecidedByInformation(idxCols []*expression.Column, outerJoinKeys []*expression.Column) string { + buffer := bytes.NewBufferString("[") + isFirst := true + for idxOff, keyOff := range ijHelper.idxOff2KeyOff { + if keyOff == -1 { + continue + } + if !isFirst { + buffer.WriteString(" ") + } else { + isFirst = false + } + buffer.WriteString(fmt.Sprintf("eq(%v, %v)", idxCols[idxOff], outerJoinKeys[keyOff])) + } + for _, access := range ijHelper.chosenAccess { + // Since now there must be eq/in condition so here we can just append space directly. + buffer.WriteString(fmt.Sprintf(" %v", access)) + } + buffer.WriteString("]") + return buffer.String() +} + // constructInnerTableScan is specially used to construct the inner plan for PhysicalIndexJoin. 
func (p *LogicalJoin) constructInnerTableScan(ds *DataSource, pk *expression.Column, outerJoinKeys []*expression.Column, us *LogicalUnionScan) PhysicalPlan { ranges := ranger.FullIntRange(mysql.HasUnsignedFlag(pk.RetType.Flag)) @@ -491,7 +545,8 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi } // constructInnerIndexScan is specially used to construct the inner plan for PhysicalIndexJoin. -func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexInfo, remainedConds []expression.Expression, outerJoinKeys []*expression.Column, us *LogicalUnionScan) PhysicalPlan { +func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexInfo, filterConds []expression.Expression, + outerJoinKeys []*expression.Column, us *LogicalUnionScan, rangeInfo string) PhysicalPlan { is := PhysicalIndexScan{ Table: ds.tableInfo, TableAsName: ds.TableAsName, @@ -501,9 +556,8 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn dataSourceSchema: ds.schema, KeepOrder: false, Ranges: ranger.FullRange(), - rangeDecidedBy: outerJoinKeys, + rangeInfo: rangeInfo, }.Init(ds.ctx) - is.filterCondition = remainedConds var rowCount float64 idxHist, ok := ds.statisticTable.Indices[idx.ID] @@ -529,7 +583,7 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn } is.initSchema(ds.id, idx, cop.tablePlan != nil) - indexConds, tblConds := splitIndexFilterConditions(remainedConds, idx.Columns, ds.tableInfo) + indexConds, tblConds := splitIndexFilterConditions(filterConds, idx.Columns, ds.tableInfo) path := &accessPath{indexFilters: indexConds, tableFilters: tblConds, countAfterIndex: math.MaxFloat64} is.addPushedDownSelection(cop, ds, math.MaxFloat64, path) t := finishCopTask(ds.ctx, cop) @@ -537,92 +591,360 @@ func (p *LogicalJoin) constructInnerIndexScan(ds *DataSource, idx *model.IndexIn return p.constructInnerUnionScan(us, reader) } -// buildRangeForIndexJoin checks whether this index can be used for building index join and return the range if this index is ok. -// If this index is invalid, just return nil range. -func (p *LogicalJoin) buildRangeForIndexJoin(indexInfo *model.IndexInfo, innerPlan *DataSource, innerJoinKeys []*expression.Column) ( - []*ranger.Range, []expression.Expression, []int) { - idxCols, colLengths := expression.IndexInfo2Cols(innerPlan.Schema().Columns, indexInfo) - if len(idxCols) == 0 { - return nil, nil, nil - } +var symmetricOp = map[string]string{ + ast.LT: ast.GT, + ast.GE: ast.LE, + ast.GT: ast.LT, + ast.LE: ast.GE, +} - // Extract the filter to calculate access and the filters that must be remained ones. - access, eqConds, remained, keyOff2IdxOff := p.buildFakeEqCondsForIndexJoin(innerJoinKeys, idxCols, colLengths, innerPlan.pushedDownConds) +// ColWithCmpFuncManager is used in index join to handle the column with compare functions(>=, >, <, <=). +// It stores the compare functions and build ranges in execution phase. 
+type ColWithCmpFuncManager struct { + targetCol *expression.Column + colLength int + OpType []string + opArg []expression.Expression + tmpConstant []*expression.Constant + affectedColSchema *expression.Schema + compareFuncs []chunk.CompareFunc +} - if len(keyOff2IdxOff) == 0 { - return nil, nil, nil +func (cwc *ColWithCmpFuncManager) appendNewExpr(opName string, arg expression.Expression, affectedCols []*expression.Column) { + cwc.OpType = append(cwc.OpType, opName) + cwc.opArg = append(cwc.opArg, arg) + cwc.tmpConstant = append(cwc.tmpConstant, &expression.Constant{RetType: cwc.targetCol.RetType}) + for _, col := range affectedCols { + if cwc.affectedColSchema.Contains(col) { + continue + } + cwc.compareFuncs = append(cwc.compareFuncs, chunk.GetCompareFunc(col.RetType)) + cwc.affectedColSchema.Append(col) } +} - // In `buildFakeEqCondsForIndexJoin`, we construct the equal conditions for join keys and remove filters that contain the join keys' column. - // When t1.a = t2.a and t1.a > 1, we can also guarantee that t1.a > 1 won't be chosen as the access condition. - // So the equal conditions we built can be successfully used to build a range if they can be used. They won't be affected by the existing filters. - res, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, access, idxCols, colLengths) - if err != nil { - terror.Log(err) - return nil, nil, nil +// CompareRow compares the rows for deduplicate. +func (cwc *ColWithCmpFuncManager) CompareRow(lhs, rhs chunk.Row) int { + for i, col := range cwc.affectedColSchema.Columns { + ret := cwc.compareFuncs[i](lhs, col.Index, rhs, col.Index) + if ret != 0 { + return ret + } } + return 0 +} - // Guarantee res.AccessConds is not empty. - if len(res.AccessConds) == 0 { - return nil, nil, nil +// BuildRangesByRow will build range of the given row. It will eval each function's arg then call BuildRange. +func (cwc *ColWithCmpFuncManager) BuildRangesByRow(ctx sessionctx.Context, row chunk.Row) ([]*ranger.Range, error) { + exprs := make([]expression.Expression, len(cwc.OpType)) + for i, opType := range cwc.OpType { + constantArg, err := cwc.opArg[i].Eval(row) + if err != nil { + return nil, err + } + cwc.tmpConstant[i].Value = constantArg + newExpr, err := expression.NewFunction(ctx, opType, types.NewFieldType(mysql.TypeTiny), cwc.targetCol, cwc.tmpConstant[i]) + if err != nil { + return nil, err + } + exprs = append(exprs, newExpr) + } + ranges, err := ranger.BuildColumnRange(exprs, ctx.GetSessionVars().StmtCtx, cwc.targetCol.RetType, cwc.colLength) + if err != nil { + return nil, err } + return ranges, nil +} - // Find invalid fake condition and modify the joinKey's idxOff to -1. - var invalidFakeConds []expression.Expression - for i, eqCond := range eqConds { - if !expression.Contains(res.AccessConds, eqCond) { - keyOff2IdxOff[i] = -1 - invalidFakeConds = append(invalidFakeConds, eqCond) +func (cwc *ColWithCmpFuncManager) resolveIndices(schema *expression.Schema) (err error) { + for i := range cwc.opArg { + cwc.opArg[i], err = cwc.opArg[i].ResolveIndices(schema) + if err != nil { + return err } } + return nil +} - // Filter out invalidFakeConds from res.RemainedConds. - for _, cond := range res.RemainedConds { - if !expression.Contains(invalidFakeConds, cond) { - remained = append(remained, cond) +// String implements Stringer interface. 
+func (cwc *ColWithCmpFuncManager) String() string { + buffer := bytes.NewBufferString("") + for i := range cwc.OpType { + buffer.WriteString(fmt.Sprintf("%v(%v, %v)", cwc.OpType[i], cwc.targetCol, cwc.opArg[i])) + if i < len(cwc.OpType)-1 { + buffer.WriteString(" ") } } - - return res.Ranges, remained, keyOff2IdxOff + return buffer.String() } -func (p *LogicalJoin) buildFakeEqCondsForIndexJoin(keys, idxCols []*expression.Column, colLengths []int, - innerFilters []expression.Expression) (accesses, eqConds, remained []expression.Expression, keyOff2IdxOff []int) { - // Check whether all join keys match one column from index. - keyOff2IdxOff = joinKeysMatchIndex(keys, idxCols, colLengths) - if keyOff2IdxOff == nil { - return nil, nil, nil, nil +func (ijHelper *indexJoinBuildHelper) checkIndex(innerKeys []*expression.Column, idxCols []*expression.Column, colLens []int) bool { + tmpSchema := expression.NewSchema(innerKeys...) + ijHelper.curIdxOff2KeyOff = make([]int, len(idxCols)) + ijHelper.curNotUsedIndexCols = make([]*expression.Column, 0, len(idxCols)) + ijHelper.curNotUsedColLens = make([]int, 0, len(idxCols)) + keyMatched := false + for i, idxCol := range idxCols { + ijHelper.curIdxOff2KeyOff[i] = tmpSchema.ColumnIndex(idxCol) + if ijHelper.curIdxOff2KeyOff[i] >= 0 { + keyMatched = true + continue + } + ijHelper.curNotUsedIndexCols = append(ijHelper.curNotUsedIndexCols, idxCol) + ijHelper.curNotUsedColLens = append(ijHelper.curNotUsedColLens, colLens[i]) } + return keyMatched +} - usableKeys := make([]*expression.Column, 0, len(keys)) +// findUsefulEqAndInFilters analyzes the pushedDownConds held by inner child and split them to three parts. +// usefulEqOrInFilters is the continuous eq/in conditions on current unused index columns. +// uselessFilters is the conditions which cannot be used for building ranges. +// remainingRangeCandidates is the other conditions for future use. +func (ijHelper *indexJoinBuildHelper) findUsefulEqAndInFilters(innerPlan *DataSource) (usefulEqOrInFilters, uselessFilters, remainingRangeCandidates []expression.Expression) { + uselessFilters = make([]expression.Expression, 0, len(innerPlan.pushedDownConds)) + var remainedEqOrIn []expression.Expression + // Extract the eq/in functions of possible join key. + // you can see the comment of ExtractEqAndInCondition to get the meaning of the second return value. + usefulEqOrInFilters, remainedEqOrIn, remainingRangeCandidates, _ = ranger.ExtractEqAndInCondition( + innerPlan.ctx, innerPlan.pushedDownConds, + ijHelper.curNotUsedIndexCols, + ijHelper.curNotUsedColLens, + ) + uselessFilters = append(uselessFilters, remainedEqOrIn...) + return usefulEqOrInFilters, uselessFilters, remainingRangeCandidates +} - conds := make([]expression.Expression, 0, len(keys)+len(innerFilters)) - eqConds = make([]expression.Expression, 0, len(keys)) - // Construct a fake equal expression for every join key for calculating the range. - for i, key := range keys { - if keyOff2IdxOff[i] < 0 { +// buildLastColManager analyze the `OtherConditions` of join to see whether there're some filters can be used in manager. 
+// The returned value is just for outputting explain information +func (ijHelper *indexJoinBuildHelper) buildLastColManager(nextCol *expression.Column, + innerPlan *DataSource, cwc *ColWithCmpFuncManager) []expression.Expression { + var lastColAccesses []expression.Expression +loopOtherConds: + for _, filter := range ijHelper.join.OtherConditions { + sf, ok := filter.(*expression.ScalarFunction) + if !ok || !(sf.FuncName.L == ast.LE || sf.FuncName.L == ast.LT || sf.FuncName.L == ast.GE || sf.FuncName.L == ast.GT) { + continue + } + var funcName string + var anotherArg expression.Expression + if lCol, ok := sf.GetArgs()[0].(*expression.Column); ok && lCol.Equal(nil, nextCol) { + anotherArg = sf.GetArgs()[1] + funcName = sf.FuncName.L + } else if rCol, ok := sf.GetArgs()[1].(*expression.Column); ok && rCol.Equal(nil, nextCol) { + anotherArg = sf.GetArgs()[0] + // The column manager always build expression in the form of col op arg1. + // So we need use the symmetric one of the current function. + funcName = symmetricOp[sf.FuncName.L] + } else { continue } - usableKeys = append(usableKeys, key) - // Int datum 1 can convert to all column's type(numeric type, string type, json, time type, enum, set) safely. - fakeConstant := &expression.Constant{Value: types.NewIntDatum(1), RetType: key.GetType()} - eqFunc := expression.NewFunctionInternal(p.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), key, fakeConstant) - conds = append(conds, eqFunc) - eqConds = append(eqConds, eqFunc) + affectedCols := expression.ExtractColumns(anotherArg) + if len(affectedCols) == 0 { + continue + } + for _, col := range affectedCols { + if innerPlan.schema.Contains(col) { + continue loopOtherConds + } + } + lastColAccesses = append(lastColAccesses, sf) + cwc.appendNewExpr(funcName, anotherArg, affectedCols) } + return lastColAccesses +} - // Look into every `innerFilter`, if it contains join keys' column, put this filter into `remained` part directly. - remained = make([]expression.Expression, 0, len(innerFilters)) - for _, filter := range innerFilters { - affectedCols := expression.ExtractColumns(filter) - if expression.ColumnSliceIsIntersect(affectedCols, usableKeys) { - remained = append(remained, filter) +// removeUselessEqAndInFunc removes the useless eq/in conditions. It's designed for the following case: +// t1 join t2 on t1.a=t2.a and t1.c=t2.c where t1.b > t2.b-10 and t1.b < t2.b+10 there's index(a, b, c) on t1. +// In this case the curIdxOff2KeyOff is [0 -1 1] and the notKeyEqAndIn is []. +// It's clearly that the column c cannot be used to access data. So we need to remove it and reset the IdxOff2KeyOff to +// [0 -1 -1]. +// So that we can use t1.a=t2.a and t1.b > t2.b-10 and t1.b < t2.b+10 to build ranges then access data. 
+func (ijHelper *indexJoinBuildHelper) removeUselessEqAndInFunc(
+	idxCols []*expression.Column,
+	notKeyEqAndIn []expression.Expression) (
+	usefulEqAndIn, uselessOnes []expression.Expression,
+) {
+	ijHelper.curPossibleUsedKeys = make([]*expression.Column, 0, len(idxCols))
+	for idxColPos, notKeyColPos := 0, 0; idxColPos < len(idxCols); idxColPos++ {
+		if ijHelper.curIdxOff2KeyOff[idxColPos] != -1 {
+			ijHelper.curPossibleUsedKeys = append(ijHelper.curPossibleUsedKeys, idxCols[idxColPos])
+			continue
+		}
+		if notKeyColPos < len(notKeyEqAndIn) && ijHelper.curNotUsedIndexCols[notKeyColPos].Equal(nil, idxCols[idxColPos]) {
+			notKeyColPos++
 			continue
 		}
-		conds = append(conds, filter)
+		for i := idxColPos + 1; i < len(idxCols); i++ {
+			ijHelper.curIdxOff2KeyOff[i] = -1
+		}
+		remained := make([]expression.Expression, 0, len(notKeyEqAndIn)-notKeyColPos)
+		remained = append(remained, notKeyEqAndIn[notKeyColPos:]...)
+		notKeyEqAndIn = notKeyEqAndIn[:notKeyColPos]
+		return notKeyEqAndIn, remained
+	}
+	return notKeyEqAndIn, nil
+}
+
+func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(indexInfo *model.IndexInfo, innerPlan *DataSource, innerJoinKeys []*expression.Column) error {
+	idxCols, colLengths := expression.IndexInfo2Cols(innerPlan.schema.Columns, indexInfo)
+	if len(idxCols) == 0 {
+		return nil
+	}
+	accesses := make([]expression.Expression, 0, len(idxCols))
+	// If no index column appears in the join keys, we just return.
+	// TODO: We may meet the case where there is no join key condition but there are compare filters,
+	// e.g. select * from t1 join t2 on t1.a=t2.a and t2.b > t1.b-10 and t2.b < t1.b where t1.a=1 and t2.a=1.
+	// After constant propagation, t1.a=t2.a is removed; with index (t2.a, t2.b), an index join could still be
+	// applied to speed this up.
+	if !ijHelper.checkIndex(innerJoinKeys, idxCols, colLengths) {
+		return nil
+	}
+	notKeyEqAndIn, remained, rangeFilterCandidates := ijHelper.findUsefulEqAndInFilters(innerPlan)
+	var remainedEqAndIn []expression.Expression
+	notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(idxCols, notKeyEqAndIn)
+	matchedKeyCnt := len(ijHelper.curPossibleUsedKeys)
+	if matchedKeyCnt <= 0 {
+		return nil
+	}
+	accesses = append(accesses, notKeyEqAndIn...)
+	remained = append(remained, remainedEqAndIn...)
+	lastColPos := matchedKeyCnt + len(notKeyEqAndIn)
+	// If all the index columns are covered by eq/in conditions, we don't need to consider other conditions anymore.
+	if lastColPos == len(idxCols) {
+		remained = append(remained, rangeFilterCandidates...)
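+		// nextColRange is nil and haveExtraCol is false here: no extra slot is reserved in the template range.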
+		ranges, err := ijHelper.buildTemplateRange(matchedKeyCnt, notKeyEqAndIn, nil, false)
+		if err != nil {
+			return err
+		}
+		ijHelper.updateBestChoice(ranges, indexInfo, accesses, remained, nil)
+		return nil
+	}
+	lastPossibleCol := idxCols[lastColPos]
+	lastColManager := &ColWithCmpFuncManager{
+		targetCol:         lastPossibleCol,
+		colLength:         colLengths[lastColPos],
+		affectedColSchema: expression.NewSchema(),
+	}
+	lastColAccess := ijHelper.buildLastColManager(lastPossibleCol, innerPlan, lastColManager)
+	// If the column manager holds no expression, we fall back to checking whether there are useful normal filters.
+	if len(lastColAccess) == 0 {
+		colAccesses, colRemained := ranger.DetachCondsForColumn(ijHelper.join.ctx, rangeFilterCandidates, lastPossibleCol)
+		var ranges, nextColRange []*ranger.Range
+		var err error
+		if len(colAccesses) > 0 {
+			nextColRange, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx.GetSessionVars().StmtCtx, lastPossibleCol.RetType, colLengths[lastColPos])
+			if err != nil {
+				return err
+			}
+		}
+		ranges, err = ijHelper.buildTemplateRange(matchedKeyCnt, notKeyEqAndIn, nextColRange, false)
+		if err != nil {
+			return err
+		}
+		remained = append(remained, colRemained...)
+		if colLengths[lastColPos] != types.UnspecifiedLength {
+			remained = append(remained, colAccesses...)
+		}
+		accesses = append(accesses, colAccesses...)
+		ijHelper.updateBestChoice(ranges, indexInfo, accesses, remained, nil)
+		return nil
 	}
+	accesses = append(accesses, lastColAccess...)
+	remained = append(remained, rangeFilterCandidates...)
+	ranges, err := ijHelper.buildTemplateRange(matchedKeyCnt, notKeyEqAndIn, nil, true)
+	if err != nil {
+		return err
+	}
+	ijHelper.updateBestChoice(ranges, indexInfo, accesses, remained, lastColManager)
+	return nil
+}
 
-	return conds, eqConds, remained, keyOff2IdxOff
+func (ijHelper *indexJoinBuildHelper) updateBestChoice(ranges []*ranger.Range, idxInfo *model.IndexInfo, accesses,
+	remained []expression.Expression, lastColManager *ColWithCmpFuncManager) {
+	// We choose the index by the number of columns used in the range: the more, the better.
+	// Notice that there may be cases like `t1.a=t2.a and b > 2 and b < 1`, so the ranges can be empty even though the conditions are valid.
+	// Obviously, when the range is empty, we don't need an index join.
+	if len(ranges) > 0 && len(ranges[0].LowVal) > ijHelper.maxUsedCols {
+		ijHelper.chosenIndexInfo = idxInfo
+		ijHelper.maxUsedCols = len(ranges[0].LowVal)
+		ijHelper.chosenRanges = ranges
+		ijHelper.chosenAccess = accesses
+		ijHelper.chosenRemained = remained
+		ijHelper.idxOff2KeyOff = ijHelper.curIdxOff2KeyOff
+		ijHelper.lastColManager = lastColManager
+	}
+}
+
+func (ijHelper *indexJoinBuildHelper) buildTemplateRange(matchedKeyCnt int, eqAndInFuncs []expression.Expression, nextColRange []*ranger.Range, haveExtraCol bool) (ranges []*ranger.Range, err error) {
+	pointLength := matchedKeyCnt + len(eqAndInFuncs)
+	if nextColRange != nil {
+		for _, colRan := range nextColRange {
+			// The range's exclude status is the same as the last column's.
+			ran := &ranger.Range{
+				LowVal:      make([]types.Datum, pointLength, pointLength+1),
+				HighVal:     make([]types.Datum, pointLength, pointLength+1),
+				LowExclude:  colRan.LowExclude,
+				HighExclude: colRan.HighExclude,
+			}
+			ran.LowVal = append(ran.LowVal, colRan.LowVal[0])
+			ran.HighVal = append(ran.HighVal, colRan.HighVal[0])
+			ranges = append(ranges, ran)
+		}
+	} else if haveExtraCol {
+		// Reserve a position for the last col.
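+		// Its bounds are filled at execution time, when CompareFilters is evaluated against each outer row.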
+ ranges = append(ranges, &ranger.Range{ + LowVal: make([]types.Datum, pointLength+1, pointLength+1), + HighVal: make([]types.Datum, pointLength+1, pointLength+1), + }) + } else { + ranges = append(ranges, &ranger.Range{ + LowVal: make([]types.Datum, pointLength, pointLength), + HighVal: make([]types.Datum, pointLength, pointLength), + }) + } + emptyRow := chunk.Row{} + for i, j := 0, 0; j < len(eqAndInFuncs); i++ { + // This position is occupied by join key. + if ijHelper.curIdxOff2KeyOff[i] != -1 { + continue + } + sf := eqAndInFuncs[j].(*expression.ScalarFunction) + // Deal with the first two args. + if _, ok := sf.GetArgs()[0].(*expression.Column); ok { + for _, ran := range ranges { + ran.LowVal[i], err = sf.GetArgs()[1].Eval(emptyRow) + if err != nil { + return nil, err + } + ran.HighVal[i] = ran.LowVal[i] + } + } else { + for _, ran := range ranges { + ran.LowVal[i], err = sf.GetArgs()[0].Eval(emptyRow) + if err != nil { + return nil, err + } + ran.HighVal[i] = ran.LowVal[i] + } + } + // If the length of in function's constant list is more than one, we will expand ranges. + curRangeLen := len(ranges) + for argIdx := 2; argIdx < len(sf.GetArgs()); argIdx++ { + newRanges := make([]*ranger.Range, 0, curRangeLen) + for oldRangeIdx := 0; oldRangeIdx < curRangeLen; oldRangeIdx++ { + newRange := ranges[oldRangeIdx].Clone() + newRange.LowVal[i], err = sf.GetArgs()[argIdx].Eval(emptyRow) + if err != nil { + return nil, err + } + newRange.HighVal[i] = newRange.LowVal[i] + newRanges = append(newRanges, newRange) + } + ranges = append(ranges, newRanges...) + } + j++ + } + return ranges, nil } // tryToGetIndexJoin will get index join by hints. If we can generate a valid index join by hint, the second return value @@ -651,10 +973,6 @@ func (p *LogicalJoin) tryToGetIndexJoin(prop *property.PhysicalProperty) (indexJ } }() - if len(p.EqualConditions) == 0 { - return nil, false - } - switch p.JoinType { case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin, LeftOuterJoin: join := p.getIndexJoinByOuterIdx(prop, 0) diff --git a/planner/core/exhaust_physical_plans_test.go b/planner/core/exhaust_physical_plans_test.go new file mode 100644 index 0000000000000..a7cf2a21a9a1f --- /dev/null +++ b/planner/core/exhaust_physical_plans_test.go @@ -0,0 +1,238 @@ +// Copyright 2018 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package core + +import ( + "fmt" + + . 
"github.com/pingcap/check" + "github.com/pingcap/parser/ast" + "github.com/pingcap/parser/model" + "github.com/pingcap/parser/mysql" + "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/types" +) + +func (s *testUnitTestSuit) rewriteSimpleExpr(str string, schema *expression.Schema) ([]expression.Expression, error) { + if str == "" { + return nil, nil + } + filters, err := expression.ParseSimpleExprsWithSchema(s.ctx, str, schema) + if err != nil { + return nil, err + } + if sf, ok := filters[0].(*expression.ScalarFunction); ok && sf.FuncName.L == ast.LogicAnd { + filters = expression.FlattenCNFConditions(sf) + } + return filters, nil +} + +func (s *testUnitTestSuit) TestIndexJoinAnalyzeLookUpFilters(c *C) { + s.ctx.GetSessionVars().PlanID = -1 + joinNode := LogicalJoin{}.Init(s.ctx) + dataSourceNode := DataSource{}.Init(s.ctx) + dsSchema := expression.NewSchema() + dsSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("a"), + TblName: model.NewCIStr("t"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeLonglong), + }) + dsSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("b"), + TblName: model.NewCIStr("t"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeLonglong), + }) + dsSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("c"), + TblName: model.NewCIStr("t"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeVarchar), + }) + dsSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("d"), + TblName: model.NewCIStr("t"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeLonglong), + }) + dataSourceNode.schema = dsSchema + outerChildSchema := expression.NewSchema() + outerChildSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("e"), + TblName: model.NewCIStr("t1"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeLonglong), + }) + outerChildSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("f"), + TblName: model.NewCIStr("t1"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeLonglong), + }) + outerChildSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("g"), + TblName: model.NewCIStr("t1"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeVarchar), + }) + outerChildSchema.Append(&expression.Column{ + UniqueID: s.ctx.GetSessionVars().AllocPlanColumnID(), + ColName: model.NewCIStr("h"), + TblName: model.NewCIStr("t1"), + DBName: model.NewCIStr("test"), + RetType: types.NewFieldType(mysql.TypeLonglong), + }) + joinNode.SetSchema(expression.MergeSchema(dsSchema, outerChildSchema)) + var idxCols []*model.IndexColumn + idxCols = append(idxCols, &model.IndexColumn{Name: model.NewCIStr("a"), Length: types.UnspecifiedLength}) + idxCols = append(idxCols, &model.IndexColumn{Name: model.NewCIStr("b"), Length: types.UnspecifiedLength}) + idxCols = append(idxCols, &model.IndexColumn{Name: model.NewCIStr("c"), Length: 2}) + idxCols = append(idxCols, &model.IndexColumn{Name: model.NewCIStr("d"), Length: types.UnspecifiedLength}) + 
idxInfo := &model.IndexInfo{Columns: idxCols} + + tests := []struct { + innerKeys []*expression.Column + pushedDownConds string + otherConds string + ranges string + idxOff2KeyOff string + accesses string + remained string + compareFilters string + }{ + // Join key not continuous and no pushed filter to match. + { + innerKeys: []*expression.Column{dsSchema.Columns[0], dsSchema.Columns[2]}, + pushedDownConds: "", + otherConds: "", + ranges: "[[NULL,NULL]]", + idxOff2KeyOff: "[0 -1 -1 -1]", + accesses: "[]", + remained: "[]", + compareFilters: "", + }, + // Join key and pushed eq filter not continuous. + { + innerKeys: []*expression.Column{dsSchema.Columns[2]}, + pushedDownConds: "a = 1", + otherConds: "", + ranges: "[]", + idxOff2KeyOff: "[]", + accesses: "[]", + remained: "[]", + compareFilters: "", + }, + // Keys are continuous. + { + innerKeys: []*expression.Column{dsSchema.Columns[1]}, + pushedDownConds: "a = 1", + otherConds: "", + ranges: "[[1 NULL,1 NULL]]", + idxOff2KeyOff: "[-1 0 -1 -1]", + accesses: "[eq(test.t.a, 1)]", + remained: "[]", + compareFilters: "", + }, + // Keys are continuous and there're correlated filters. + { + innerKeys: []*expression.Column{dsSchema.Columns[1]}, + pushedDownConds: "a = 1", + otherConds: "c > g and c < concat(g, \"ab\")", + ranges: "[[1 NULL NULL,1 NULL NULL]]", + idxOff2KeyOff: "[-1 0 -1 -1]", + accesses: "[eq(test.t.a, 1) gt(test.t.c, test.t1.g) lt(test.t.c, concat(test.t1.g, ab))]", + remained: "[]", + compareFilters: "gt(test.t.c, test.t1.g) lt(test.t.c, concat(test.t1.g, ab))", + }, + // cast function won't be involved. + { + innerKeys: []*expression.Column{dsSchema.Columns[1]}, + pushedDownConds: "a = 1", + otherConds: "c > g and c < g + 10", + ranges: "[[1 NULL NULL,1 NULL NULL]]", + idxOff2KeyOff: "[-1 0 -1 -1]", + accesses: "[eq(test.t.a, 1) gt(test.t.c, test.t1.g)]", + remained: "[]", + compareFilters: "gt(test.t.c, test.t1.g)", + }, + // Can deal with prefix index correctly. + { + innerKeys: []*expression.Column{dsSchema.Columns[1]}, + pushedDownConds: "a = 1 and c > 'a' and c < 'aaaaaa'", + otherConds: "", + ranges: "[(1 NULL \"a\",1 NULL \"[97 97]\"]]", + idxOff2KeyOff: "[-1 0 -1 -1]", + accesses: "[eq(test.t.a, 1) gt(test.t.c, a) lt(test.t.c, aaaaaa)]", + remained: "[gt(test.t.c, a) lt(test.t.c, aaaaaa)]", + compareFilters: "", + }, + // Can generate correct ranges for in functions. + { + innerKeys: []*expression.Column{dsSchema.Columns[1]}, + pushedDownConds: "a in (1, 2, 3) and c in ('a', 'b', 'c')", + otherConds: "", + ranges: "[[1 NULL \"a\",1 NULL \"a\"] [2 NULL \"a\",2 NULL \"a\"] [3 NULL \"a\",3 NULL \"a\"] [1 NULL \"b\",1 NULL \"b\"] [2 NULL \"b\",2 NULL \"b\"] [3 NULL \"b\",3 NULL \"b\"] [1 NULL \"c\",1 NULL \"c\"] [2 NULL \"c\",2 NULL \"c\"] [3 NULL \"c\",3 NULL \"c\"]]", + idxOff2KeyOff: "[-1 0 -1 -1]", + accesses: "[in(test.t.a, 1, 2, 3) in(test.t.c, a, b, c)]", + remained: "[in(test.t.c, a, b, c)]", + compareFilters: "", + }, + // Can generate correct ranges for in functions with correlated filters.. 
+ { + innerKeys: []*expression.Column{dsSchema.Columns[1]}, + pushedDownConds: "a in (1, 2, 3) and c in ('a', 'b', 'c')", + otherConds: "d > h and d < h + 100", + ranges: "[[1 NULL \"a\" NULL,1 NULL \"a\" NULL] [2 NULL \"a\" NULL,2 NULL \"a\" NULL] [3 NULL \"a\" NULL,3 NULL \"a\" NULL] [1 NULL \"b\" NULL,1 NULL \"b\" NULL] [2 NULL \"b\" NULL,2 NULL \"b\" NULL] [3 NULL \"b\" NULL,3 NULL \"b\" NULL] [1 NULL \"c\" NULL,1 NULL \"c\" NULL] [2 NULL \"c\" NULL,2 NULL \"c\" NULL] [3 NULL \"c\" NULL,3 NULL \"c\" NULL]]", + idxOff2KeyOff: "[-1 0 -1 -1]", + accesses: "[in(test.t.a, 1, 2, 3) in(test.t.c, a, b, c) gt(test.t.d, test.t1.h) lt(test.t.d, plus(test.t1.h, 100))]", + remained: "[in(test.t.c, a, b, c)]", + compareFilters: "gt(test.t.d, test.t1.h) lt(test.t.d, plus(test.t1.h, 100))", + }, + // Join keys are not continuous and the pushed key connect the key but not eq/in functions. + { + innerKeys: []*expression.Column{dsSchema.Columns[0], dsSchema.Columns[2]}, + pushedDownConds: "b > 1", + otherConds: "", + ranges: "[(NULL 1,NULL +inf]]", + idxOff2KeyOff: "[0 -1 -1 -1]", + accesses: "[gt(test.t.b, 1)]", + remained: "[]", + compareFilters: "", + }, + } + for i, tt := range tests { + pushed, err := s.rewriteSimpleExpr(tt.pushedDownConds, dsSchema) + c.Assert(err, IsNil) + dataSourceNode.pushedDownConds = pushed + others, err := s.rewriteSimpleExpr(tt.otherConds, joinNode.schema) + c.Assert(err, IsNil) + joinNode.OtherConditions = others + helper := &indexJoinBuildHelper{join: joinNode, lastColManager: nil} + err = helper.analyzeLookUpFilters(idxInfo, dataSourceNode, tt.innerKeys) + c.Assert(err, IsNil) + c.Assert(fmt.Sprintf("%v", helper.chosenRanges), Equals, tt.ranges, Commentf("test case: #%v", i)) + c.Assert(fmt.Sprintf("%v", helper.idxOff2KeyOff), Equals, tt.idxOff2KeyOff) + c.Assert(fmt.Sprintf("%v", helper.chosenAccess), Equals, tt.accesses) + c.Assert(fmt.Sprintf("%v", helper.chosenRemained), Equals, tt.remained) + c.Assert(fmt.Sprintf("%v", helper.lastColManager), Equals, tt.compareFilters) + } +} diff --git a/planner/core/explain.go b/planner/core/explain.go index 18fe8f8f998df..ad801dbaecd8f 100644 --- a/planner/core/explain.go +++ b/planner/core/explain.go @@ -58,8 +58,8 @@ func (p *PhysicalIndexScan) ExplainInfo() string { break } } - if len(p.rangeDecidedBy) > 0 { - fmt.Fprintf(buffer, ", range: decided by %v", p.rangeDecidedBy) + if len(p.rangeInfo) > 0 { + fmt.Fprintf(buffer, ", range: decided by %v", p.rangeInfo) } else if haveCorCol { fmt.Fprintf(buffer, ", range: decided by %v", p.AccessCondition) } else if len(p.Ranges) > 0 { diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index 80df8bcf548c8..b71546145175b 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -477,7 +477,6 @@ func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty, candid IdxColLens: path.idxColLens, AccessCondition: path.accessConds, Ranges: path.ranges, - filterCondition: path.indexFilters, dataSourceSchema: ds.schema, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, @@ -734,7 +733,7 @@ func (ds *DataSource) crossEstimateRowCount(path *accessPath, expectedCnt float6 return 0, false, corr } sc := ds.ctx.GetSessionVars().StmtCtx - ranges, err := ranger.BuildColumnRange(accessConds, sc, col.RetType) + ranges, err := ranger.BuildColumnRange(accessConds, sc, col.RetType, types.UnspecifiedLength) if len(ranges) == 0 || err != nil { return 0, err == nil, corr } diff --git a/planner/core/physical_plans.go 
b/planner/core/physical_plans.go
index bfa7598adf72a..81529c780bbd4 100644
--- a/planner/core/physical_plans.go
+++ b/planner/core/physical_plans.go
@@ -90,7 +90,6 @@ type PhysicalIndexScan struct {
 
 	// AccessCondition is used to calculate range.
 	AccessCondition []expression.Expression
-	filterCondition []expression.Expression
 
 	Table      *model.TableInfo
 	Index      *model.IndexInfo
@@ -115,7 +114,7 @@ type PhysicalIndexScan struct {
 	// It is used for query feedback.
 	Hist *statistics.Histogram
 
-	rangeDecidedBy []*expression.Column
+	rangeInfo string
 
 	// The index scan may be on a partition.
 	isPartition bool
@@ -235,6 +234,12 @@ type PhysicalIndexJoin struct {
 	Ranges []*ranger.Range
 	// KeyOff2IdxOff maps the offsets in join key to the offsets in the index.
 	KeyOff2IdxOff []int
+	// CompareFilters stores the filters for the last column if those filters need to be evaluated during execution.
+	// e.g. select * from t, t1 where t.a = t1.a and t.b > t1.b and t.b < t1.b+10
+	// If there's index (t.a, t.b), all the filters can be used to construct the index range, but t.b > t1.b and t.b < t1.b+10
+	// need to be evaluated after we fetch the data of t1.
+	// This struct stores them and evaluates them into ranges.
+	CompareFilters *ColWithCmpFuncManager
 }
 
 // PhysicalMergeJoin represents merge join for inner/ outer join.
diff --git a/planner/core/resolve_indices.go b/planner/core/resolve_indices.go
index 3cabf66819691..ec415eaf9bef6 100644
--- a/planner/core/resolve_indices.go
+++ b/planner/core/resolve_indices.go
@@ -193,6 +193,19 @@ func (p *PhysicalIndexJoin) ResolveIndices() (err error) {
 			return err
 		}
 	}
+	if p.CompareFilters != nil {
+		err = p.CompareFilters.resolveIndices(p.children[p.OuterIndex].Schema())
+		if err != nil {
+			return err
+		}
+		for i := range p.CompareFilters.affectedColSchema.Columns {
+			resolvedCol, err1 := p.CompareFilters.affectedColSchema.Columns[i].ResolveIndices(p.children[p.OuterIndex].Schema())
+			if err1 != nil {
+				return err1
+			}
+			p.CompareFilters.affectedColSchema.Columns[i] = resolvedCol.(*expression.Column)
+		}
+	}
 	return
 }
 
diff --git a/planner/core/rule_partition_processor.go b/planner/core/rule_partition_processor.go
index c5d7ec75cb851..b3a6200151ce8 100644
--- a/planner/core/rule_partition_processor.go
+++ b/planner/core/rule_partition_processor.go
@@ -18,6 +18,7 @@ import (
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/sessionctx"
 	"github.com/pingcap/tidb/table/tables"
+	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/chunk"
 	"github.com/pingcap/tidb/util/ranger"
 )
@@ -186,7 +187,7 @@ func (s *partitionProcessor) canBePruned(sctx sessionctx.Context, partCol *expre
 	// handle the null condition, while calculate range can prune something like:
 	// "select * from t where t is null"
 	accessConds := ranger.ExtractAccessConditionsForColumn(conds, partCol.UniqueID)
-	r, err := ranger.BuildColumnRange(accessConds, sctx.GetSessionVars().StmtCtx, partCol.RetType)
+	r, err := ranger.BuildColumnRange(accessConds, sctx.GetSessionVars().StmtCtx, partCol.RetType, types.UnspecifiedLength)
 	if err != nil {
 		return false, err
 	}
diff --git a/statistics/selectivity.go b/statistics/selectivity.go
index 60cd6d80bff98..fef5c54907d25 100644
--- a/statistics/selectivity.go
+++ b/statistics/selectivity.go
@@ -21,6 +21,7 @@ import (
 	"github.com/pingcap/parser/mysql"
 	"github.com/pingcap/tidb/expression"
 	"github.com/pingcap/tidb/sessionctx"
+	"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/ranger" ) @@ -266,7 +267,7 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran switch rangeType { case ranger.ColumnRangeType: accessConds = ranger.ExtractAccessConditionsForColumn(exprs, cols[0].UniqueID) - ranges, err = ranger.BuildColumnRange(accessConds, sc, cols[0].RetType) + ranges, err = ranger.BuildColumnRange(accessConds, sc, cols[0].RetType, types.UnspecifiedLength) case ranger.IndexRangeType: var res *ranger.DetachRangeResult res, err = ranger.DetachCondAndBuildRangeForIndex(ctx, exprs, cols, lengths) diff --git a/util/ranger/detacher.go b/util/ranger/detacher.go index 6aabbfda3f841..671ea328a0c5b 100644 --- a/util/ranger/detacher.go +++ b/util/ranger/detacher.go @@ -146,7 +146,7 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex ) res := &DetachRangeResult{} - accessConds, filterConds, newConditions, emptyRange := extractEqAndInCondition(sctx, conditions, cols, lengths) + accessConds, filterConds, newConditions, emptyRange := ExtractEqAndInCondition(sctx, conditions, cols, lengths) if emptyRange { return res, nil } @@ -156,8 +156,6 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex break } } - // We should remove all accessConds, so that they will not be added to filter conditions. - newConditions = removeAccessConditions(newConditions, accessConds) eqOrInCount := len(accessConds) res.EqCondCount = eqCount res.EqOrInCount = eqOrInCount @@ -197,7 +195,13 @@ func detachCNFCondAndBuildRangeForIndex(sctx sessionctx.Context, conditions []ex return res, err } -func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, +// ExtractEqAndInCondition will split the given condition into three parts by the information of index columns and their lengths. +// accesses: The condition will be used to build range. +// filters: filters is the part that some access conditions need to be evaluate again since it's only the prefix part of char column. +// newConditions: We'll simplify the given conditions if there're multiple in conditions or eq conditions on the same column. +// e.g. if there're a in (1, 2, 3) and a in (2, 3, 4). This two will be combined to a in (2, 3) and pushed to newConditions. +// bool: indicate whether there's nil range when merging eq and in conditions. +func ExtractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Expression, cols []*expression.Column, lengths []int) ([]expression.Expression, []expression.Expression, []expression.Expression, bool) { var filters []expression.Expression rb := builder{sc: sctx.GetSessionVars().StmtCtx} @@ -246,6 +250,8 @@ func extractEqAndInCondition(sctx sessionctx.Context, conditions []expression.Ex filters = append(filters, cond) } } + // We should remove all accessConds, so that they will not be added to filter conditions. + newConditions = removeAccessConditions(newConditions, accesses) return accesses, filters, newConditions, false } diff --git a/util/ranger/ranger.go b/util/ranger/ranger.go index 5722c7a17e43e..015f6ea447eb7 100644 --- a/util/ranger/ranger.go +++ b/util/ranger/ranger.go @@ -243,7 +243,7 @@ func points2TableRanges(sc *stmtctx.StatementContext, rangePoints []point, tp *t } // buildColumnRange builds range from CNF conditions. 
-func buildColumnRange(accessConditions []expression.Expression, sc *stmtctx.StatementContext, tp *types.FieldType, tableRange bool) (ranges []*Range, err error) { +func buildColumnRange(accessConditions []expression.Expression, sc *stmtctx.StatementContext, tp *types.FieldType, tableRange bool, colLen int) (ranges []*Range, err error) { rb := builder{sc: sc} rangePoints := fullRange for _, cond := range accessConditions { @@ -261,20 +261,30 @@ func buildColumnRange(accessConditions []expression.Expression, sc *stmtctx.Stat if err != nil { return nil, errors.Trace(err) } + if colLen != types.UnspecifiedLength { + for _, ran := range ranges { + if fixRangeDatum(&ran.LowVal[0], colLen, tp) { + ran.LowExclude = false + } + if fixRangeDatum(&ran.HighVal[0], colLen, tp) { + ran.HighExclude = false + } + } + } return ranges, nil } // BuildTableRange builds range of PK column for PhysicalTableScan. func BuildTableRange(accessConditions []expression.Expression, sc *stmtctx.StatementContext, tp *types.FieldType) ([]*Range, error) { - return buildColumnRange(accessConditions, sc, tp, true) + return buildColumnRange(accessConditions, sc, tp, true, types.UnspecifiedLength) } // BuildColumnRange builds range from access conditions for general columns. -func BuildColumnRange(conds []expression.Expression, sc *stmtctx.StatementContext, tp *types.FieldType) ([]*Range, error) { +func BuildColumnRange(conds []expression.Expression, sc *stmtctx.StatementContext, tp *types.FieldType, colLen int) ([]*Range, error) { if len(conds) == 0 { return []*Range{{LowVal: []types.Datum{{}}, HighVal: []types.Datum{types.MaxValueDatum()}}}, nil } - return buildColumnRange(conds, sc, tp, false) + return buildColumnRange(conds, sc, tp, false, colLen) } // buildCNFIndexRange builds the range for index where the top layer is CNF. @@ -485,7 +495,7 @@ func newFieldType(tp *types.FieldType) *types.FieldType { // 1. 'expr' must be either 'EQUAL' or 'IN' function. // 2. 'points' should not be empty. 
func points2EqOrInCond(ctx sessionctx.Context, points []point, expr expression.Expression) expression.Expression { - // len(points) cannot be 0 here, since we impose early termination in extractEqAndInCondition + // len(points) cannot be 0 here, since we impose early termination in ExtractEqAndInCondition sf, _ := expr.(*expression.ScalarFunction) // Constant and Column args should have same RetType, simply get from first arg retType := sf.GetArgs()[0].GetType() diff --git a/util/ranger/ranger_test.go b/util/ranger/ranger_test.go index c967f70178f62..5d17fdb91e31e 100644 --- a/util/ranger/ranger_test.go +++ b/util/ranger/ranger_test.go @@ -29,6 +29,7 @@ import ( "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/store/mockstore" "github.com/pingcap/tidb/store/mockstore/mocktikv" + "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/ranger" "github.com/pingcap/tidb/util/testkit" "github.com/pingcap/tidb/util/testleak" @@ -956,7 +957,7 @@ func (s *testRangerSuite) TestColumnRange(c *C) { c.Assert(col, NotNil) conds = ranger.ExtractAccessConditionsForColumn(conds, col.UniqueID) c.Assert(fmt.Sprintf("%s", conds), Equals, tt.accessConds, Commentf("wrong access conditions for expr: %s", tt.exprStr)) - result, err := ranger.BuildColumnRange(conds, new(stmtctx.StatementContext), col.RetType) + result, err := ranger.BuildColumnRange(conds, new(stmtctx.StatementContext), col.RetType, types.UnspecifiedLength) c.Assert(err, IsNil) got := fmt.Sprintf("%v", result) c.Assert(got, Equals, tt.resultStr, Commentf("different for expr %s, col: %v", tt.exprStr, col))
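For reference, a minimal sketch of the query shape this enhancement targets (table, column, and index names below are illustrative, not taken from the patch): with index idx(a, b, c) on the inner table t2, the index-join lookup range can now be built from the join key t1.a = t2.a, the pushed-down filter t2.b = 1, and the compare filters on the last index column, which ColWithCmpFuncManager evaluates against each outer row at execution time.

SELECT *
FROM t1 JOIN t2 ON t1.a = t2.a
WHERE t2.b = 1
  AND t2.c > t1.c - 10
  AND t2.c < t1.c + 10;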