Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: build anti semi join for NOT EXISTS #7842

Merged
merged 9 commits into from
Oct 9, 2018
Merged
2 changes: 2 additions & 0 deletions ast/expressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ type ExistsSubqueryExpr struct {
exprNode
// Sel is the subquery, may be rewritten to other type of expression.
Sel ExprNode
// Not is true when the expression is "not exists".
Not bool
}

// Format the ExprNode into a Writer.
Expand Down
7 changes: 7 additions & 0 deletions cmd/explaintest/r/explain_easy.result
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,13 @@ Projection_7 12.50 root t1.a, t1.b, t2.a, t2.b
│ └─TableScan_10 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
└─TableReader_14 10000.00 root data:TableScan_13
└─TableScan_13 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
explain select * from t t1 where not exists (select * from t t2 where t1.b = t2.b);
id count task operator info
HashLeftJoin_9 8000.00 root anti semi join, inner:TableReader_13, equal:[eq(t1.b, t2.b)]
├─TableReader_11 10000.00 root data:TableScan_10
│ └─TableScan_10 10000.00 cop table:t1, range:[-inf,+inf], keep order:false, stats:pseudo
└─TableReader_13 10000.00 root data:TableScan_12
└─TableScan_12 10000.00 cop table:t2, range:[-inf,+inf], keep order:false, stats:pseudo
drop table if exists t;
create table t(a bigint primary key);
explain select * from t where a = 1 and a = 2;
Expand Down
64 changes: 31 additions & 33 deletions cmd/explaintest/r/tpch.result
Original file line number Diff line number Diff line change
Expand Up @@ -1215,31 +1215,30 @@ id count task operator info
Projection_25 100.00 root tpch.supplier.s_name, 17_col_0
└─TopN_28 100.00 root 17_col_0:desc, tpch.supplier.s_name:asc, offset:0, count:100
└─HashAgg_31 320000.00 root group by:tpch.supplier.s_name, funcs:count(1), firstrow(tpch.supplier.s_name)
└─Selection_32 3786715.90 root not(16_aux_0)
└─IndexJoin_38 4733394.87 root left outer semi join, inner:IndexLookUp_37, outer key:l1.l_orderkey, inner key:l3.l_orderkey, other cond:ne(l3.l_suppkey, l1.l_suppkey)
├─IndexJoin_82 4733394.87 root semi join, inner:IndexLookUp_81, outer key:l1.l_orderkey, inner key:l2.l_orderkey, other cond:ne(l2.l_suppkey, l1.l_suppkey), ne(l2.l_suppkey, tpch.supplier.s_suppkey)
│ ├─HashLeftJoin_88 5916743.59 root inner join, inner:TableReader_117, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)]
│ │ ├─HashLeftJoin_93 147918589.81 root inner join, inner:TableReader_114, equal:[eq(l1.l_suppkey, tpch.supplier.s_suppkey)]
│ │ │ ├─IndexJoin_100 147918589.81 root inner join, inner:IndexLookUp_99, outer key:tpch.orders.o_orderkey, inner key:l1.l_orderkey
│ │ │ │ ├─TableReader_109 36517371.00 root data:Selection_108
│ │ │ │ │ └─Selection_108 36517371.00 cop eq(tpch.orders.o_orderstatus, "F")
│ │ │ │ │ └─TableScan_107 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ │ │ │ └─IndexLookUp_99 240004648.80 root
│ │ │ │ ├─IndexScan_96 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ │ │ │ └─Selection_98 240004648.80 cop gt(l1.l_receiptdate, l1.l_commitdate)
│ │ │ │ └─TableScan_97 1.00 cop table:lineitem, keep order:false
│ │ │ └─TableReader_114 500000.00 root data:TableScan_113
│ │ │ └─TableScan_113 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
│ │ └─TableReader_117 1.00 root data:Selection_116
│ │ └─Selection_116 1.00 cop eq(tpch.nation.n_name, "EGYPT")
│ │ └─TableScan_115 25.00 cop table:nation, range:[-inf,+inf], keep order:false
│ └─IndexLookUp_81 1.00 root
│ ├─IndexScan_79 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
│ └─TableScan_80 1.00 cop table:lineitem, keep order:false
└─IndexLookUp_37 240004648.80 root
├─IndexScan_34 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
└─Selection_36 240004648.80 cop gt(l3.l_receiptdate, l3.l_commitdate)
└─TableScan_35 1.00 cop table:lineitem, keep order:false
└─IndexJoin_37 3786715.90 root anti semi join, inner:IndexLookUp_36, outer key:l1.l_orderkey, inner key:l3.l_orderkey, other cond:ne(l3.l_suppkey, l1.l_suppkey), ne(l3.l_suppkey, tpch.supplier.s_suppkey)
├─IndexJoin_81 4733394.87 root semi join, inner:IndexLookUp_80, outer key:l1.l_orderkey, inner key:l2.l_orderkey, other cond:ne(l2.l_suppkey, l1.l_suppkey), ne(l2.l_suppkey, tpch.supplier.s_suppkey)
│ ├─HashLeftJoin_87 5916743.59 root inner join, inner:TableReader_116, equal:[eq(tpch.supplier.s_nationkey, tpch.nation.n_nationkey)]
│ │ ├─HashLeftJoin_92 147918589.81 root inner join, inner:TableReader_113, equal:[eq(l1.l_suppkey, tpch.supplier.s_suppkey)]
│ │ │ ├─IndexJoin_99 147918589.81 root inner join, inner:IndexLookUp_98, outer key:tpch.orders.o_orderkey, inner key:l1.l_orderkey
│ │ │ │ ├─TableReader_108 36517371.00 root data:Selection_107
│ │ │ │ │ └─Selection_107 36517371.00 cop eq(tpch.orders.o_orderstatus, "F")
│ │ │ │ │ └─TableScan_106 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
│ │ │ │ └─IndexLookUp_98 240004648.80 root
│ │ │ │ ├─IndexScan_95 1.00 cop table:l1, index:L_ORDERKEY, L_LINENUMBER, range: decided by [tpch.orders.o_orderkey], keep order:false
│ │ │ │ └─Selection_97 240004648.80 cop gt(l1.l_receiptdate, l1.l_commitdate)
│ │ │ │ └─TableScan_96 1.00 cop table:lineitem, keep order:false
│ │ │ └─TableReader_113 500000.00 root data:TableScan_112
│ │ │ └─TableScan_112 500000.00 cop table:supplier, range:[-inf,+inf], keep order:false
│ │ └─TableReader_116 1.00 root data:Selection_115
│ │ └─Selection_115 1.00 cop eq(tpch.nation.n_name, "EGYPT")
│ │ └─TableScan_114 25.00 cop table:nation, range:[-inf,+inf], keep order:false
│ └─IndexLookUp_80 1.00 root
│ ├─IndexScan_78 1.00 cop table:l2, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
│ └─TableScan_79 1.00 cop table:lineitem, keep order:false
└─IndexLookUp_36 240004648.80 root
├─IndexScan_33 1.00 cop table:l3, index:L_ORDERKEY, L_LINENUMBER, range: decided by [l1.l_orderkey], keep order:false
└─Selection_35 240004648.80 cop gt(l3.l_receiptdate, l3.l_commitdate)
└─TableScan_34 1.00 cop table:lineitem, keep order:false
/*
Q22 Global Sales Opportunity Query
The Global Sales Opportunity Query identifies geographies where there are customers who may be likely to make a
Expand Down Expand Up @@ -1291,11 +1290,10 @@ Sort_32 1.00 root custsale.cntrycode:asc
└─Projection_34 1.00 root custsale.cntrycode, 28_col_0, 28_col_1
└─HashAgg_37 1.00 root group by:custsale.cntrycode, funcs:count(1), sum(custsale.c_acctbal), firstrow(custsale.cntrycode)
└─Projection_38 0.00 root substring(tpch.customer.c_phone, 1, 2), tpch.customer.c_acctbal
└─Selection_39 0.00 root not(26_aux_0)
└─HashLeftJoin_40 0.00 root left outer semi join, inner:TableReader_46, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
├─Selection_41 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21")
│ └─TableReader_44 0.00 root data:Selection_43
│ └─Selection_43 0.00 cop gt(tpch.customer.c_acctbal, NULL)
│ └─TableScan_42 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false
└─TableReader_46 75000000.00 root data:TableScan_45
└─TableScan_45 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
└─HashLeftJoin_39 0.00 root anti semi join, inner:TableReader_45, equal:[eq(tpch.customer.c_custkey, tpch.orders.o_custkey)]
├─Selection_40 0.00 root in(substring(tpch.customer.c_phone, 1, 2), "20", "40", "22", "30", "39", "42", "21")
│ └─TableReader_43 0.00 root data:Selection_42
│ └─Selection_42 0.00 cop gt(tpch.customer.c_acctbal, NULL)
│ └─TableScan_41 7500000.00 cop table:customer, range:[-inf,+inf], keep order:false
└─TableReader_45 75000000.00 root data:TableScan_44
└─TableScan_44 75000000.00 cop table:orders, range:[-inf,+inf], keep order:false
1 change: 1 addition & 0 deletions cmd/explaintest/t/explain_easy.test
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ explain select * from t where a = 1 and a = 1;
explain select * from t where a = 1 and a = 2;
explain select * from t where b = 1 and b = 2;
explain select * from t t1 join t t2 where t1.b = t2.b and t2.b is null;
explain select * from t t1 where not exists (select * from t t2 where t1.b = t2.b);

drop table if exists t;
create table t(a bigint primary key);
Expand Down
8 changes: 7 additions & 1 deletion parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -2348,7 +2348,13 @@ Expression:
}
| "NOT" Expression %prec not
{
$$ = &ast.UnaryOperationExpr{Op: opcode.Not, V: $2}
expr, ok := $2.(*ast.ExistsSubqueryExpr)
if ok {
expr.Not = true
$$ = $2
} else {
$$ = &ast.UnaryOperationExpr{Op: opcode.Not, V: $2}
}
}
| BoolPri IsOrNotOp trueKwd %prec is
{
Expand Down
18 changes: 18 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2425,3 +2425,21 @@ func (s *testParserSuite) TestTablePartition(c *C) {
createTable := stmt.(*ast.CreateTableStmt)
c.Assert(createTable.Partition.Definitions[0].Comment, Equals, "check")
}

// TestNotExistsSubquery checks that the WHERE clause of a NOT EXISTS query is
// parsed directly into an *ast.ExistsSubqueryExpr and that its Not flag matches
// the expectation recorded in the test case.
func (s *testParserSuite) TestNotExistsSubquery(c *C) {
	defer testleak.AfterTest(c)()

	// In this test the ok field of testCase carries the expected value of Not.
	cases := []testCase{
		{`select * from t1 where not exists (select * from t2 where t1.a = t2.a)`, true},
	}

	p := New()
	for _, tc := range cases {
		stmts, err := p.Parse(tc.src, "", "")
		c.Assert(err, IsNil)

		// The WHERE clause itself must be the exists expression.
		selectStmt := stmts[0].(*ast.SelectStmt)
		existsExpr, isExists := selectStmt.Where.(*ast.ExistsSubqueryExpr)
		c.Assert(isExists, IsTrue)
		c.Assert(existsExpr.Not, Equals, tc.ok)
	}
}
4 changes: 2 additions & 2 deletions planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as
}
np = er.popExistsSubPlan(np)
if len(np.extractCorrelatedCols()) > 0 {
er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, false)
er.p, er.err = er.b.buildSemiApply(er.p, np, nil, er.asScalar, v.Not)
if er.err != nil || !er.asScalar {
return v, true
}
Expand All @@ -562,7 +562,7 @@ func (er *expressionRewriter) handleExistSubquery(v *ast.ExistsSubqueryExpr) (as
er.err = errors.Trace(err)
return v, true
}
if len(rows) > 0 {
if (len(rows) > 0 && !v.Not) || (len(rows) == 0 && v.Not) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does this check mean?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rows is the result of the subquery evaluated by EvalSubquery(); this check directly sets the result of the exists subquery according to rows.

er.ctxStack = append(er.ctxStack, expression.One.Clone())
} else {
er.ctxStack = append(er.ctxStack, expression.Zero.Clone())
Expand Down