Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX: re-enable the NL-index in ORCA and fix the Join2IndexApplyGeneric #807

Merged
merged 1 commit into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "gpopt/base/CDistributionSpecHashed.h"
#include "gpopt/base/CDistributionSpecNonSingleton.h"
#include "gpopt/base/CDistributionSpecReplicated.h"
#include "gpopt/exception.h"
#include "gpopt/operators/CExpressionHandle.h"
#include "gpopt/operators/CPredicateUtils.h"

Expand Down Expand Up @@ -119,13 +118,6 @@ CPhysicalInnerIndexNLJoin::Ped(CMemoryPool *mp, CExpressionHandle &exprhdl,
CEnfdDistribution::EDistributionMatching dmatch =
Edm(prppInput, child_index, pdrgpdpCtxt, ulDistrReq);

// FIXME: nestloop with inner index scan may produce wrong plan, see
// issue https://github.com/cloudberrydb/cloudberrydb/issues/567
// Fallback to postgres optimizer to avoid wrong plan. We should
// fix this issue and remove the following exception.
GPOS_RAISE(gpopt::ExmaGPOPT, gpopt::ExmiUnsupportedOp,
GPOS_WSZ_LIT("Fallback: InnerIndexNestLoopJoin may have wrong plan"));

if (1 == child_index)
{
// inner (index-scan side) is requested for Any distribution,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,14 @@ CXformJoin2IndexApplyGeneric::Transform(CXformContext *pxfctxt,
CLogicalDynamicGet::PopConvert(pexprCurrInnerChild->Pop());
ptabdescInner = popDynamicGet->Ptabdesc();
distributionCols = popDynamicGet->PcrsDist();
// See issue https://github.com/apache/cloudberry/issues/567:
// the DynamicGet case also needs to check that the grouping columns contain the distribution columns
if (nullptr != groupingColsToCheck.Value() &&
!groupingColsToCheck->ContainsAll(distributionCols))
{
// the grouping columns are not a superset of the distribution columns
return;
}
pexprGet = pexprCurrInnerChild;
}
break;
Expand Down
44 changes: 18 additions & 26 deletions src/test/regress/expected/aggregates_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -1342,39 +1342,31 @@ explain (costs off) select a,c from t1 group by a,c,d;
explain (costs off) select *
from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z;
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
QUERY PLAN
------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> HashAggregate
Group Key: t1.a, t1.b, t2.x, t2.y
-> Hash Join
Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-> Seq Scan on t2
-> Hash
-> Seq Scan on t1
Optimizer: Postgres query optimizer
(9 rows)
-> Nested Loop
Join Filter: true
-> Seq Scan on t1
-> Index Scan using t2_pkey on t2
Index Cond: ((x = t1.a) AND (y = t1.b))
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)

-- Test case where t1 can be optimized but not t2
explain (costs off) select t1.*,t2.x,t2.z
from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y
group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
INFO: GPORCA failed to produce a plan, falling back to planner
DETAIL: Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
QUERY PLAN
------------------------------------------------------------
QUERY PLAN
-------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> HashAggregate
Group Key: t1.a, t1.b, t2.x, t2.z
-> Hash Join
Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
-> Seq Scan on t2
-> Hash
-> Seq Scan on t1
Optimizer: Postgres query optimizer
(9 rows)
-> Nested Loop
Join Filter: true
-> Seq Scan on t1
-> Index Scan using t2_pkey on t2
Index Cond: ((x = t1.a) AND (y = t1.b))
Optimizer: Pivotal Optimizer (GPORCA)
(7 rows)

-- Cannot optimize when PK is deferrable
explain (costs off) select * from t3 group by a,b,c;
Expand Down
208 changes: 90 additions & 118 deletions src/test/regress/expected/bfv_index_optimizer.out

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion src/test/regress/expected/bfv_partition_plans.out
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
insert into mpp23195_t1 values (generate_series(1,19));
insert into mpp23195_t2 values (1);
-- TEST
-- Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
select find_operator('select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;', 'Dynamic Index Scan');
find_operator
---------------
Expand Down
3 changes: 1 addition & 2 deletions src/test/regress/expected/bfv_partition_plans_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,10 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
insert into mpp23195_t1 values (generate_series(1,19));
insert into mpp23195_t2 values (1);
-- TEST
-- Operator Fallback: InnerIndexNestLoopJoin may have wrong plan not supported
select find_operator('select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;', 'Dynamic Index Scan');
find_operator
---------------
['false']
['true']
(1 row)

select * from mpp23195_t1,mpp23195_t2 where mpp23195_t1.i < mpp23195_t2.i;
Expand Down
56 changes: 30 additions & 26 deletions src/test/regress/expected/co_nestloop_idxscan_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ create index foo_id_idx on co_nestloop_idxscan.foo(id);
explain select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
QUERY PLAN
-------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=1.02..510.33 rows=6 width=8)
-> Hash Join (cost=1.02..510.25 rows=2 width=8)
Hash Cond: (f.id = b.id)
-> Seq Scan on foo f (cost=0.00..509.17 rows=17 width=8)
-> Hash (cost=1.01..1.01 rows=1 width=8)
-> Seq Scan on bar b (cost=0.00..1.01 rows=1 width=8)
Optimizer: Postgres query optimizer
(7 rows)
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8)
-> Nested Loop (cost=0.00..499.13 rows=1 width=8)
Join Filter: true
-> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8)
-> Bitmap Heap Scan on foo (cost=0.00..68.13 rows=1 width=8)
Recheck Cond: (id = bar.id)
-> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0)
Index Cond: (id = bar.id)
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
(9 rows)

select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
id
Expand All @@ -49,15 +51,16 @@ set enable_nestloop=on;
explain select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
QUERY PLAN
-------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=8.15..13.26 rows=6 width=8)
-> Nested Loop (cost=8.15..13.18 rows=2 width=8)
-> Seq Scan on bar b (cost=0.00..1.01 rows=1 width=8)
-> Bitmap Heap Scan on foo f (cost=8.15..12.16 rows=1 width=8)
Recheck Cond: (id = b.id)
-> Bitmap Index Scan on foo_id_idx (cost=0.00..8.15 rows=1 width=0)
Index Cond: (id = b.id)
Optimizer: Postgres query optimizer
(8 rows)
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8)
-> Nested Loop (cost=0.00..499.13 rows=1 width=8)
Join Filter: true
-> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8)
-> Bitmap Heap Scan on foo (cost=0.00..68.13 rows=1 width=8)
Recheck Cond: (id = bar.id)
-> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0)
Index Cond: (id = bar.id)
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
(9 rows)

select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
id
Expand All @@ -73,15 +76,16 @@ set enable_seqscan = off;
explain select f.id from co_nestloop_idxscan.bar b, co_nestloop_idxscan.foo f where f.id = b.id;
QUERY PLAN
-------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3) (cost=10000000008.15..10000000013.26 rows=6 width=8)
-> Nested Loop (cost=10000000008.15..10000000013.18 rows=2 width=8)
-> Seq Scan on bar b (cost=10000000000.00..10000000001.01 rows=1 width=8)
-> Bitmap Heap Scan on foo f (cost=8.15..12.16 rows=1 width=8)
Recheck Cond: (id = b.id)
-> Bitmap Index Scan on foo_id_idx (cost=0.00..8.15 rows=1 width=0)
Index Cond: (id = b.id)
Optimizer: Postgres query optimizer
(8 rows)
Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..499.13 rows=1 width=8)
-> Nested Loop (cost=0.00..499.13 rows=1 width=8)
Join Filter: true
-> Seq Scan on bar (cost=0.00..431.00 rows=1 width=8)
-> Bitmap Heap Scan on foo (cost=0.00..68.13 rows=1 width=8)
Recheck Cond: (id = bar.id)
-> Bitmap Index Scan on foo_id_idx (cost=0.00..0.00 rows=0 width=0)
Index Cond: (id = bar.id)
Optimizer: Pivotal Optimizer (GPORCA) version 3.72.0
(9 rows)

select f.id from co_nestloop_idxscan.foo f, co_nestloop_idxscan.bar b where f.id = b.id;
id
Expand Down
Loading
Loading