Skip to content

Commit

Permalink
ORCA: allow not enforce distribution key in 3-stage aggregate (#776)
Browse files Browse the repository at this point in the history
In the case where the group-by key is a distribution key, ORCA disables 3-stage aggregate by default.

Add a new GUC (optimizer_enable_use_distribution_in_dqa) that allows ORCA to skip enforcing the distribution key
in a 3-stage aggregate, which enables local deduplication before the 2-stage agg.
  • Loading branch information
jiaqizho authored Dec 18, 2024
1 parent 57a5320 commit 0deffcb
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 14 deletions.
6 changes: 5 additions & 1 deletion src/backend/gpopt/config/CConfigParamMapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,11 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = {
false, // m_negate_param
GPOS_WSZ_LIT(
"Explore a nested loop join even if a hash join is possible")},

{EopttraceEnableUseDistributionInDQA,
&optimizer_enable_use_distribution_in_dqa,
false, // m_negate_param
GPOS_WSZ_LIT(
"Enable use the distribution key in DQA")},
};

//---------------------------------------------------------------------------
Expand Down
17 changes: 11 additions & 6 deletions src/backend/gporca/libgpopt/src/operators/CPhysicalAgg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,17 @@ CPhysicalAgg::CPhysicalAgg(
ulDistrReqs = 2;
}

// Split DQA generates a 2-stage aggregate to handle the case where
// hash aggregate has a distinct agg func. Here we need to be careful
// not to prohibit distribution property enforcement.
m_should_enforce_distribution &= !(
isAggFromSplitDQA && aggStage == CLogicalGbAgg::EasTwoStageScalarDQA &&
colref_array->Size() > 0);
// When the trace flag is set, never enforce the distribution property
// for this aggregate (enforcement is switched off, not on).
if (GPOS_FTRACE(EopttraceEnableUseDistributionInDQA)) {
m_should_enforce_distribution = false;
} else {
// Split DQA generates a 2-stage aggregate to handle the case where
// hash aggregate has a distinct agg func. Here we need to be careful
// not to prohibit distribution property enforcement.
m_should_enforce_distribution &= !(
isAggFromSplitDQA && aggStage == CLogicalGbAgg::EasTwoStageScalarDQA &&
colref_array->Size() > 0);
}

SetDistrRequests(ulDistrReqs);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,14 @@ enum EOptTraceFlag
// Use legacy (cdbhash) opfamilies for compatibility
EopttraceUseLegacyOpfamilies = 103039,

// enable NL Left Join plan alternatives where inner child is redistributed if possible
// Enable NL Left Join plan alternatives where inner child is redistributed if possible
EopttraceEnableRedistributeNLLOJInnerChild = 103040,

EopttraceForceComprehensiveJoinImplementation = 103041,

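// Do not enforce the distribution property in aggregates, so that a DQA
// grouped on the distribution key can still use a 3-stage aggregate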
EopttraceEnableUseDistributionInDQA = 103042,

///////////////////////////////////////////////////////
///////////////////// statistics flags ////////////////
//////////////////////////////////////////////////////
Expand Down
12 changes: 12 additions & 0 deletions src/backend/utils/misc/guc_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ bool optimizer_enable_space_pruning;
bool optimizer_enable_associativity;
bool optimizer_enable_eageragg;
bool optimizer_enable_range_predicate_dpe;
bool optimizer_enable_use_distribution_in_dqa;

/* Analyze related GUCs for Optimizer */
bool optimizer_analyze_root_partition;
Expand Down Expand Up @@ -2952,6 +2953,17 @@ struct config_bool ConfigureNamesBool_gp[] =
NULL, NULL, NULL
},

{
{"optimizer_enable_use_distribution_in_dqa", PGC_USERSET, DEVELOPER_OPTIONS,
gettext_noop("Enable use the distribution key in DQA"),
NULL,
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
},
&optimizer_enable_use_distribution_in_dqa,
false,
NULL, NULL, NULL
},

{
{"gp_pause_on_restore_point_replay", PGC_SIGHUP, DEVELOPER_OPTIONS,
gettext_noop("Pause recovery when a restore point is replayed."),
Expand Down
1 change: 1 addition & 0 deletions src/include/utils/guc.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ extern bool optimizer_cte_inlining;
extern bool optimizer_enable_space_pruning;
extern bool optimizer_enable_associativity;
extern bool optimizer_enable_range_predicate_dpe;
extern bool optimizer_enable_use_distribution_in_dqa;

/* Analyze related GUCs for Optimizer */
extern bool optimizer_analyze_root_partition;
Expand Down
1 change: 1 addition & 0 deletions src/include/utils/unsync_guc_name.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@
"optimizer_enable_partition_propagation",
"optimizer_enable_partition_selection",
"optimizer_enable_range_predicate_dpe",
"optimizer_enable_use_distribution_in_dqa",
"optimizer_enable_redistribute_nestloop_loj_inner_child",
"optimizer_enable_replicated_table",
"optimizer_enable_sort",
Expand Down
7 changes: 7 additions & 0 deletions src/test/regress/expected/gp_dqa.out
Original file line number Diff line number Diff line change
Expand Up @@ -2377,6 +2377,10 @@ select count(distinct a) from t_issue_659;
(1 row)

set gp_eager_distinct_dedup = on;
-- for ORCA
set optimizer_force_three_stage_scalar_dqa to on;
set optimizer_force_multistage_agg to on;
set optimizer_enable_use_distribution_in_dqa to on;
explain(costs off)
select count(distinct a) from t_issue_659;
QUERY PLAN
Expand All @@ -2397,4 +2401,7 @@ select count(distinct a) from t_issue_659;
(1 row)

reset gp_eager_distinct_dedup;
reset optimizer_force_three_stage_scalar_dqa;
reset optimizer_force_multistage_agg;
reset optimizer_enable_use_distribution_in_dqa;
drop table t_issue_659;
22 changes: 16 additions & 6 deletions src/test/regress/expected/gp_dqa_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -2524,16 +2524,23 @@ select count(distinct a) from t_issue_659;
(1 row)

set gp_eager_distinct_dedup = on;
-- for ORCA
set optimizer_force_three_stage_scalar_dqa to on;
set optimizer_force_multistage_agg to on;
set optimizer_enable_use_distribution_in_dqa to on;
explain(costs off)
select count(distinct a) from t_issue_659;
QUERY PLAN
------------------------------------------------
Finalize Aggregate
QUERY PLAN
-------------------------------------------------
Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Partial Aggregate
-> Seq Scan on t_issue_659
-> HashAggregate
Group Key: a
-> Streaming HashAggregate
Group Key: a
-> Seq Scan on t_issue_659
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
(8 rows)

select count(distinct a) from t_issue_659;
count
Expand All @@ -2542,4 +2549,7 @@ select count(distinct a) from t_issue_659;
(1 row)

reset gp_eager_distinct_dedup;
reset optimizer_force_three_stage_scalar_dqa;
reset optimizer_force_multistage_agg;
reset optimizer_enable_use_distribution_in_dqa;
drop table t_issue_659;
7 changes: 7 additions & 0 deletions src/test/regress/sql/gp_dqa.sql
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,15 @@ explain(costs off)
select count(distinct a) from t_issue_659;
select count(distinct a) from t_issue_659;
set gp_eager_distinct_dedup = on;
-- for ORCA
set optimizer_force_three_stage_scalar_dqa to on;
set optimizer_force_multistage_agg to on;
set optimizer_enable_use_distribution_in_dqa to on;
explain(costs off)
select count(distinct a) from t_issue_659;
select count(distinct a) from t_issue_659;
reset gp_eager_distinct_dedup;
reset optimizer_force_three_stage_scalar_dqa;
reset optimizer_force_multistage_agg;
reset optimizer_enable_use_distribution_in_dqa;
drop table t_issue_659;

0 comments on commit 0deffcb

Please sign in to comment.