Skip to content

Commit

Permalink
Improve EXPLAIN output of Delim Joins and Delim Gets
Browse files Browse the repository at this point in the history
  • Loading branch information
kryonix committed Jul 15, 2024
1 parent e911104 commit 23ccd79
Show file tree
Hide file tree
Showing 16 changed files with 59 additions and 13 deletions.
12 changes: 9 additions & 3 deletions src/execution/operator/join/physical_delim_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ namespace duckdb {

PhysicalDelimJoin::PhysicalDelimJoin(PhysicalOperatorType type, vector<LogicalType> types,
unique_ptr<PhysicalOperator> original_join,
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality)
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality,
optional_idx delim_idx)
: PhysicalOperator(type, std::move(types), estimated_cardinality), join(std::move(original_join)),
delim_scans(std::move(delim_scans)) {
delim_scans(std::move(delim_scans)), delim_idx(delim_idx) {
D_ASSERT(type == PhysicalOperatorType::LEFT_DELIM_JOIN || type == PhysicalOperatorType::RIGHT_DELIM_JOIN);
}

Expand All @@ -23,7 +24,12 @@ vector<const_reference<PhysicalOperator>> PhysicalDelimJoin::GetChildren() const
}

string PhysicalDelimJoin::ParamsToString() const {
return join->ParamsToString();
string result = join->ParamsToString();
if (delim_idx.IsValid()) {
result += "\n[INFOSEPARATOR]\n";
result += StringUtil::Format("idx: %llu", delim_idx.GetIndex());
}
return result;
}

} // namespace duckdb
7 changes: 5 additions & 2 deletions src/execution/operator/join/physical_left_delim_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ namespace duckdb {

PhysicalLeftDelimJoin::PhysicalLeftDelimJoin(vector<LogicalType> types, unique_ptr<PhysicalOperator> original_join,
vector<const_reference<PhysicalOperator>> delim_scans,
idx_t estimated_cardinality)
idx_t estimated_cardinality, optional_idx delim_idx)
: PhysicalDelimJoin(PhysicalOperatorType::LEFT_DELIM_JOIN, std::move(types), std::move(original_join),
std::move(delim_scans), estimated_cardinality) {
std::move(delim_scans), estimated_cardinality, delim_idx) {
D_ASSERT(join->children.size() == 2);
// now for the original join
// we take its left child, this is the side that we will duplicate eliminate
Expand All @@ -24,6 +24,9 @@ PhysicalLeftDelimJoin::PhysicalLeftDelimJoin(vector<LogicalType> types, unique_p
// the actual chunk collection to scan will be created in the LeftDelimJoinGlobalState
auto cached_chunk_scan = make_uniq<PhysicalColumnDataScan>(
children[0]->GetTypes(), PhysicalOperatorType::COLUMN_DATA_SCAN, estimated_cardinality, nullptr);
if (delim_idx.IsValid()) {
cached_chunk_scan->cte_index = delim_idx.GetIndex();
}
join->children[0] = std::move(cached_chunk_scan);
}

Expand Down
4 changes: 2 additions & 2 deletions src/execution/operator/join/physical_right_delim_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ namespace duckdb {

PhysicalRightDelimJoin::PhysicalRightDelimJoin(vector<LogicalType> types, unique_ptr<PhysicalOperator> original_join,
vector<const_reference<PhysicalOperator>> delim_scans,
idx_t estimated_cardinality)
idx_t estimated_cardinality, optional_idx delim_idx)
: PhysicalDelimJoin(PhysicalOperatorType::RIGHT_DELIM_JOIN, std::move(types), std::move(original_join),
std::move(delim_scans), estimated_cardinality) {
std::move(delim_scans), estimated_cardinality, delim_idx) {
D_ASSERT(join->children.size() == 2);
// now for the original join
// we take its right child, this is the side that we will duplicate eliminate
Expand Down
6 changes: 6 additions & 0 deletions src/execution/operator/scan/physical_column_data_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
string PhysicalColumnDataScan::ParamsToString() const {
string result = "";
switch (type) {
case PhysicalOperatorType::DELIM_SCAN:
if (delim_index.IsValid()) {
result += "\n[INFOSEPARATOR]\n";
result += StringUtil::Format("idx: %llu", delim_index.GetIndex());
}
break;
case PhysicalOperatorType::CTE_SCAN:
case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
result += "\n[INFOSEPARATOR]\n";
Expand Down
1 change: 1 addition & 0 deletions src/execution/physical_plan/plan_delim_get.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDelimGet &
// create a PhysicalChunkScan without an owned_collection, the collection will be added later
auto chunk_scan = make_uniq<PhysicalColumnDataScan>(op.types, PhysicalOperatorType::DELIM_SCAN,
op.estimated_cardinality, nullptr);
chunk_scan->delim_index = op.delim_idx;
return std::move(chunk_scan);
}

Expand Down
8 changes: 5 additions & 3 deletions src/execution/physical_plan/plan_delim_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,16 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::PlanDelimJoin(LogicalCompari
// now create the duplicate eliminated join
unique_ptr<PhysicalDelimJoin> delim_join;
if (op.delim_flipped) {
delim_join =
make_uniq<PhysicalRightDelimJoin>(op.types, std::move(plan), delim_scans, op.estimated_cardinality);
delim_join = make_uniq<PhysicalRightDelimJoin>(op.types, std::move(plan), delim_scans, op.estimated_cardinality,
op.mark_index);
} else {
delim_join = make_uniq<PhysicalLeftDelimJoin>(op.types, std::move(plan), delim_scans, op.estimated_cardinality);
delim_join = make_uniq<PhysicalLeftDelimJoin>(op.types, std::move(plan), delim_scans, op.estimated_cardinality,
op.mark_index);
}
// we still have to create the DISTINCT clause that is used to generate the duplicate eliminated chunk
delim_join->distinct = make_uniq<PhysicalHashAggregate>(context, delim_types, std::move(distinct_expressions),
std::move(distinct_groups), op.estimated_cardinality);

return std::move(delim_join);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@ class PhysicalHashAggregate;
class PhysicalDelimJoin : public PhysicalOperator {
public:
PhysicalDelimJoin(PhysicalOperatorType type, vector<LogicalType> types, unique_ptr<PhysicalOperator> original_join,
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality);
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality,
optional_idx delim_idx);

unique_ptr<PhysicalOperator> join;
unique_ptr<PhysicalHashAggregate> distinct;
vector<const_reference<PhysicalOperator>> delim_scans;

optional_idx delim_idx;

public:
vector<const_reference<PhysicalOperator>> GetChildren() const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class PhysicalLeftDelimJoin : public PhysicalDelimJoin {

public:
PhysicalLeftDelimJoin(vector<LogicalType> types, unique_ptr<PhysicalOperator> original_join,
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality);
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality,
optional_idx delim_idx);

public:
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class PhysicalRightDelimJoin : public PhysicalDelimJoin {

public:
PhysicalRightDelimJoin(vector<LogicalType> types, unique_ptr<PhysicalOperator> original_join,
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality);
vector<const_reference<PhysicalOperator>> delim_scans, idx_t estimated_cardinality,
optional_idx delim_idx);

public:
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class PhysicalColumnDataScan : public PhysicalOperator {
optionally_owned_ptr<ColumnDataCollection> collection;

idx_t cte_index;
optional_idx delim_index;

public:
unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/planner/operator/logical_delim_get.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class LogicalDelimGet : public LogicalOperator {
idx_t table_index;
//! The types of the chunk
vector<LogicalType> chunk_types;
//! Delim Join Index
optional_idx delim_idx;

public:
vector<ColumnBinding> GetColumnBindings() override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ struct FlattenDependentJoins {
bool perform_delim;
bool any_join;

idx_t delim_root_idx;

private:
unique_ptr<LogicalOperator> PushDownDependentJoinInternal(unique_ptr<LogicalOperator> plan,
bool &parent_propagate_null_values, idx_t lateral_depth);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,12 @@
"id": 201,
"name": "chunk_types",
"type": "vector<LogicalType>"
},
{
"id": 202,
"name": "delim_idx",
"type": "optional_idx",
"default": "optional_idx()"
}
],
"constructor": ["table_index", "chunk_types"]
Expand Down
8 changes: 8 additions & 0 deletions src/planner/binder/query_node/plan_subquery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,17 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
// the input value NULL will generate the value 42, and we need to join NULL on the LHS with NULL on the RHS
// the left side is the original plan
// this is the side that will be duplicate eliminated and pushed into the RHS
idx_t mark_index = binder.GenerateTableIndex();
auto delim_join =
CreateDuplicateEliminatedJoin(correlated_columns, JoinType::SINGLE, std::move(root), perform_delim);

delim_join->mark_index = mark_index;
// the right side initially is a DEPENDENT join between the duplicate eliminated scan and the subquery
// HOWEVER: we do not explicitly create the dependent join
// instead, we eliminate the dependent join by pushing it down into the right side of the plan
FlattenDependentJoins flatten(binder, correlated_columns, perform_delim);

flatten.delim_root_idx = mark_index;
// first we check which logical operators have correlated expressions in the first place
flatten.DetectCorrelatedExpressions(*plan);
// now we push the dependent join down
Expand All @@ -277,6 +280,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
delim_join->mark_index = mark_index;
// RHS
FlattenDependentJoins flatten(binder, correlated_columns, perform_delim, true);
flatten.delim_root_idx = mark_index;
flatten.DetectCorrelatedExpressions(*plan);
auto dependent_join = flatten.PushDownDependentJoin(std::move(plan));

Expand Down Expand Up @@ -305,6 +309,7 @@ static unique_ptr<Expression> PlanCorrelatedSubquery(Binder &binder, BoundSubque
delim_join->mark_index = mark_index;
// RHS
FlattenDependentJoins flatten(binder, correlated_columns, true, true);
flatten.delim_root_idx = mark_index;
flatten.DetectCorrelatedExpressions(*plan);
auto dependent_join = flatten.PushDownDependentJoin(std::move(plan));

Expand Down Expand Up @@ -422,9 +427,12 @@ unique_ptr<LogicalOperator> Binder::PlanLateralJoin(unique_ptr<LogicalOperator>
}

auto perform_delim = PerformDuplicateElimination(*this, correlated);
idx_t delim_idx = GenerateTableIndex();
auto delim_join = CreateDuplicateEliminatedJoin(correlated, join_type, std::move(left), perform_delim);
delim_join->mark_index = delim_idx;

FlattenDependentJoins flatten(*this, correlated, perform_delim);
flatten.delim_root_idx = delim_idx;

// first we check which logical operators have correlated expressions in the first place
flatten.DetectCorrelatedExpressions(*right, true);
Expand Down
2 changes: 2 additions & 0 deletions src/planner/subquery/flatten_dependent_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
delim_offset = left_columns;
data_offset = 0;
delim_scan = make_uniq<LogicalDelimGet>(delim_index, delim_types);
delim_scan->delim_idx = optional_idx(delim_root_idx);
if (plan->type == LogicalOperatorType::LOGICAL_PROJECTION) {
// we want to keep the logical projection for positionality.
exit_projection = true;
Expand Down Expand Up @@ -269,6 +270,7 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
}
auto left_index = binder.GenerateTableIndex();
delim_scan = make_uniq<LogicalDelimGet>(left_index, delim_types);
delim_scan->delim_idx = optional_idx(delim_root_idx);
join->children.push_back(std::move(delim_scan));
join->children.push_back(std::move(plan));
for (idx_t i = 0; i < new_group_count; i++) {
Expand Down
2 changes: 2 additions & 0 deletions src/storage/serialization/serialize_logical_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,14 @@ void LogicalDelimGet::Serialize(Serializer &serializer) const {
LogicalOperator::Serialize(serializer);
serializer.WritePropertyWithDefault<idx_t>(200, "table_index", table_index);
serializer.WritePropertyWithDefault<vector<LogicalType>>(201, "chunk_types", chunk_types);
serializer.WritePropertyWithDefault<optional_idx>(202, "delim_idx", delim_idx, optional_idx());
}

unique_ptr<LogicalOperator> LogicalDelimGet::Deserialize(Deserializer &deserializer) {
auto table_index = deserializer.ReadPropertyWithDefault<idx_t>(200, "table_index");
auto chunk_types = deserializer.ReadPropertyWithDefault<vector<LogicalType>>(201, "chunk_types");
auto result = duckdb::unique_ptr<LogicalDelimGet>(new LogicalDelimGet(table_index, std::move(chunk_types)));
deserializer.ReadPropertyWithDefault<optional_idx>(202, "delim_idx", result->delim_idx, optional_idx());
return std::move(result);
}

Expand Down

0 comments on commit 23ccd79

Please sign in to comment.