-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: project rel to and from substrait to include pass through columns #135
base: main
Are you sure you want to change the base?
Changes from all commits
f013167
c7499d9
46d4c8e
48f42ea
7053a04
c9d28f1
ff0cc26
eebaf3d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,7 +162,7 @@ | |
interval_t interval {}; | ||
interval.months = 0; | ||
interval.days = literal.interval_day_to_second().days(); | ||
interval.micros = literal.interval_day_to_second().microseconds(); | ||
Check warning on line 165 in src/from_substrait.cpp GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)
Check warning on line 165 in src/from_substrait.cpp GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)
Check warning on line 165 in src/from_substrait.cpp GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)
|
||
return Value::INTERVAL(interval); | ||
} | ||
default: | ||
|
@@ -492,22 +492,59 @@ | |
return make_shared_ptr<FilterRelation>(TransformOp(sfilter.input()), TransformExpr(sfilter.condition())); | ||
} | ||
|
||
const google::protobuf::RepeatedField<int32_t>& GetOutputMapping(const substrait::Rel &sop) { | ||
const substrait::RelCommon* common = nullptr; | ||
switch (sop.rel_type_case()) { | ||
case substrait::Rel::RelTypeCase::kJoin: | ||
common = &sop.join().common(); | ||
break; | ||
case substrait::Rel::RelTypeCase::kProject: | ||
common = &sop.project().common(); | ||
break; | ||
default: | ||
throw InternalException("Unsupported relation type " + to_string(sop.rel_type_case())); | ||
} | ||
if (!common->has_emit()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It may be useful to break this into code to get the common table and code to get the emit from the mapping. That way you can use the common structure to update direct and emit if necessary. You might be able to use templates to get the RelCommon, which could reduce the overall amount of code. |
||
static google::protobuf::RepeatedField<int32_t> empty_mapping; | ||
return empty_mapping; | ||
} | ||
return common->emit().output_mapping(); | ||
} | ||
|
||
shared_ptr<Relation> | ||
SubstraitToDuckDB::TransformProjectOp(const substrait::Rel &sop, | ||
const google::protobuf::RepeatedPtrField<std::string> *names) { | ||
vector<unique_ptr<ParsedExpression>> expressions; | ||
RootNameIterator iterator(names); | ||
|
||
for (auto &sexpr : sop.project().expressions()) { | ||
expressions.push_back(TransformExpr(sexpr, &iterator)); | ||
auto input_rel = TransformOp(sop.project().input()); | ||
|
||
auto mapping = GetOutputMapping(sop); | ||
auto num_input_columns = input_rel->Columns().size(); | ||
if (mapping.empty()) { | ||
for (int i = 1; i <= num_input_columns; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The first column is numbered at zero. I found https://substrait.io/tutorial/sql_to_substrait/#field-indices to be useful (in addition to the individual relation pages). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The index |
||
expressions.push_back(make_uniq<PositionalReferenceExpression>(i)); | ||
} | ||
|
||
for (auto &sexpr : sop.project().expressions()) { | ||
expressions.push_back(TransformExpr(sexpr, &iterator)); | ||
} | ||
} else { | ||
expressions.resize(mapping.size()); | ||
for (size_t i = 0; i < mapping.size(); i++) { | ||
if (mapping[i] < num_input_columns) { | ||
expressions[i] = make_uniq<PositionalReferenceExpression>(mapping[i] + 1); | ||
} else { | ||
expressions[i] = TransformExpr(sop.project().expressions(mapping[i] - num_input_columns), &iterator); | ||
} | ||
} | ||
} | ||
|
||
vector<string> mock_aliases; | ||
for (size_t i = 0; i < expressions.size(); i++) { | ||
mock_aliases.push_back("expr_" + to_string(i)); | ||
} | ||
return make_shared_ptr<ProjectionRelation>(TransformOp(sop.project().input()), std::move(expressions), | ||
std::move(mock_aliases)); | ||
return make_shared_ptr<ProjectionRelation>(input_rel, std::move(expressions), std::move(mock_aliases)); | ||
} | ||
|
||
shared_ptr<Relation> SubstraitToDuckDB::TransformAggregateOp(const substrait::Rel &sop) { | ||
|
@@ -515,7 +552,7 @@ | |
|
||
if (sop.aggregate().groupings_size() > 0) { | ||
for (auto &sgrp : sop.aggregate().groupings()) { | ||
for (auto &sgrpexpr : sgrp.grouping_expressions()) { | ||
Check warning on line 555 in src/from_substrait.cpp GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)
|
||
groups.push_back(TransformExpr(sgrpexpr)); | ||
expressions.push_back(TransformExpr(sgrpexpr)); | ||
} | ||
|
@@ -615,8 +652,8 @@ | |
scan = rel->Alias(name); | ||
} else if (sget.has_virtual_table()) { | ||
// We need to handle a virtual table as a LogicalExpressionGet | ||
if (!sget.virtual_table().values().empty()) { | ||
Check warning on line 655 in src/from_substrait.cpp GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)
|
||
auto literal_values = sget.virtual_table().values(); | ||
Check warning on line 656 in src/from_substrait.cpp GitHub Actions / Build extension binaries / MacOS (osx_amd64, x86_64, x64-osx)
|
||
vector<vector<Value>> expression_rows; | ||
for (auto &row : literal_values) { | ||
auto values = row.fields(); | ||
|
@@ -739,15 +776,25 @@ | |
} | ||
auto input = TransformOp(swrite.input()); | ||
switch (swrite.op()) { | ||
case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_CTAS: | ||
return input->CreateRel(schema_name, table_name); | ||
case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_CTAS: | ||
return input->CreateRel(schema_name, table_name); | ||
case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_INSERT: | ||
return input->InsertRel(schema_name, table_name); | ||
case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_DELETE: { | ||
auto filter = std::move(input.get()->Cast<FilterRelation>()); | ||
auto context = filter.child->Cast<TableRelation>().context; | ||
case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_DELETE: { | ||
switch (input->type) { | ||
case RelationType::PROJECTION_RELATION: { | ||
auto project = std::move(input.get()->Cast<ProjectionRelation>()); | ||
auto filter = std::move(project.child->Cast<FilterRelation>()); | ||
return make_shared_ptr<DeleteRelation>(filter.context, std::move(filter.condition), schema_name, table_name); | ||
} | ||
} | ||
case RelationType::FILTER_RELATION: { | ||
auto filter = std::move(input.get()->Cast<FilterRelation>()); | ||
return make_shared_ptr<DeleteRelation>(filter.context, std::move(filter.condition), schema_name, table_name); | ||
} | ||
default: | ||
throw NotImplementedException("Unsupported relation type for delete operation"); | ||
} | ||
} | ||
default: | ||
throw NotImplementedException("Unsupported write operation " + to_string(swrite.op())); | ||
} | ||
|
@@ -822,6 +869,9 @@ | |
if (first_projection_or_table) { | ||
vector<ColumnDefinition> *column_definitions = &first_projection_or_table->Cast<ProjectionRelation>().columns; | ||
int32_t i = 0; | ||
if (column_definitions->size() > column_names.size()) { | ||
throw InvalidInputException("Number of column names less than number of column definitions"); | ||
} | ||
for (auto &column : *column_definitions) { | ||
aliases.push_back(column_names[i++]); | ||
auto column_type = column.GetType(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Almost every relation should be in this list. I'd consider calling anything missing not yet implemented.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah..! Thanks for pointing that out, I missed this.
This also reminds me that I could avoid a project by using output mapping whenever I only have to change the column order of a given relation.