From f013167e86dcead18b9255f4db6ada963f8b6456 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Wed, 8 Jan 2025 20:24:24 +0530 Subject: [PATCH 1/8] fix: project rel to and from substrait to include pass through columns --- test/c/CMakeLists.txt | 2 +- test/c/test_substrait_c_api.cpp | 47 +--------------------------- test/c/test_substrait_c_utils.cpp | 52 +++++++++++++++++++++++++++++++ test/c/test_substrait_c_utils.hpp | 15 +++++++++ 4 files changed, 69 insertions(+), 47 deletions(-) create mode 100644 test/c/test_substrait_c_utils.cpp create mode 100644 test/c/test_substrait_c_utils.hpp diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 7a01a17..1c81985 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -12,7 +12,7 @@ include_directories(../../duckdb/src/include) include_directories(../../duckdb/test/include) include_directories(../../duckdb/third_party/catch) -set(ALL_SOURCES test_substrait_c_api.cpp) +set(ALL_SOURCES test_substrait_c_api.cpp test_substrait_c_utils.cpp) add_library_unity(test_substrait OBJECT ${ALL_SOURCES}) diff --git a/test/c/test_substrait_c_api.cpp b/test/c/test_substrait_c_api.cpp index 27ab432..5f61caa 100644 --- a/test/c/test_substrait_c_api.cpp +++ b/test/c/test_substrait_c_api.cpp @@ -1,6 +1,7 @@ #include "catch.hpp" #include "test_helpers.hpp" #include "duckdb/main/connection_manager.hpp" +#include "test_substrait_c_utils.hpp" #include #include @@ -47,52 +48,6 @@ TEST_CASE("Test C Get and To Json-Substrait API", "[substrait-api]") { REQUIRE_THROWS(con.FromSubstraitJSON("this is not valid")); } -duckdb::unique_ptr ExecuteViaSubstrait(Connection &con, const string &sql) { - auto proto = con.GetSubstrait(sql); - return con.FromSubstrait(proto); -} - -duckdb::unique_ptr ExecuteViaSubstraitJSON(Connection &con, const string &sql) { - auto json_str = con.GetSubstraitJSON(sql); - return con.FromSubstraitJSON(json_str); -} - -void CreateEmployeeTable(Connection& con) { - REQUIRE_NO_FAIL(con.Query("CREATE TABLE 
employees (" - "employee_id INTEGER PRIMARY KEY, " - "name VARCHAR(100), " - "department_id INTEGER, " - "salary DECIMAL(10, 2))")); - - REQUIRE_NO_FAIL(con.Query("INSERT INTO employees VALUES " - "(1, 'John Doe', 1, 120000), " - "(2, 'Jane Smith', 2, 80000), " - "(3, 'Alice Johnson', 1, 50000), " - "(4, 'Bob Brown', 3, 95000), " - "(5, 'Charlie Black', 2, 60000)")); -} - -void CreatePartTimeEmployeeTable(Connection& con) { - REQUIRE_NO_FAIL(con.Query("CREATE TABLE part_time_employees (" - "id INTEGER PRIMARY KEY, " - "name VARCHAR(100), " - "department_id INTEGER, " - "hourly_rate DECIMAL(10, 2))")); - - REQUIRE_NO_FAIL(con.Query("INSERT INTO part_time_employees VALUES " - "(6, 'David White', 1, 30000), " - "(7, 'Eve Green', 2, 40000)")); -} - -void CreateDepartmentsTable(Connection& con) { - REQUIRE_NO_FAIL(con.Query("CREATE TABLE departments (department_id INTEGER PRIMARY KEY, department_name VARCHAR(100))")); - - REQUIRE_NO_FAIL(con.Query("INSERT INTO departments VALUES " - "(1, 'HR'), " - "(2, 'Engineering'), " - "(3, 'Finance')")); -} - TEST_CASE("Test C CTAS Select columns with Substrait API", "[substrait-api]") { DuckDB db(nullptr); Connection con(db); diff --git a/test/c/test_substrait_c_utils.cpp b/test/c/test_substrait_c_utils.cpp new file mode 100644 index 0000000..619d2bf --- /dev/null +++ b/test/c/test_substrait_c_utils.cpp @@ -0,0 +1,52 @@ +#include "test_helpers.hpp" +#include "test_substrait_c_utils.hpp" + +using namespace duckdb; +using namespace std; + + +duckdb::unique_ptr ExecuteViaSubstrait(Connection &con, const string &sql) { + auto proto = con.GetSubstrait(sql); + return con.FromSubstrait(proto); +} + +duckdb::unique_ptr ExecuteViaSubstraitJSON(Connection &con, const string &sql) { + auto json_str = con.GetSubstraitJSON(sql); + return con.FromSubstraitJSON(json_str); +} + +void CreateEmployeeTable(Connection &con) { + REQUIRE_NO_FAIL(con.Query("CREATE TABLE employees (" + "employee_id INTEGER PRIMARY KEY, " + "name VARCHAR(100), " + 
"department_id INTEGER, " + "salary DECIMAL(10, 2))")); + + REQUIRE_NO_FAIL(con.Query("INSERT INTO employees VALUES " + "(1, 'John Doe', 1, 120000), " + "(2, 'Jane Smith', 2, 80000), " + "(3, 'Alice Johnson', 1, 50000), " + "(4, 'Bob Brown', 3, 95000), " + "(5, 'Charlie Black', 2, 60000)")); +} + +void CreatePartTimeEmployeeTable(Connection& con) { + REQUIRE_NO_FAIL(con.Query("CREATE TABLE part_time_employees (" + "id INTEGER PRIMARY KEY, " + "name VARCHAR(100), " + "department_id INTEGER, " + "hourly_rate DECIMAL(10, 2))")); + + REQUIRE_NO_FAIL(con.Query("INSERT INTO part_time_employees VALUES " + "(6, 'David White', 1, 30000), " + "(7, 'Eve Green', 2, 40000)")); +} + +void CreateDepartmentsTable(Connection& con) { + REQUIRE_NO_FAIL(con.Query("CREATE TABLE departments (department_id INTEGER PRIMARY KEY, department_name VARCHAR(100))")); + + REQUIRE_NO_FAIL(con.Query("INSERT INTO departments VALUES " + "(1, 'HR'), " + "(2, 'Engineering'), " + "(3, 'Finance')")); +} diff --git a/test/c/test_substrait_c_utils.hpp b/test/c/test_substrait_c_utils.hpp new file mode 100644 index 0000000..52665f4 --- /dev/null +++ b/test/c/test_substrait_c_utils.hpp @@ -0,0 +1,15 @@ +#ifndef TEST_SUBSTRAIT_C_UTILS_HPP +#define TEST_SUBSTRAIT_C_UTILS_HPP + +#include "duckdb.hpp" +#include "duckdb/main/connection_manager.hpp" + +using namespace duckdb; +void CreateEmployeeTable(Connection& con); +void CreatePartTimeEmployeeTable(Connection& con); +void CreateDepartmentsTable(Connection& con); + +duckdb::unique_ptr ExecuteViaSubstraitJSON(Connection &con, const std::string &query); +duckdb::unique_ptr ExecuteViaSubstrait(Connection &con, const std::string &query); + +#endif From c7499d90b6712e7a6c85734f2aa95fad1855c160 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Wed, 8 Jan 2025 21:38:31 +0530 Subject: [PATCH 2/8] project rel to and from substrait to include pass through columns --- src/from_substrait.cpp | 45 ++++++++++-- src/include/to_substrait.hpp | 4 +- src/to_substrait.cpp | 
66 ++++++++++++++++- test/c/CMakeLists.txt | 2 +- test/c/test_projection.cpp | 136 +++++++++++++++++++++++++++++++++++ 5 files changed, 246 insertions(+), 7 deletions(-) create mode 100644 test/c/test_projection.cpp diff --git a/src/from_substrait.cpp b/src/from_substrait.cpp index f995233..356f88b 100644 --- a/src/from_substrait.cpp +++ b/src/from_substrait.cpp @@ -492,22 +492,59 @@ shared_ptr SubstraitToDuckDB::TransformFilterOp(const substrait::Rel & return make_shared_ptr(TransformOp(sfilter.input()), TransformExpr(sfilter.condition())); } +const google::protobuf::RepeatedField& GetOutputMapping(const substrait::Rel &sop) { + const substrait::RelCommon* common = nullptr; + switch (sop.rel_type_case()) { + case substrait::Rel::RelTypeCase::kJoin: + common = &sop.join().common(); + break; + case substrait::Rel::RelTypeCase::kProject: + common = &sop.project().common(); + break; + default: + throw InternalException("Unsupported relation type " + to_string(sop.rel_type_case())); + } + if (!common->has_emit()) { + static google::protobuf::RepeatedField empty_mapping; + return empty_mapping; + } + return common->emit().output_mapping(); +} + shared_ptr SubstraitToDuckDB::TransformProjectOp(const substrait::Rel &sop, const google::protobuf::RepeatedPtrField *names) { vector> expressions; RootNameIterator iterator(names); - for (auto &sexpr : sop.project().expressions()) { - expressions.push_back(TransformExpr(sexpr, &iterator)); + auto input_rel = TransformOp(sop.project().input()); + + auto mapping = GetOutputMapping(sop); + auto num_input_columns = input_rel->Columns().size(); + if (mapping.empty()) { + for (int i = 1; i <= num_input_columns; i++) { + expressions.push_back(make_uniq(i)); + } + + for (auto &sexpr : sop.project().expressions()) { + expressions.push_back(TransformExpr(sexpr, &iterator)); + } + } else { + expressions.resize(mapping.size()); + for (size_t i = 0; i < mapping.size(); i++) { + if (mapping[i] < num_input_columns) { + expressions[i] = 
make_uniq(mapping[i] + 1); + } else { + expressions[i] = TransformExpr(sop.project().expressions(mapping[i] - num_input_columns), &iterator); + } + } } vector mock_aliases; for (size_t i = 0; i < expressions.size(); i++) { mock_aliases.push_back("expr_" + to_string(i)); } - return make_shared_ptr(TransformOp(sop.project().input()), std::move(expressions), - std::move(mock_aliases)); + return make_shared_ptr(input_rel, std::move(expressions), std::move(mock_aliases)); } shared_ptr SubstraitToDuckDB::TransformAggregateOp(const substrait::Rel &sop) { diff --git a/src/include/to_substrait.hpp b/src/include/to_substrait.hpp index 7466395..453f688 100644 --- a/src/include/to_substrait.hpp +++ b/src/include/to_substrait.hpp @@ -73,7 +73,9 @@ class DuckDBToSubstrait { substrait::Rel *TransformInsertTable(LogicalOperator &dop); substrait::Rel *TransformDeleteTable(LogicalOperator &dop); static substrait::Rel *TransformDummyScan(); - //! Methods to transform different LogicalGet Types (e.g., Table, Parquet) + static substrait::RelCommon *CreateOutputMapping(vector vector); + //! Methods to transform different LogicalGe:75 + //t Types (e.g., Table, Parquet) //! 
To Substrait; void TransformTableScanToSubstrait(LogicalGet &dget, substrait::ReadRel *sget) const; void TransformParquetScanToSubstrait(LogicalGet &dget, substrait::ReadRel *sget, BindInfo &bind_info, diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp index b25a7db..13c8f0c 100644 --- a/src/to_substrait.cpp +++ b/src/to_substrait.cpp @@ -856,14 +856,71 @@ substrait::Rel *DuckDBToSubstrait::TransformFilter(LogicalOperator &dop) { return res; } +substrait::RelCommon *DuckDBToSubstrait::CreateOutputMapping(vector vector) { + auto rel_common = new substrait::RelCommon(); + auto output_mapping = rel_common->mutable_emit()->mutable_output_mapping(); + for (auto &col_idx : vector) { + output_mapping->Add(col_idx); + } + return rel_common; +} + substrait::Rel *DuckDBToSubstrait::TransformProjection(LogicalOperator &dop) { auto res = new substrait::Rel(); auto &dproj = dop.Cast(); + + auto child_column_count = dop.children[0]->types.size(); + auto num_passthrough_columns = 0; + auto need_output_mapping = true; + if (child_column_count <= dproj.expressions.size()) { + // check if the projection is just pass through of input columns with no reordering + auto exp_col_idx = 0; + auto is_passthrough = true; + for (auto &dexpr : dproj.expressions) { + if (dexpr->type != ExpressionType::BOUND_REF) { + is_passthrough = false; + break; + } + num_passthrough_columns++; + auto &dref = dexpr.get()->Cast(); + if (dref.index != exp_col_idx) { + is_passthrough = false; + break; + } + exp_col_idx++; + } + if (is_passthrough && child_column_count == exp_col_idx) { + // skip the projection + return TransformOp(*dop.children[0]); + } + if (child_column_count == exp_col_idx) { + // all input columns are projected, no need for output mapping + num_passthrough_columns = child_column_count; + need_output_mapping = false; + } + } + auto sproj = res->mutable_project(); sproj->set_allocated_input(TransformOp(*dop.children[0])); + auto t_index = 0; + vector output_mapping; for (auto &dexpr : 
dproj.expressions) { - TransformExpr(*dexpr, *sproj->add_expressions()); + switch (dexpr->type) { + case ExpressionType::BOUND_REF: { + auto &dref = dexpr.get()->Cast(); + output_mapping.push_back(dref.index); + break; + } + default: + TransformExpr(*dexpr.get(), *sproj->add_expressions()); + output_mapping.push_back(child_column_count + t_index); + t_index++; + } + } + if (need_output_mapping) { + auto rel_common = CreateOutputMapping(output_mapping); + sproj->set_allocated_common(rel_common); } return res; } @@ -998,6 +1055,13 @@ substrait::Rel *DuckDBToSubstrait::TransformComparisonJoin(LogicalOperator &dop) } } + auto child_column_count = dop.children[0]->types.size() + dop.children[1]->types.size(); + vector output_mapping; + for (idx_t i = 0; i < projection->expressions_size(); i++) { + output_mapping.push_back(child_column_count + i); + } + auto rel_common = CreateOutputMapping(output_mapping); + projection->set_allocated_common(rel_common); projection->set_allocated_input(res); return proj_rel; } diff --git a/test/c/CMakeLists.txt b/test/c/CMakeLists.txt index 1c81985..a230c7f 100644 --- a/test/c/CMakeLists.txt +++ b/test/c/CMakeLists.txt @@ -12,7 +12,7 @@ include_directories(../../duckdb/src/include) include_directories(../../duckdb/test/include) include_directories(../../duckdb/third_party/catch) -set(ALL_SOURCES test_substrait_c_api.cpp test_substrait_c_utils.cpp) +set(ALL_SOURCES test_substrait_c_api.cpp test_substrait_c_utils.cpp test_projection.cpp) add_library_unity(test_substrait OBJECT ${ALL_SOURCES}) diff --git a/test/c/test_projection.cpp b/test/c/test_projection.cpp new file mode 100644 index 0000000..391cb04 --- /dev/null +++ b/test/c/test_projection.cpp @@ -0,0 +1,136 @@ +#include "catch.hpp" +#include "test_helpers.hpp" +#include "duckdb/main/connection_manager.hpp" +#include "test_substrait_c_utils.hpp" + +#include +#include +#include + +using namespace duckdb; +using namespace std; + +TEST_CASE("Test C Project input columns with Substrait 
API", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER)")); + REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (10), (20), (30)")); + CreateEmployeeTable(con); + + auto expected_json_str = R"({"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"names":["i"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + auto json_str = con.GetSubstraitJSON("SELECT i FROM integers"); + REQUIRE(json_str == expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {10, 20, 30})); +} + +TEST_CASE("Test C Project 1 input column 1 transformation with Substrait API", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER)")); + REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (10), (20), (30)")); + CreateEmployeeTable(con); + + auto expected_json_str = 
R"({"extensionUris":[{"extensionUriAnchor":1,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml"}],"extensions":[{"extensionFunction":{"extensionUriReference":1,"functionAnchor":1,"name":"multiply:i32_i32"}}],"relations":[{"root":{"input":{"project":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"expressions":[{"scalarFunction":{"functionReference":1,"outputType":{"i32":{"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"selection":{"directReference":{"structField":{}},"rootReference":{}}}},{"value":{"selection":{"directReference":{"structField":{}},"rootReference":{}}}}]}}]}},"names":["i","isquare"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + auto json_str = con.GetSubstraitJSON("SELECT i, i *i as isquare FROM integers"); + REQUIRE(json_str == expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {10, 20, 30})); + REQUIRE(CHECK_COLUMN(result, 1, {100, 400, 900})); +} + +TEST_CASE("Test C Project all columns with Substrait API", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + CreateEmployeeTable(con); + + // This should not have a ProjectRel node + auto json_str = con.GetSubstraitJSON("SELECT * FROM employees"); + auto expected_json_str = 
R"({"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["employee_id","name","department_id","salary"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}},{"i32":{"nullability":"NULLABILITY_NULLABLE"}},{"decimal":{"scale":2,"precision":10,"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{},{"field":1},{"field":2},{"field":3}]},"maintainSingularStruct":true},"namedTable":{"names":["employees"]}}},"names":["employee_id","name","department_id","salary"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + REQUIRE(json_str == expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3, 4, 5})); + REQUIRE(CHECK_COLUMN(result, 1, {"John Doe", "Jane Smith", "Alice Johnson", "Bob Brown", "Charlie Black"})); + REQUIRE(CHECK_COLUMN(result, 2, {1, 2, 1, 3, 2})); + REQUIRE(CHECK_COLUMN(result, 3, {120000, 80000, 50000, 95000, 60000})); +} + +TEST_CASE("Test C Project two passthrough columns with Substrait API", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + CreateEmployeeTable(con); + + // This should not have a ProjectRel node + auto json_str = con.GetSubstraitJSON("SELECT name, salary FROM employees"); + auto expected_json_str = R"({"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["employee_id","name","department_id","salary"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}},{"i32":{"nullability":"NULLABILITY_NULLABLE"}},{"decimal":{"scale":2,"precision":10,"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{"field":1},{"field":3}]},"maintainSingularStruct":true},"namedTable":{"names":["employees"]}}},"names":["name","salary"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + REQUIRE(json_str == 
expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {"John Doe", "Jane Smith", "Alice Johnson", "Bob Brown", "Charlie Black"})); + REQUIRE(CHECK_COLUMN(result, 1, {120000, 80000, 50000, 95000, 60000})); +} + +TEST_CASE("Test C Project two passthrough columns with filter", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + CreateEmployeeTable(con); + + // This should not have a ProjectRel node + auto json_str = con.GetSubstraitJSON("SELECT name, salary FROM employees where department_id = 1"); + auto expected_json_str = R"({"extensionUris":[{"extensionUriAnchor":1,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/"}],"extensions":[{"extensionFunction":{"extensionUriReference":1,"functionAnchor":1,"name":"equal:i32_i32"}}],"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["employee_id","name","department_id","salary"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}},{"i32":{"nullability":"NULLABILITY_NULLABLE"}},{"decimal":{"scale":2,"precision":10,"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"filter":{"scalarFunction":{"functionReference":1,"outputType":{"bool":{"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"selection":{"directReference":{"structField":{"field":2}},"rootReference":{}}}},{"value":{"literal":{"i32":1}}}]}},"projection":{"select":{"structItems":[{"field":1},{"field":3}]},"maintainSingularStruct":true},"namedTable":{"names":["employees"]}}},"names":["name","salary"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + REQUIRE(json_str == expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {"John Doe", "Alice Johnson" })); + REQUIRE(CHECK_COLUMN(result, 1, {120000, 50000 })); +} + +TEST_CASE("Test C Project 1 passthrough column, 1 transformation with column elimination", 
"[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + CreateEmployeeTable(con); + + auto json_str = con.GetSubstraitJSON("SELECT name, salary * 1.2 as new_salary FROM employees"); + auto expected_json_str = R"({"extensionUris":[{"extensionUriAnchor":1,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic_decimal.yaml"}],"extensions":[{"extensionFunction":{"extensionUriReference":1,"functionAnchor":1,"name":"multiply:decimal_decimal"}}],"relations":[{"root":{"input":{"project":{"common":{"emit":{"outputMapping":[0,2]}},"input":{"read":{"baseSchema":{"names":["employee_id","name","department_id","salary"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}},{"i32":{"nullability":"NULLABILITY_NULLABLE"}},{"decimal":{"scale":2,"precision":10,"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{"field":1},{"field":3}]},"maintainSingularStruct":true},"namedTable":{"names":["employees"]}}},"expressions":[{"scalarFunction":{"functionReference":1,"outputType":{"decimal":{"scale":3,"precision":12,"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"selection":{"directReference":{"structField":{"field":1}},"rootReference":{}}}},{"value":{"literal":{"decimal":{"value":"DAAAAAAAAAAAAAAAAAAAAA==","precision":12,"scale":1}}}}]}}]}},"names":["name","new_salary"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + REQUIRE(json_str == expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {"John Doe", "Jane Smith", "Alice Johnson", "Bob Brown", "Charlie Black"})); + REQUIRE(CHECK_COLUMN(result, 1, {144000, 96000, 60000, 114000, 72000})); +} + +TEST_CASE("Test C Project 1 passthrough column and 1 aggregate transformation", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + CreateEmployeeTable(con); + + auto json_str = 
con.GetSubstraitJSON("SELECT department_id, AVG(salary) AS avg_salary FROM employees GROUP BY department_id"); + auto expected_json_str = R"({"extensionUris":[{"extensionUriAnchor":1,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/"}],"extensions":[{"extensionFunction":{"extensionUriReference":1,"functionAnchor":1,"name":"avg:decimal"}}],"relations":[{"root":{"input":{"aggregate":{"input":{"read":{"baseSchema":{"names":["employee_id","name","department_id","salary"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}},{"i32":{"nullability":"NULLABILITY_NULLABLE"}},{"decimal":{"scale":2,"precision":10,"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{"field":2},{"field":3}]},"maintainSingularStruct":true},"namedTable":{"names":["employees"]}}},"groupings":[{"groupingExpressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}],"measures":[{"measure":{"functionReference":1,"outputType":{"fp64":{"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"selection":{"directReference":{"structField":{"field":1}},"rootReference":{}}}}]}}]}},"names":["department_id","avg_salary"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + REQUIRE(json_str == expected_json_str); + auto result = con.FromSubstraitJSON(json_str); + REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); + REQUIRE(CHECK_COLUMN(result, 1, {85000, 70000, 95000})); +} + +TEST_CASE("Test C Project on Join with Substrait API", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + + CreateEmployeeTable(con); + CreateDepartmentsTable(con); + + auto result = ExecuteViaSubstraitJSON(con, + "SELECT e.employee_id, e.name, d.department_name " + "FROM employees e " + "JOIN departments d " + "ON e.department_id = d.department_id" + ); + + REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3, 4, 5})); + REQUIRE(CHECK_COLUMN(result, 1, 
{"John Doe", "Jane Smith", "Alice Johnson", "Bob Brown", "Charlie Black"})); + REQUIRE(CHECK_COLUMN(result, 2, {"HR", "Engineering", "HR", "Finance", "Engineering"})); +} From 46d4c8ea3ff2133fc6f3087c82f909dad634b582 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Thu, 9 Jan 2025 13:13:05 +0530 Subject: [PATCH 3/8] Handle project to not have zero expressions. Fix python tests. --- src/from_substrait.cpp | 3 +++ src/to_substrait.cpp | 4 ++++ test/c/test_projection.cpp | 23 +++++++++++++++++++++++ test/python/test_substrait_from_json.py | 4 ++-- test/python/test_substrait_json.py | 5 +++-- 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/from_substrait.cpp b/src/from_substrait.cpp index 356f88b..67bce71 100644 --- a/src/from_substrait.cpp +++ b/src/from_substrait.cpp @@ -859,6 +859,9 @@ shared_ptr SubstraitToDuckDB::TransformRootOp(const substrait::RelRoot if (first_projection_or_table) { vector *column_definitions = &first_projection_or_table->Cast().columns; int32_t i = 0; + if (column_definitions->size() != column_names.size()) { + throw InvalidInputException("Number of column names and column definitions do not match"); + } for (auto &column : *column_definitions) { aliases.push_back(column_names[i++]); auto column_type = column.GetType(); diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp index 13c8f0c..fffd7f3 100644 --- a/src/to_substrait.cpp +++ b/src/to_substrait.cpp @@ -919,6 +919,10 @@ substrait::Rel *DuckDBToSubstrait::TransformProjection(LogicalOperator &dop) { } } if (need_output_mapping) { + if (sproj->expressions_size() == 0) { + // atleast one expression should be there, add zeroth column as dummy expression + CreateFieldRef(sproj->add_expressions(), 0); + } auto rel_common = CreateOutputMapping(output_mapping); sproj->set_allocated_common(rel_common); } diff --git a/test/c/test_projection.cpp b/test/c/test_projection.cpp index 391cb04..04a0b7b 100644 --- a/test/c/test_projection.cpp +++ b/test/c/test_projection.cpp @@ 
-134,3 +134,26 @@ TEST_CASE("Test C Project on Join with Substrait API", "[substrait-api]") { REQUIRE(CHECK_COLUMN(result, 1, {"John Doe", "Jane Smith", "Alice Johnson", "Bob Brown", "Charlie Black"})); REQUIRE(CHECK_COLUMN(result, 2, {"HR", "Engineering", "HR", "Finance", "Engineering"})); } + +TEST_CASE("Test Project with bad plan", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + con.EnableQueryVerification(); + REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER)")); + REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1), (2), (3), (NULL)")); + + auto query_json = R"({"relations":[{"root":{"input":{"project":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["i"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; + REQUIRE_THROWS(con.FromSubstraitJSON(query_json)); +} + +TEST_CASE("Test Project with duplicate columns", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + con.EnableQueryVerification(); + REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER)")); + REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1), (2), (3), (NULL)")); + + auto query_json = R"({"relations":[{"root":{"input":{"project":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["i", "integers"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; 
+ auto res1 = con.FromSubstraitJSON(query_json); + REQUIRE(CHECK_COLUMN(res1, 0, {1, 2, 3, Value()})); +} \ No newline at end of file diff --git a/test/python/test_substrait_from_json.py b/test/python/test_substrait_from_json.py index d22dd79..479de12 100644 --- a/test/python/test_substrait_from_json.py +++ b/test/python/test_substrait_from_json.py @@ -9,8 +9,8 @@ def test_substrait_from_json(require): connection.execute('CREATE TABLE integers (i integer)') connection.execute('INSERT INTO integers values (0)') - query_json = '{"relations":[{"root":{"input":{"project":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["i"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}}' - + query_json = '{"relations":[{"root":{"input":{"project":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["i","integers"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}}' + assert connection.from_substrait_json(query_json).fetchone()[0] == 0 # Test malformed json diff --git a/test/python/test_substrait_json.py b/test/python/test_substrait_json.py index 1c9471a..b12c2e7 100644 --- a/test/python/test_substrait_json.py +++ b/test/python/test_substrait_json.py @@ -7,7 +7,8 @@ def test_substrait_json(require): return connection.execute('CREATE TABLE integers (i integer)') - json = 
connection.get_substrait_json("select * from integers limit 5").fetchone()[0] - expected_result = '{"relations":[{"root":{"input":{"project":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["i"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}}' + json = connection.get_substrait_json("select * from integers limit 5").fetchone()[0] + expected_result = '{"relations":[{"root":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"names":["i"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}}' assert json == expected_result + From 48f42eade51be4f7a7385b0096fd4af78bf5ba36 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Thu, 9 Jan 2025 21:03:15 +0530 Subject: [PATCH 4/8] change error condition: hack to support struct fields --- src/from_substrait.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/from_substrait.cpp b/src/from_substrait.cpp index 67bce71..d29e355 100644 --- a/src/from_substrait.cpp +++ b/src/from_substrait.cpp @@ -859,8 +859,8 @@ shared_ptr SubstraitToDuckDB::TransformRootOp(const substrait::RelRoot if (first_projection_or_table) { vector *column_definitions = &first_projection_or_table->Cast().columns; int32_t i = 0; - if (column_definitions->size() != column_names.size()) { - throw InvalidInputException("Number of column names and column definitions do not match"); + if (column_definitions->size() > column_names.size()) { + throw 
InvalidInputException("Number of column names less than number of column definitions"); } for (auto &column : *column_definitions) { aliases.push_back(column_names[i++]); From 7053a045e496be1058b9e67622fe20570f6556e8 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Mon, 13 Jan 2025 07:59:12 +0530 Subject: [PATCH 5/8] fix some more tests with projection --- test/sql/test_struct_types.test | 6 +++--- test/sql/test_substrait.test | 4 ++-- test/sql/test_substrait_parquet.test | 2 +- test/sql/test_temporary_view.test | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/sql/test_struct_types.test b/test/sql/test_struct_types.test index 903016c..133391c 100644 --- a/test/sql/test_struct_types.test +++ b/test/sql/test_struct_types.test @@ -12,11 +12,11 @@ PRAGMA enable_verification query I CALL get_substrait_json('select * from parquet_scan(''data/bug-17/test_table.parquet'') ') ---- -{"relations":[{"root":{"input":{"project":{"input":{"read":{"baseSchema":{"names":["r","a","b"],"struct":{"types":[{"struct":{"types":[{"i64":{"nullability":"NULLABILITY_NULLABLE"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"localFiles":{"items":[{"uriFile":"data/bug-17/test_table.parquet","parquet":{}}]}}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["r","a","b"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}} 
+{"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["r","a","b"],"struct":{"types":[{"struct":{"types":[{"i64":{"nullability":"NULLABILITY_NULLABLE"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"localFiles":{"items":[{"uriFile":"data/bug-17/test_table.parquet","parquet":{}}]}}},"names":["r","a","b"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}} query I -CALL from_substrait_json('{"relations":[{"root":{"input":{"project":{"input":{"read":{"baseSchema":{"names":["r"],"struct":{"types":[{"struct":{"types":[{"i64":{"nullability":"NULLABILITY_NULLABLE"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"localFiles":{"items":[{"uriFile":"data/bug-17/test_table.parquet","parquet":{}}]}}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["r"]}}],"version":{"minorNumber":48,"producer":"DuckDB"}}') +CALL from_substrait_json('{"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["r"],"struct":{"types":[{"struct":{"types":[{"i64":{"nullability":"NULLABILITY_NULLABLE"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"localFiles":{"items":[{"uriFile":"data/bug-17/test_table.parquet","parquet":{}}]}}},"names":["r"]}}],"version":{"minorNumber":48,"producer":"DuckDB"}}') ---- {'a': 1, 'b': b} @@ -38,7 +38,7 @@ create table mytesttable as from parquet_scan('data/bug-17/test_table.parquet'); query I CALL get_substrait_json('select * from mytesttable ') ---- 
-{"relations":[{"root":{"input":{"project":{"input":{"read":{"baseSchema":{"names":["r","a","b"],"struct":{"types":[{"struct":{"types":[{"i64":{"nullability":"NULLABILITY_NULLABLE"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["mytesttable"]}}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["r","a","b"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}} +{"relations":[{"root":{"input":{"read":{"baseSchema":{"names":["r","a","b"],"struct":{"types":[{"struct":{"types":[{"i64":{"nullability":"NULLABILITY_NULLABLE"}},{"string":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["mytesttable"]}}},"names":["r","a","b"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}} statement ok CALL get_substrait ('select r from mytesttable ') diff --git a/test/sql/test_substrait.test b/test/sql/test_substrait.test index 36ccacf..445be79 100644 --- a/test/sql/test_substrait.test +++ b/test/sql/test_substrait.test @@ -17,13 +17,13 @@ statement ok insert into crossfit values ('Push Ups', 3), ('Pull Ups', 5) , ('Push Jerk', 7), ('Bar Muscle Up', 10); query I -CALL from_substrait('\x12\x09\x1A\x07\x10\x01\x1A\x03lte\x12\x11\x1A\x0F\x10\x02\x1A\x0Bis_not_null\x12\x09\x1A\x07\x10\x03\x1A\x03and\x12\x0B\x1A\x09\x10\x04\x1A\x05count\x1A\xC1\x01\x12\xBE\x01\x0A\xB1\x01:\xAE\x01\x12\xA1\x01\x22\x9E\x01\x12\x8D\x01\x0A\x8A\x01\x12,\x0A\x08exercise\x0A\x0Fdificulty_level\x12\x0F\x0A\x07\xB2\x01\x04\x08\x0D\x18\x01\x0A\x02*\x00\x18\x02\x1AF\x1AD\x08\x03\x1A\x04\x0A\x02\x10\x01\x22 
\x1A\x1E\x1A\x1C\x08\x01\x1A\x02*\x00\x22\x0C\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x22\x06\x1A\x04\x0A\x02(\x05\x22\x18\x1A\x16\x1A\x14\x08\x02\x1A\x02*\x00\x22\x0C\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x22\x06\x0A\x02\x0A\x00\x10\x01:\x0A\x0A\x08crossfit\x1A\x00\x22\x0A\x0A\x08\x08\x04*\x04:\x02\x10\x01\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x12\x08exercise2\x0A\x10\x18*\x06DuckDB'::BLOB) +CALL from_substrait('\x12\x09\x1A\x07\x10\x01\x1A\x03lte\x12\x11\x1A\x0F\x10\x02\x1A\x0Bis_not_null\x12\x09\x1A\x07\x10\x03\x1A\x03and\x12\x0B\x1A\x09\x10\x04\x1A\x05count\x1A\xC8\x01\x12\xC5\x01\x0A\xB8\x01:\xB5\x01\x0A\x05\x12\x03\x0A\x01\x01\x12\xA1\x01\x22\x9E\x01\x12\x8D\x01\x0A\x8A\x01\x12,\x0A\x08exercise\x0A\x0Fdificulty_level\x12\x0F\x0A\x07\xB2\x01\x04\x08\x0D\x18\x01\x0A\x02*\x00\x18\x02\x1AF\x1AD\x08\x03\x1A\x04\x0A\x02\x10\x01\x22\x20\x1A\x1E\x1A\x1C\x08\x01\x1A\x02*\x00\x22\x0C\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x22\x06\x1A\x04\x0A\x02(\x05\x22\x18\x1A\x16\x1A\x14\x08\x02\x1A\x02*\x00\x22\x0C\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x22\x06\x0A\x02\x0A\x00\x10\x01:\x0A\x0A\x08crossfit\x1A\x00\x22\x0A\x0A\x08\x08\x04*\x04:\x02\x10\x01\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x12\x08exercise2\x0A\x10\x18*\x06DuckDB'::BLOB) ---- 2 # Query with multiple columns query II -CALL from_substrait('\x1A\x81\x01\x12\x7F\x0Ab:`\x12H\x0AF\x12,\x0A\x08exercise\x0A\x0Fdificulty_level\x12\x0F\x0A\x07\xB2\x01\x04\x08\x0D\x18\x01\x0A\x02*\x00\x18\x02\x22\x0A\x0A\x06\x0A\x00\x0A\x02\x08\x01\x10\x01:\x0A\x0A\x08crossfit\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x12\x08exercise\x12\x0Fdificulty_level2\x0A\x10\x18*\x06DuckDB'::BLOB) +CALL 
from_substrait('\x1Ag\x12e\x0AH\x0AF\x12,\x0A\x08exercise\x0A\x0Fdificulty_level\x12\x0F\x0A\x07\xB2\x01\x04\x08\x0D\x18\x01\x0A\x02*\x00\x18\x02\x22\x0A\x0A\x06\x0A\x00\x0A\x02\x08\x01\x10\x01:\x0A\x0A\x08crossfit\x12\x08exercise\x12\x0Fdificulty_level2\x0A\x10\x18*\x06DuckDB'::BLOB) ---- Push Ups 3 Pull Ups 5 diff --git a/test/sql/test_substrait_parquet.test b/test/sql/test_substrait_parquet.test index e5d7855..4e624fb 100644 --- a/test/sql/test_substrait_parquet.test +++ b/test/sql/test_substrait_parquet.test @@ -25,7 +25,7 @@ CREATE VIEW lineitem_parquet AS SELECT * FROM parquet_scan('data/parquet-testing # Verify that we can re-use the query plan on a view instead query I -CALL from_substrait('\x12\x07\x1A\x05\x10\x01\x1A\x01*\x12\x09\x1A\x07\x10\x02\x1A\x03sum\x1A\xC7\x03\x12\xC4\x03\x0A\xB8\x03:\xB5\x03\x12\xA8\x03\x22\xA5\x03\x12\xEC\x02\x0A\xE9\x02\x12\xC4\x02\x0A\x0Al_orderkey\x0A\x09l_partkey\x0A\x09l_suppkey\x0A\x0Cl_linenumber\x0A\x0Al_quantity\x0A\x0Fl_extendedprice\x0A\x0Al_discount\x0A\x05l_tax\x0A\x0Cl_returnflag\x0A\x0Cl_linestatus\x0A\x0Al_shipdate\x0A\x0Cl_commitdate\x0A\x0Dl_receiptdate\x0A\x0El_shipinstruct\x0A\x0Al_shipmode\x0A\x09l_comment\x12v\x0A\x04:\x02\x10\x01\x0A\x04:\x02\x10\x01\x0A\x04:\x02\x10\x01\x0A\x02*\x00\x0A\x02*\x00\x0A\x04Z\x02\x10\x01\x0A\x04Z\x02\x10\x01\x0A\x04Z\x02\x10\x01\x0A\x07\xB2\x01\x04\x08\x01\x18\x01\x0A\x07\xB2\x01\x04\x08\x01\x18\x01\x0A\x07\xB2\x01\x04\x08\x0A\x18\x01\x0A\x07\xB2\x01\x04\x08\x0A\x18\x01\x0A\x07\xB2\x01\x04\x08\x0A\x18\x01\x0A\x07\xB2\x01\x04\x08\x11\x18\x01\x0A\x07\xB2\x01\x04\x08\x07\x18\x01\x0A\x07\xB2\x01\x04\x08+\x18\x01\x18\x02\x22\x0C\x0A\x08\x0A\x02\x08\x05\x0A\x02\x08\x06\x10\x01:\x12\x0A\x10lineitem_parquet\x1A\x00\x222\x0A0\x08\x02*\x04Z\x02\x10\x01:&\x1A$\x1A\x22\x08\x01\x1A\x04Z\x02\x10\x01\x22\x0A\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x22\x0C\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x12\x07revenue2\x0A\x100*\x06DuckDB'::BLOB) +CALL 
from_substrait('\x12\x07\x1A\x05\x10\x01\x1A\x01*\x12\x09\x1A\x07\x10\x02\x1A\x03sum\x1A\xB7\x03\x12\xB4\x03\x0A\xA8\x03\x22\xA5\x03\x12\xEC\x02\x0A\xE9\x02\x12\xC4\x02\x0A\x0Al_orderkey\x0A\x09l_partkey\x0A\x09l_suppkey\x0A\x0Cl_linenumber\x0A\x0Al_quantity\x0A\x0Fl_extendedprice\x0A\x0Al_discount\x0A\x05l_tax\x0A\x0Cl_returnflag\x0A\x0Cl_linestatus\x0A\x0Al_shipdate\x0A\x0Cl_commitdate\x0A\x0Dl_receiptdate\x0A\x0El_shipinstruct\x0A\x0Al_shipmode\x0A\x09l_comment\x12v\x0A\x04:\x02\x10\x01\x0A\x04:\x02\x10\x01\x0A\x04:\x02\x10\x01\x0A\x02*\x00\x0A\x02*\x00\x0A\x04Z\x02\x10\x01\x0A\x04Z\x02\x10\x01\x0A\x04Z\x02\x10\x01\x0A\x07\xB2\x01\x04\x08\x01\x18\x01\x0A\x07\xB2\x01\x04\x08\x01\x18\x01\x0A\x07\xB2\x01\x04\x08\x0A\x18\x01\x0A\x07\xB2\x01\x04\x08\x0A\x18\x01\x0A\x07\xB2\x01\x04\x08\x0A\x18\x01\x0A\x07\xB2\x01\x04\x08\x11\x18\x01\x0A\x07\xB2\x01\x04\x08\x07\x18\x01\x0A\x07\xB2\x01\x04\x08+\x18\x01\x18\x02\x22\x0C\x0A\x08\x0A\x02\x08\x05\x0A\x02\x08\x06\x10\x01:\x12\x0A\x10lineitem_parquet\x1A\x00\x222\x0A0\x08\x02*\x04Z\x02\x10\x01:&\x1A$\x1A\x22\x08\x01\x1A\x04Z\x02\x10\x01\x22\x0A\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x22\x0C\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x12\x07revenue2\x0A\x100*\x06DuckDB'::BLOB) ---- 19107076.83379995 diff --git a/test/sql/test_temporary_view.test b/test/sql/test_temporary_view.test index c2e6bbb..de37cc8 100644 --- a/test/sql/test_temporary_view.test +++ b/test/sql/test_temporary_view.test @@ -25,7 +25,7 @@ statement ok call get_substrait('SELECT * FROM mytempview') query II -call from_substrait('\x1Au\x12s\x0Af:d\x12L:J\x122\x0A0\x12\x1B\x0A\x03age\x0A\x04name\x12\x0E\x0A\x04*\x02\x10\x01\x0A\x04b\x02\x10\x01\x18\x02\x22\x0A\x0A\x06\x0A\x00\x0A\x02\x08\x01\x10\x01:\x05\x0A\x03tbl\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x1A\x08\x12\x06\x0A\x02\x12\x00\x22\x00\x1A\x0A\x12\x08\x0A\x04\x12\x02\x08\x01\x22\x00\x12\x03age\x12\x04name2\x0A\x105*\x06DuckDB'::BLOB) +call 
from_substrait('\x1AA\x12?\x0A2\x0A0\x12\x1B\x0A\x03age\x0A\x04name\x12\x0E\x0A\x04*\x02\x10\x01\x0A\x04b\x02\x10\x01\x18\x02\x22\x0A\x0A\x06\x0A\x00\x0A\x02\x08\x01\x10\x01:\x05\x0A\x03tbl\x12\x03age\x12\x04name2\x0A\x105*\x06DuckDB'::BLOB) ---- 1 Alice 2 Bob \ No newline at end of file From c9d28f191bae60ddd8ac50ca7a0512d8c79c05c5 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Tue, 14 Jan 2025 03:39:51 +0530 Subject: [PATCH 6/8] fix SEMI_JOIN --- src/to_substrait.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp index fffd7f3..c256030 100644 --- a/src/to_substrait.cpp +++ b/src/to_substrait.cpp @@ -870,7 +870,6 @@ substrait::Rel *DuckDBToSubstrait::TransformProjection(LogicalOperator &dop) { auto &dproj = dop.Cast(); auto child_column_count = dop.children[0]->types.size(); - auto num_passthrough_columns = 0; auto need_output_mapping = true; if (child_column_count <= dproj.expressions.size()) { // check if the projection is just pass through of input columns with no reordering @@ -881,7 +880,6 @@ substrait::Rel *DuckDBToSubstrait::TransformProjection(LogicalOperator &dop) { is_passthrough = false; break; } - num_passthrough_columns++; auto &dref = dexpr.get()->Cast(); if (dref.index != exp_col_idx) { is_passthrough = false; @@ -895,7 +893,6 @@ substrait::Rel *DuckDBToSubstrait::TransformProjection(LogicalOperator &dop) { } if (child_column_count == exp_col_idx) { // all input columns are projected, no need for output mapping - num_passthrough_columns = child_column_count; need_output_mapping = false; } } @@ -1048,18 +1045,20 @@ substrait::Rel *DuckDBToSubstrait::TransformComparisonJoin(LogicalOperator &dop) djoin.right_projection_map.push_back(i); } } + // TODO this projection seems redundant but from_substrait does not work without it auto proj_rel = new substrait::Rel(); auto projection = proj_rel->mutable_project(); + auto child_column_count = dop.children[0]->types.size(); for (auto 
left_idx : djoin.left_projection_map) { CreateFieldRef(projection->add_expressions(), left_idx); } if (djoin.join_type != JoinType::SEMI) { + child_column_count += dop.children[1]->types.size(); for (auto right_idx : djoin.right_projection_map) { CreateFieldRef(projection->add_expressions(), right_idx + left_col_count); } } - auto child_column_count = dop.children[0]->types.size() + dop.children[1]->types.size(); vector output_mapping; for (idx_t i = 0; i < projection->expressions_size(); i++) { output_mapping.push_back(child_column_count + i); From ff0cc2666f688ffd5dd025432dc62fc831e14cf3 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Fri, 17 Jan 2025 07:32:21 +0530 Subject: [PATCH 7/8] Fix 1. projection with filter and 2. Pushdown projection in ReadRel --- src/from_substrait.cpp | 22 ++++++++++++++------ src/include/to_substrait.hpp | 1 + src/to_substrait.cpp | 40 +++++++++++++++++++++++++++++++----- test/c/test_projection.cpp | 14 ++++++++++++- 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/src/from_substrait.cpp b/src/from_substrait.cpp index d29e355..1204f1f 100644 --- a/src/from_substrait.cpp +++ b/src/from_substrait.cpp @@ -776,15 +776,25 @@ shared_ptr SubstraitToDuckDB::TransformWriteOp(const substrait::Rel &s } auto input = TransformOp(swrite.input()); switch (swrite.op()) { - case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_CTAS: - return input->CreateRel(schema_name, table_name); + case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_CTAS: + return input->CreateRel(schema_name, table_name); case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_INSERT: return input->InsertRel(schema_name, table_name); - case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_DELETE: { - auto filter = std::move(input.get()->Cast()); - auto context = filter.child->Cast().context; + case substrait::WriteRel::WriteOp::WriteRel_WriteOp_WRITE_OP_DELETE: { + switch (input->type) { + case RelationType::PROJECTION_RELATION: { 
+ auto project = std::move(input.get()->Cast()); + auto filter = std::move(project.child->Cast()); return make_shared_ptr(filter.context, std::move(filter.condition), schema_name, table_name); - } + } + case RelationType::FILTER_RELATION: { + auto filter = std::move(input.get()->Cast()); + return make_shared_ptr(filter.context, std::move(filter.condition), schema_name, table_name); + } + default: + throw NotImplementedException("Unsupported relation type for delete operation"); + } + } default: throw NotImplementedException("Unsupported write operation " + to_string(swrite.op())); } diff --git a/src/include/to_substrait.hpp b/src/include/to_substrait.hpp index 453f688..6deb429 100644 --- a/src/include/to_substrait.hpp +++ b/src/include/to_substrait.hpp @@ -72,6 +72,7 @@ class DuckDBToSubstrait { substrait::Rel *TransformCreateTable(LogicalOperator &dop); substrait::Rel *TransformInsertTable(LogicalOperator &dop); substrait::Rel *TransformDeleteTable(LogicalOperator &dop); + static vector::__alloc_traits::size_type GetColumnCount(LogicalOperator &dop); static substrait::Rel *TransformDummyScan(); static substrait::RelCommon *CreateOutputMapping(vector vector); //! 
Methods to transform different LogicalGe:75 diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp index c256030..a5ff3a0 100644 --- a/src/to_substrait.cpp +++ b/src/to_substrait.cpp @@ -847,10 +847,17 @@ substrait::Rel *DuckDBToSubstrait::TransformFilter(LogicalOperator &dop) { if (!dfilter.projection_map.empty()) { auto projection = new substrait::Rel(); - projection->mutable_project()->set_allocated_input(res); + auto sproj = projection->mutable_project(); + sproj->set_allocated_input(res); + auto child_column_count = GetColumnCount(*dop.children[0]); + auto t_index = 0; + vector output_mapping; for (auto col_idx : dfilter.projection_map) { - CreateFieldRef(projection->mutable_project()->add_expressions(), col_idx); + CreateFieldRef(sproj->add_expressions(), col_idx); + output_mapping.push_back(child_column_count + t_index++); } + auto rel_common = CreateOutputMapping(output_mapping); + sproj->set_allocated_common(rel_common); res = projection; } return res; @@ -869,7 +876,7 @@ substrait::Rel *DuckDBToSubstrait::TransformProjection(LogicalOperator &dop) { auto res = new substrait::Rel(); auto &dproj = dop.Cast(); - auto child_column_count = dop.children[0]->types.size(); + auto child_column_count = GetColumnCount(*dop.children[0]); auto need_output_mapping = true; if (child_column_count <= dproj.expressions.size()) { // check if the projection is just pass through of input columns with no reordering @@ -1048,12 +1055,12 @@ substrait::Rel *DuckDBToSubstrait::TransformComparisonJoin(LogicalOperator &dop) // TODO this projection seems redundant but from_substrait does not work without it auto proj_rel = new substrait::Rel(); auto projection = proj_rel->mutable_project(); - auto child_column_count = dop.children[0]->types.size(); + auto child_column_count = GetColumnCount(*dop.children[0]); for (auto left_idx : djoin.left_projection_map) { CreateFieldRef(projection->add_expressions(), left_idx); } if (djoin.join_type != JoinType::SEMI) { - child_column_count += 
dop.children[1]->types.size(); + child_column_count += GetColumnCount(*dop.children[1]); for (auto right_idx : djoin.right_projection_map) { CreateFieldRef(projection->add_expressions(), right_idx + left_col_count); } @@ -1391,6 +1398,25 @@ substrait::Rel *DuckDBToSubstrait::TransformGet(LogicalOperator &dop) { } projection->set_allocated_select(select); sget->set_allocated_projection(projection); + } else if (!dget.GetColumnIds().empty()) { + auto &column_ids = dget.GetColumnIds(); + vector column_indices; + for (auto &column_id : column_ids) { + if (!column_id.IsRowIdColumn()) { + column_indices.push_back(column_id.GetPrimaryIndex()); + } + } + if (!column_indices.empty() && column_indices.size() < dget.returned_types.size()) { + auto projection = new substrait::Expression_MaskExpression(); + projection->set_maintain_singular_struct(true); + auto select = new substrait::Expression_MaskExpression_StructSelect(); + for (auto col_idx : column_indices) { + auto struct_item = select->add_struct_items(); + struct_item->set_field(static_cast(col_idx)); + } + projection->set_allocated_select(select); + sget->set_allocated_projection(projection); + } } // Add Table Schema @@ -1607,6 +1633,10 @@ substrait::Rel *DuckDBToSubstrait::TransformDeleteTable(LogicalOperator &dop) { return rel; } +vector::__alloc_traits::size_type DuckDBToSubstrait::GetColumnCount(LogicalOperator &dop) { + return dop.types.size(); +} + substrait::Rel *DuckDBToSubstrait::TransformOp(LogicalOperator &dop) { switch (dop.type) { case LogicalOperatorType::LOGICAL_FILTER: diff --git a/test/c/test_projection.cpp b/test/c/test_projection.cpp index 04a0b7b..b581e95 100644 --- a/test/c/test_projection.cpp +++ b/test/c/test_projection.cpp @@ -156,4 +156,16 @@ TEST_CASE("Test Project with duplicate columns", "[substrait-api]") { auto query_json = 
R"({"relations":[{"root":{"input":{"project":{"input":{"fetch":{"input":{"read":{"baseSchema":{"names":["i"],"struct":{"types":[{"i32":{"nullability":"NULLABILITY_NULLABLE"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{}]},"maintainSingularStruct":true},"namedTable":{"names":["integers"]}}},"count":"5"}},"expressions":[{"selection":{"directReference":{"structField":{}},"rootReference":{}}}]}},"names":["i", "integers"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})"; auto res1 = con.FromSubstraitJSON(query_json); REQUIRE(CHECK_COLUMN(res1, 0, {1, 2, 3, Value()})); -} \ No newline at end of file +} + +TEST_CASE("Test Project simple join on tables with multiple columns", "[substrait-api]") { + DuckDB db(nullptr); + Connection con(db); + con.EnableQueryVerification(); + REQUIRE_NO_FAIL(con.Query("CALL dbgen(sf=0.000001)")); + + auto query_text_2 = "SELECT extract(year FROM o_orderdate), l_extendedprice * (1 - l_discount) AS amount FROM lineitem, orders WHERE o_orderkey = l_orderkey"; + auto json2 = con.GetSubstraitJSON(query_text_2); + auto expected_json = 
R"cust_raw({"extensionUris":[{"extensionUriAnchor":1,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/"},{"extensionUriAnchor":2,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/functions_datetime.yaml"},{"extensionUriAnchor":3,"uri":"https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic_decimal.yaml"}],"extensions":[{"extensionFunction":{"extensionUriReference":1,"functionAnchor":1,"name":"equal:i64_i64"}},{"extensionFunction":{"extensionUriReference":2,"functionAnchor":2,"name":"extract:date"}},{"extensionFunction":{"extensionUriReference":3,"functionAnchor":3,"name":"subtract:decimal_decimal"}},{"extensionFunction":{"extensionUriReference":3,"functionAnchor":4,"name":"multiply:decimal_decimal"}}],"relations":[{"root":{"input":{"project":{"common":{"emit":{"outputMapping":[3,4]}},"input":{"project":{"common":{"emit":{"outputMapping":[26,27,28]}},"input":{"join":{"left":{"project":{"common":{"emit":{"outputMapping":[7,6,8,5,9,4,10,3,11,2,12,1,13,0,14]}},"input":{"project":{"common":{"emit":{"outputMapping":[3,2,4,1,5,0,6]}},"input":{"read":{"baseSchema":{"names":["l_orderkey","l_partkey","l_suppkey","l_linenumber","l_quantity","l_extendedprice","l_discount","l_tax","l_returnflag","l_linestatus","l_shipdate","l_commitdate","l_receiptdate","l_shipinstruct","l_shipmode","l_comment"],"struct":{"types":[{"i64":{"nullability":"NULLABILITY_REQUIRED"}},{"i64":{"nullability":"NULLABILITY_REQUIRED"}},{"i64":{"nullability":"NULLABILITY_REQUIRED"}},{"i64":{"nullability":"NULLABILITY_REQUIRED"}},{"decimal":{"scale":2,"precision":15,"nullability":"NULLABILITY_REQUIRED"}},{"decimal":{"scale":2,"precision":15,"nullability":"NULLABILITY_REQUIRED"}},{"decimal":{"scale":2,"precision":15,"nullability":"NULLABILITY_REQUIRED"}},{"decimal":{"scale":2,"precision":15,"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"date":{"nulla
bility":"NULLABILITY_REQUIRED"}},{"date":{"nullability":"NULLABILITY_REQUIRED"}},{"date":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{},{"field":5},{"field":6}]},"maintainSingularStruct":true},"namedTable":{"names":["lineitem"]}}},"expressions":[{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}}]}},"expressions":[{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}}]}},"right":{"project":{"common":{"emit":{"outputMapping":[5,4,6,3,7,2,8,1,9,0,10]}},"input":{"project":{"common":{"emit":{"outputMapping":[2,1,3,0,4]}},"input":{"read":{"baseSchema":{"names":["o_orderkey","o_custkey","o_orderstatus","o_totalprice","o_orderdate","o_orderpriority","o_clerk","o_shippriority","o_comment"],"struct":{"types":[{"i64":{"nullability":"NULLABILITY_REQUIRED"}},{"i64":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"decimal":{"scale":2,"precision":15,"nullability":"NULLABILITY_REQUIRED"}},{"date":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}},{"i32":{"nullability":"NULLABILITY_REQUIRED"}},{"string":{"nullability":"NULLABILITY_REQUIRED"}}],"nullability":"NULLABILITY_REQUIRED"}},"projection":{"select":{"structItems":[{},{"field":4}]},"maintainSingularStruct":true},"namedTable":{"names":["orders"]}}},"expressions":[{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}}]}},"expressions":[{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}},{"literal":{"null":{}}}]}},"exp
ression":{"scalarFunction":{"functionReference":1,"outputType":{"bool":{"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"selection":{"directReference":{"structField":{"field":3}},"rootReference":{}}}},{"value":{"selection":{"directReference":{"structField":{"field":18}},"rootReference":{}}}}]}},"type":"JOIN_TYPE_INNER"}},"expressions":[{"selection":{"directReference":{"structField":{"field":7}},"rootReference":{}}},{"selection":{"directReference":{"structField":{"field":11}},"rootReference":{}}},{"selection":{"directReference":{"structField":{"field":22}},"rootReference":{}}}]}},"expressions":[{"scalarFunction":{"functionReference":2,"outputType":{"i64":{"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"enum":"year"},{"value":{"selection":{"directReference":{"structField":{"field":2}},"rootReference":{}}}}]}},{"scalarFunction":{"functionReference":4,"outputType":{"decimal":{"scale":4,"precision":18,"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"selection":{"directReference":{"structField":{}},"rootReference":{}}}},{"value":{"scalarFunction":{"functionReference":3,"outputType":{"decimal":{"scale":2,"precision":16,"nullability":"NULLABILITY_NULLABLE"}},"arguments":[{"value":{"literal":{"decimal":{"value":"ZAAAAAAAAAAAAAAAAAAAAA==","precision":16,"scale":2}}}},{"value":{"selection":{"directReference":{"structField":{"field":1}},"rootReference":{}}}}]}}}]}}]}},"names":["\"year\"(o_orderdate)","amount"]}}],"version":{"minorNumber":53,"producer":"DuckDB"}})cust_raw"; + REQUIRE(json2 == expected_json); +} From eebaf3db22ed5721e9837ab8263faab307eaf9f6 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Fri, 17 Jan 2025 07:59:41 +0530 Subject: [PATCH 8/8] Fix build error --- src/include/to_substrait.hpp | 2 +- src/to_substrait.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/to_substrait.hpp b/src/include/to_substrait.hpp index 6deb429..b2264e6 100644 --- a/src/include/to_substrait.hpp +++ 
b/src/include/to_substrait.hpp @@ -72,7 +72,7 @@ class DuckDBToSubstrait { substrait::Rel *TransformCreateTable(LogicalOperator &dop); substrait::Rel *TransformInsertTable(LogicalOperator &dop); substrait::Rel *TransformDeleteTable(LogicalOperator &dop); - static vector::__alloc_traits::size_type GetColumnCount(LogicalOperator &dop); + static vector::size_type GetColumnCount(LogicalOperator &dop); static substrait::Rel *TransformDummyScan(); static substrait::RelCommon *CreateOutputMapping(vector vector); //! Methods to transform different LogicalGe:75 diff --git a/src/to_substrait.cpp b/src/to_substrait.cpp index a5ff3a0..1cce68f 100644 --- a/src/to_substrait.cpp +++ b/src/to_substrait.cpp @@ -1633,7 +1633,7 @@ substrait::Rel *DuckDBToSubstrait::TransformDeleteTable(LogicalOperator &dop) { return rel; } -vector::__alloc_traits::size_type DuckDBToSubstrait::GetColumnCount(LogicalOperator &dop) { +vector::size_type DuckDBToSubstrait::GetColumnCount(LogicalOperator &dop) { return dop.types.size(); }