From 5c10815d49a061c6311d85cb6092429d3947977d Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 2 Aug 2023 13:01:19 +0200 Subject: [PATCH 1/2] Disable Compressed Materialization --- duckdb | 2 +- src/substrait_extension.cpp | 3 ++ test/python/test_set_operation_relation.py | 34 ++++++++++------------ test/sql/test_except.test | 4 --- test/sql/test_intersect.test | 6 +--- test/sql/test_substrait_parquet.test | 3 -- test/sql/test_substrait_subqueries.test | 3 -- test/sql/test_substrait_tpch.test | 3 -- 8 files changed, 21 insertions(+), 37 deletions(-) diff --git a/duckdb b/duckdb index 811683f..02412e1 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 811683f695f8db22948950e25018d62ff3357237 +Subproject commit 02412e10151c893be139082cf3f1362f455308b0 diff --git a/src/substrait_extension.cpp b/src/substrait_extension.cpp index 06fd2b2..955806d 100644 --- a/src/substrait_extension.cpp +++ b/src/substrait_extension.cpp @@ -97,6 +97,9 @@ static void ToSubFunctionInternal(ClientContext &context, ToSubstraitFunctionDat // We might want to disable the optimizer of our new connection new_conn.context->config.enable_optimizer = data.enable_optimizer; new_conn.context->config.use_replacement_scans = false; + // We want for sure to disable the internal compression optimizations + // These are DuckDB specific, no other system implements these + new_conn.Query("SET disabled_optimizers to 'compressed_materialization';"); query_plan = new_conn.context->ExtractPlan(data.query); DuckDBToSubstrait transformer_d2s(context, *query_plan); serialized = transformer_d2s.SerializeToString(); diff --git a/test/python/test_set_operation_relation.py b/test/python/test_set_operation_relation.py index 51ba139..33458e8 100644 --- a/test/python/test_set_operation_relation.py +++ b/test/python/test_set_operation_relation.py @@ -46,15 +46,14 @@ def test_except(self, connection): (3, 4, 5, 6) ) as tbl(B, C, D, A) """) - # FIXME: test currently fails - # query = """ - # select * from tbl1 EXCEPT (select * from tbl2); - # """ - # expected = connection.sql(query).fetchall() - # json = connection.get_substrait_json(query).fetchall()[0][0] - # rel = connection.from_substrait_json(json) - # actual = rel.fetchall() - # assert expected == actual + query = """ + select * from tbl1 EXCEPT (select * from tbl2); + """ + expected = connection.sql(query).fetchall() + json = connection.get_substrait_json(query).fetchall()[0][0] + rel = connection.from_substrait_json(json) + actual = rel.fetchall() + assert expected == actual def test_intersect(self, connection): connection.execute(""" @@ -69,12 +68,11 @@ def test_intersect(self, connection): (3, 4, 5, 6) ) as tbl(B, C, D, A) """) - # FIXME: test currently fails - # query = """ - # select * from tbl1 INTERSECT (select * from tbl2); - # """ - # expected = connection.sql(query).fetchall() - # json = connection.get_substrait_json(query).fetchall()[0][0] - # rel = connection.from_substrait_json(json) - # actual = rel.fetchall() - # assert expected == actual + query = """ + select * from tbl1 INTERSECT (select * from tbl2); + """ + expected = connection.sql(query).fetchall() + json = connection.get_substrait_json(query).fetchall()[0][0] + rel = connection.from_substrait_json(json) + actual = rel.fetchall() + assert expected == actual diff --git a/test/sql/test_except.test b/test/sql/test_except.test index f7d991e..b10a59d 100644 --- a/test/sql/test_except.test +++ b/test/sql/test_except.test @@ -8,10 +8,6 @@ statement ok PRAGMA enable_verification # Create two tables to use in the EXCEPT - -# FIXME: currently broken -mode skip - statement ok create table tbl1 as select * from (VALUES (1, 2, 3, 4), diff --git a/test/sql/test_intersect.test b/test/sql/test_intersect.test index 6945928..6ca7bba 100644 --- a/test/sql/test_intersect.test +++ b/test/sql/test_intersect.test @@ -7,11 +7,7 @@ require substrait statement ok PRAGMA enable_verification -# Create two tables to use in the INTERSECTx - -# FIXME: currently broken with: -mode skip - +# Create two tables to use in the INTERSECT statement ok create table tbl1 as select * from (VALUES (1, 2, 3, 4), diff --git a/test/sql/test_substrait_parquet.test b/test/sql/test_substrait_parquet.test index ff662a8..9a2c710 100644 --- a/test/sql/test_substrait_parquet.test +++ b/test/sql/test_substrait_parquet.test @@ -34,9 +34,6 @@ CALL from_substrait('\x12\x09\x1A\x07\x10\x01\x1A\x03sum\x12\x07\x1A\x05\x10\x02 ---- 19107076.83379995 -# TODO FIXME -mode skip - # Test Globbing statement ok CALL get_substrait('select * from parquet_scan(''data/parquet-testing/glob*/t?.parquet'') order by i') diff --git a/test/sql/test_substrait_subqueries.test b/test/sql/test_substrait_subqueries.test index 6ca7e80..ba9b6bb 100644 --- a/test/sql/test_substrait_subqueries.test +++ b/test/sql/test_substrait_subqueries.test @@ -7,9 +7,6 @@ require substrait statement ok PRAGMA enable_verification -# FIXME: currently broken -mode skip - statement ok CREATE TABLE integers (i integer); diff --git a/test/sql/test_substrait_tpch.test b/test/sql/test_substrait_tpch.test index 7a68538..ffa2666 100644 --- a/test/sql/test_substrait_tpch.test +++ b/test/sql/test_substrait_tpch.test @@ -12,9 +12,6 @@ PRAGMA enable_verification statement ok CALL dbgen(sf=0.01) -# FIXME: currently broken -mode skip - #Q 01 statement ok CALL get_substrait('SELECT l_returnflag, l_linestatus, sum(l_quantity) AS sum_qty, sum(l_extendedprice) AS sum_base_price, sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge, avg(l_quantity) AS avg_qty, avg(l_extendedprice) AS avg_price, avg(l_discount) AS avg_disc, count(*) AS count_order FROM lineitem WHERE l_shipdate <= CAST(''1998-09-02'' AS date) GROUP BY l_returnflag, l_linestatus ORDER BY l_returnflag, l_linestatus;') From 52fd072740883b3774e7faf0e738a4078728ccd1 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 2 Aug 2023 15:22:49 +0200 Subject: [PATCH 2/2] Have to also disable compressed materialization in json export --- src/substrait_extension.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/substrait_extension.cpp b/src/substrait_extension.cpp index 955806d..a2f03b1 100644 --- a/src/substrait_extension.cpp +++ b/src/substrait_extension.cpp @@ -97,9 +97,9 @@ static void ToSubFunctionInternal(ClientContext &context, ToSubstraitFunctionDat // We might want to disable the optimizer of our new connection new_conn.context->config.enable_optimizer = data.enable_optimizer; new_conn.context->config.use_replacement_scans = false; - // We want for sure to disable the internal compression optimizations - // These are DuckDB specific, no other system implements these - new_conn.Query("SET disabled_optimizers to 'compressed_materialization';"); + // We want for sure to disable the internal compression optimizations + // These are DuckDB specific, no other system implements these + new_conn.Query("SET disabled_optimizers to 'compressed_materialization';"); query_plan = new_conn.context->ExtractPlan(data.query); DuckDBToSubstrait transformer_d2s(context, *query_plan); serialized = transformer_d2s.SerializeToString(); @@ -137,6 +137,9 @@ static void ToJsonFunctionInternal(ClientContext &context, ToSubstraitFunctionDa // We might want to disable the optimizer of our new connection new_conn.context->config.enable_optimizer = data.enable_optimizer; new_conn.context->config.use_replacement_scans = false; + // We want for sure to disable the internal compression optimizations + // These are DuckDB specific, no other system implements these + new_conn.Query("SET disabled_optimizers to 'compressed_materialization';"); query_plan = new_conn.context->ExtractPlan(data.query); DuckDBToSubstrait transformer_d2s(context, *query_plan); serialized = transformer_d2s.SerializeToJson();