From 28b19aa5585aedd9e5ed5eef174a3a9a2c15bacb Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 2 Apr 2024 12:09:57 +0200 Subject: [PATCH 1/2] throw if an invalid struct is encountered --- src/function/table/arrow.cpp | 4 +++ .../tests/fast/arrow/test_arrow_types.py | 27 +++++++++++-------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/function/table/arrow.cpp b/src/function/table/arrow.cpp index a65257a8363b..345295739467 100644 --- a/src/function/table/arrow.cpp +++ b/src/function/table/arrow.cpp @@ -125,6 +125,10 @@ static unique_ptr GetArrowLogicalTypeNoDictionary(ArrowSchema &schema } else if (format == "+s") { child_list_t child_types; vector> children; + if (schema.n_children == 0) { + throw InvalidInputException( + "Attempted to convert a STRUCT with no fields to DuckDB which is not supported"); + } for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) { children.emplace_back(ArrowTableFunction::GetArrowLogicalType(*schema.children[type_idx])); child_types.emplace_back(schema.children[type_idx]->name, children.back()->GetDuckType()); diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py index 379f517167ae..0aa6c9b24f40 100644 --- a/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py +++ b/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py @@ -1,23 +1,16 @@ import duckdb +import pytest -try: - import pyarrow as pa - import pyarrow.dataset as ds - - can_run = True -except: - can_run = False +pa = pytest.importorskip("pyarrow") +ds = pytest.importorskip("pyarrow.dataset") class TestArrowTypes(object): def test_null_type(self, duckdb_cursor): - if not can_run: - return schema = pa.schema([("data", pa.null())]) inputs = [pa.array([None, None, None], type=pa.null())] arrow_table = pa.Table.from_arrays(inputs, schema=schema) - duckdb_conn = duckdb.connect() - duckdb_conn.register("testarrow", arrow_table) + duckdb_cursor.register("testarrow", arrow_table) 
rel = duckdb.from_arrow(arrow_table).arrow() # We turn it to an array of int32 nulls schema = pa.schema([("data", pa.int32())]) @@ -25,3 +18,15 @@ def test_null_type(self, duckdb_cursor): arrow_table = pa.Table.from_arrays(inputs, schema=schema) assert rel['data'] == arrow_table['data'] + + def test_invalid_struct(self, duckdb_cursor): + empty_struct_type = pa.struct([]) + + # Create an empty array with the defined struct type + empty_array = pa.array([], type=empty_struct_type) + arrow_table = pa.Table.from_arrays([empty_array], schema=pa.schema([("data", empty_struct_type)])) + with pytest.raises( + duckdb.InvalidInputException, + match='Attempted to convert a STRUCT with no fields to DuckDB which is not supported', + ): + duckdb_cursor.register('invalid_struct', arrow_table) From 4f607b1b526b110051328c4cbedf98f3d9fc6642 Mon Sep 17 00:00:00 2001 From: Tishj Date: Tue, 2 Apr 2024 15:26:03 +0200 Subject: [PATCH 2/2] this same rule applies to UNION, we don't accept union types with no fields --- src/function/table/arrow.cpp | 3 +++ .../tests/fast/arrow/test_arrow_types.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/function/table/arrow.cpp b/src/function/table/arrow.cpp index 345295739467..dd742b78f14e 100644 --- a/src/function/table/arrow.cpp +++ b/src/function/table/arrow.cpp @@ -148,6 +148,9 @@ static unique_ptr GetArrowLogicalTypeNoDictionary(ArrowSchema &schema child_list_t members; vector> children; + if (schema.n_children == 0) { + throw InvalidInputException("Attempted to convert a UNION with no fields to DuckDB which is not supported"); + } for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) { auto type = schema.children[type_idx]; diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py index 0aa6c9b24f40..24f10ea87b96 100644 --- a/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py +++ b/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py @@ 
-30,3 +30,18 @@ def test_invalid_struct(self, duckdb_cursor): match='Attempted to convert a STRUCT with no fields to DuckDB which is not supported', ): duckdb_cursor.register('invalid_struct', arrow_table) + + def test_invalid_union(self, duckdb_cursor): + # Create a sparse union array with no child arrays, i.e. a UNION type without any fields + types = pa.array([0, 1, 1], type=pa.int8()) + sparse_union_array = pa.UnionArray.from_sparse(types, [], type_codes=[]) + + arrow_table = pa.Table.from_arrays([sparse_union_array], schema=pa.schema([("data", sparse_union_array.type)])) + with pytest.raises( + duckdb.InvalidInputException, + match='Attempted to convert a UNION with no fields to DuckDB which is not supported', + ): + duckdb_cursor.register('invalid_union', arrow_table) + + res = duckdb_cursor.sql("select * from invalid_union").fetchall() + print(res)