Skip to content

Commit

Permalink
Merge pull request duckdb#11464 from Tishj/arrow_invalid_struct
Browse files Browse the repository at this point in the history
[Arrow] Throw on invalid STRUCT type
  • Loading branch information
Mytherin authored Apr 3, 2024
2 parents 0b4caed + 4f607b1 commit 5345a49
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 11 deletions.
7 changes: 7 additions & 0 deletions src/function/table/arrow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ static unique_ptr<ArrowType> GetArrowLogicalTypeNoDictionary(ArrowSchema &schema
} else if (format == "+s") {
child_list_t<LogicalType> child_types;
vector<unique_ptr<ArrowType>> children;
if (schema.n_children == 0) {
throw InvalidInputException(
"Attempted to convert a STRUCT with no fields to DuckDB which is not supported");
}
for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
children.emplace_back(ArrowTableFunction::GetArrowLogicalType(*schema.children[type_idx]));
child_types.emplace_back(schema.children[type_idx]->name, children.back()->GetDuckType());
Expand All @@ -144,6 +148,9 @@ static unique_ptr<ArrowType> GetArrowLogicalTypeNoDictionary(ArrowSchema &schema

child_list_t<LogicalType> members;
vector<unique_ptr<ArrowType>> children;
if (schema.n_children == 0) {
throw InvalidInputException("Attempted to convert a UNION with no fields to DuckDB which is not supported");
}
for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
auto type = schema.children[type_idx];

Expand Down
42 changes: 31 additions & 11 deletions tools/pythonpkg/tests/fast/arrow/test_arrow_types.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,47 @@
import duckdb
import pytest

try:
import pyarrow as pa
import pyarrow.dataset as ds

can_run = True
except:
can_run = False
pa = pytest.importorskip("pyarrow")
ds = pytest.importorskip("pyarrow.dataset")


class TestArrowTypes(object):
    """Tests for converting Arrow types into DuckDB types.

    Covers a NULL-typed column and the rejection of nested types
    (STRUCT, UNION) that carry no child fields.
    """

    def test_null_type(self, duckdb_cursor):
        """An Arrow column of type ``null`` comes back from DuckDB as int32 NULLs."""
        # No explicit availability guard here: the module-level
        # pytest.importorskip calls already skip this file without pyarrow.
        schema = pa.schema([("data", pa.null())])
        inputs = [pa.array([None, None, None], type=pa.null())]
        arrow_table = pa.Table.from_arrays(inputs, schema=schema)
        # Register on the fixture connection only; a second ad-hoc
        # connection would never be queried by this test.
        duckdb_cursor.register("testarrow", arrow_table)
        rel = duckdb.from_arrow(arrow_table).arrow()
        # We turn it to an array of int32 nulls
        schema = pa.schema([("data", pa.int32())])
        inputs = [pa.array([None, None, None], type=pa.null())]
        arrow_table = pa.Table.from_arrays(inputs, schema=schema)

        assert rel['data'] == arrow_table['data']

    def test_invalid_struct(self, duckdb_cursor):
        """Registering an Arrow table with a field-less STRUCT column must raise."""
        empty_struct_type = pa.struct([])

        # Create an empty array with the defined struct type
        empty_array = pa.array([], type=empty_struct_type)
        arrow_table = pa.Table.from_arrays([empty_array], schema=pa.schema([("data", empty_struct_type)]))
        with pytest.raises(
            duckdb.InvalidInputException,
            match='Attempted to convert a STRUCT with no fields to DuckDB which is not supported',
        ):
            duckdb_cursor.register('invalid_struct', arrow_table)

    def test_invalid_union(self, duckdb_cursor):
        """Registering an Arrow table with a member-less UNION column must raise."""
        # Build a sparse union that has type ids but no child arrays and no type codes
        types = pa.array([0, 1, 1], type=pa.int8())
        sparse_union_array = pa.UnionArray.from_sparse(types, [], type_codes=[])

        arrow_table = pa.Table.from_arrays([sparse_union_array], schema=pa.schema([("data", sparse_union_array.type)]))
        # The register call is the last statement on purpose: it is expected to
        # raise, so the table never exists and there is nothing left to query.
        with pytest.raises(
            duckdb.InvalidInputException,
            match='Attempted to convert a UNION with no fields to DuckDB which is not supported',
        ):
            duckdb_cursor.register('invalid_union', arrow_table)

0 comments on commit 5345a49

Please sign in to comment.