diff --git a/extension/json/json_functions/json_create.cpp b/extension/json/json_functions/json_create.cpp index 0abd23491a33..2167ae902e5b 100644 --- a/extension/json/json_functions/json_create.cpp +++ b/extension/json/json_functions/json_create.cpp @@ -61,6 +61,9 @@ static LogicalType GetJSONType(StructNames &const_struct_names, const LogicalTyp // The nested types need to conform as well case LogicalTypeId::LIST: return LogicalType::LIST(GetJSONType(const_struct_names, ListType::GetChildType(type))); + case LogicalTypeId::ARRAY: + return LogicalType::ARRAY(GetJSONType(const_struct_names, ArrayType::GetChildType(type)), + ArrayType::GetSize(type)); // Struct and MAP are treated as JSON values case LogicalTypeId::STRUCT: { child_list_t child_types; @@ -435,6 +438,9 @@ static void CreateValuesList(const StructNames &names, yyjson_mut_doc *doc, yyjs static void CreateValuesArray(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v, idx_t count) { + + value_v.Flatten(count); + // Initialize array for the nested values auto &child_v = ArrayVector::GetEntry(value_v); auto array_size = ArrayType::GetSize(value_v.GetType()); diff --git a/extension/parquet/column_writer.cpp b/extension/parquet/column_writer.cpp index c09b04b446e1..dcdb1c31be3a 100644 --- a/extension/parquet/column_writer.cpp +++ b/extension/parquet/column_writer.cpp @@ -1826,9 +1826,102 @@ void ListColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { child_writer->FinalizeWrite(*state.child_state); } +//===--------------------------------------------------------------------===// +// Array Column Writer +//===--------------------------------------------------------------------===// +class ArrayColumnWriter : public ListColumnWriter { +public: + ArrayColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, + idx_t max_define, unique_ptr child_writer_p, bool can_have_nulls) + : ListColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, + std::move(child_writer_p), can_have_nulls) { + } + ~ArrayColumnWriter() override = default; + +public: + void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; + void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; + void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; +}; + +void ArrayColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { + auto &state = state_p.Cast(); + auto &array_child = ArrayVector::GetEntry(vector); + auto array_size = ArrayType::GetSize(vector.GetType()); + child_writer->Analyze(*state.child_state, &state_p, array_child, array_size * count); +} + +void ArrayColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { + auto &state = state_p.Cast(); + + auto array_size = ArrayType::GetSize(vector.GetType()); + auto &validity = FlatVector::Validity(vector); + + // write definition levels and repeats + // the main difference between this and ListColumnWriter::Prepare is that we need to make sure to write out + // repetition levels and definitions for the child elements of the array even if the array itself is NULL. + idx_t start = 0; + idx_t vcount = parent ? parent->definition_levels.size() - state.parent_index : count; + idx_t vector_index = 0; + for (idx_t i = start; i < vcount; i++) { + idx_t parent_index = state.parent_index + i; + if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) { + state.definition_levels.push_back(parent->definition_levels[parent_index]); + state.repetition_levels.push_back(parent->repetition_levels[parent_index]); + state.is_empty.push_back(true); + continue; + } + auto first_repeat_level = + parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : max_repeat; + if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) { + state.definition_levels.push_back(parent->definition_levels[parent_index]); + state.repetition_levels.push_back(first_repeat_level); + state.is_empty.push_back(false); + for (idx_t k = 1; k < array_size; k++) { + state.repetition_levels.push_back(max_repeat + 1); + state.definition_levels.push_back(parent->definition_levels[parent_index]); + state.is_empty.push_back(false); + } + } else if (validity.RowIsValid(vector_index)) { + // push the repetition levels + state.definition_levels.push_back(PARQUET_DEFINE_VALID); + state.is_empty.push_back(false); + + state.repetition_levels.push_back(first_repeat_level); + for (idx_t k = 1; k < array_size; k++) { + state.repetition_levels.push_back(max_repeat + 1); + state.definition_levels.push_back(PARQUET_DEFINE_VALID); + state.is_empty.push_back(false); + } + } else { + state.definition_levels.push_back(max_define - 1); + state.repetition_levels.push_back(first_repeat_level); + state.is_empty.push_back(false); + for (idx_t k = 1; k < array_size; k++) { + state.repetition_levels.push_back(max_repeat + 1); + state.definition_levels.push_back(max_define - 1); + state.is_empty.push_back(false); + } + } + vector_index++; + } + state.parent_index += vcount; + + auto &array_child = ArrayVector::GetEntry(vector); + child_writer->Prepare(*state.child_state, &state_p, array_child, count * array_size); +} + +void ArrayColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { + auto &state = state_p.Cast(); + auto array_size = ArrayType::GetSize(vector.GetType()); + auto &array_child = ArrayVector::GetEntry(vector); + child_writer->Write(*state.child_state, array_child, count * array_size); +} + //===--------------------------------------------------------------------===// // Create Column Writer //===--------------------------------------------------------------------===// + unique_ptr ColumnWriter::CreateWriterRecursive(vector &schemas, ParquetWriter &writer, const LogicalType &type, const string &name, vector schema_path, @@ -1877,8 +1970,9 @@ unique_ptr ColumnWriter::CreateWriterRecursive(vector(writer, schema_idx, std::move(schema_path), max_repeat, max_define, std::move(child_writers), can_have_nulls); } - if (type.id() == LogicalTypeId::LIST) { - auto &child_type = ListType::GetChildType(type); + if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::ARRAY) { + auto is_list = type.id() == LogicalTypeId::LIST; + auto &child_type = is_list ? ListType::GetChildType(type) : ArrayType::GetChildType(type); // set up the two schema elements for the list // for some reason we only set the converted type in the OPTIONAL element // first an OPTIONAL element @@ -1905,14 +1999,19 @@ unique_ptr ColumnWriter::CreateWriterRecursive(vector(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - std::move(child_writer), can_have_nulls); + if (is_list) { + return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, + std::move(child_writer), can_have_nulls); + } else { + return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, + std::move(child_writer), can_have_nulls); + } } if (type.id() == LogicalTypeId::MAP) { // map type diff --git a/extension/parquet/parquet_writer.cpp b/extension/parquet/parquet_writer.cpp index 0999a8aa8b98..7c299fa793a1 100644 --- a/extension/parquet/parquet_writer.cpp +++ b/extension/parquet/parquet_writer.cpp @@ -170,6 +170,10 @@ CopyTypeSupport ParquetWriter::TypeIsSupported(const LogicalType &type) { auto &child_type = ListType::GetChildType(type); return TypeIsSupported(child_type); } + if (id == LogicalTypeId::ARRAY) { + auto &child_type = ArrayType::GetChildType(type); + return TypeIsSupported(child_type); + } if (id == LogicalTypeId::UNION) { auto count = UnionType::GetMemberCount(type); for (idx_t i = 0; i < count; i++) { diff --git a/src/catalog/catalog_entry/table_catalog_entry.cpp b/src/catalog/catalog_entry/table_catalog_entry.cpp index e328fc9a4abf..c4332497321c 100644 --- a/src/catalog/catalog_entry/table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/table_catalog_entry.cpp @@ -217,6 +217,7 @@ static void BindExtraColumns(TableCatalogEntry &table, LogicalGet &get, LogicalP static bool TypeSupportsRegularUpdate(const LogicalType &type) { switch (type.id()) { case LogicalTypeId::LIST: + case LogicalTypeId::ARRAY: case LogicalTypeId::MAP: case LogicalTypeId::UNION: // lists and maps and unions don't support updates directly diff --git a/src/common/arrow/appender/CMakeLists.txt b/src/common/arrow/appender/CMakeLists.txt index f7d91fa84e33..a342652e23a3 100644 --- a/src/common/arrow/appender/CMakeLists.txt +++ b/src/common/arrow/appender/CMakeLists.txt @@ -1,5 +1,5 @@ add_library_unity(duckdb_common_arrow_appender OBJECT bool_data.cpp - struct_data.cpp union_data.cpp) + struct_data.cpp union_data.cpp fixed_size_list_data.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ PARENT_SCOPE) diff --git a/src/common/arrow/appender/fixed_size_list_data.cpp b/src/common/arrow/appender/fixed_size_list_data.cpp new file mode 100644 index 000000000000..d546e16db11c --- /dev/null +++ b/src/common/arrow/appender/fixed_size_list_data.cpp @@ -0,0 +1,39 @@ +#include "duckdb/common/arrow/arrow_appender.hpp" +#include "duckdb/common/arrow/appender/fixed_size_list_data.hpp" + +namespace duckdb { + +//===--------------------------------------------------------------------===// +// Arrays +//===--------------------------------------------------------------------===// +void ArrowFixedSizeListData::Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) { + auto &child_type = ArrayType::GetChildType(type); + auto array_size = ArrayType::GetSize(type); + auto child_buffer = ArrowAppender::InitializeChild(child_type, capacity * array_size, result.options); + result.child_data.push_back(std::move(child_buffer)); +} + +void ArrowFixedSizeListData::Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, + idx_t input_size) { + UnifiedVectorFormat format; + input.ToUnifiedFormat(input_size, format); + idx_t size = to - from; + AppendValidity(append_data, format, from, to); + + auto array_size = ArrayType::GetSize(input.GetType()); + auto &child_vector = ArrayVector::GetEntry(input); + auto &child_data = *append_data.child_data[0]; + child_data.append_vector(child_data, child_vector, from * array_size, to * array_size, size * array_size); + append_data.row_count += size; +} + +void ArrowFixedSizeListData::Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result) { + result->n_buffers = 1; + auto &child_type = ArrayType::GetChildType(type); + ArrowAppender::AddChildren(append_data, 1); + result->children = append_data.child_pointers.data(); + result->n_children = 1; + append_data.child_arrays[0] = *ArrowAppender::FinalizeChild(child_type, std::move(append_data.child_data[0])); +} + +} // namespace duckdb diff --git a/src/common/arrow/arrow_appender.cpp b/src/common/arrow/arrow_appender.cpp index faad7d2693f6..6dc0c14b3d24 100644 --- a/src/common/arrow/arrow_appender.cpp +++ b/src/common/arrow/arrow_appender.cpp @@ -227,6 +227,9 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic case LogicalTypeId::STRUCT: InitializeAppenderForType(append_data); break; + case LogicalTypeId::ARRAY: + InitializeAppenderForType(append_data); + break; case LogicalTypeId::LIST: { if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) { InitializeAppenderForType>(append_data); diff --git a/src/common/arrow/arrow_converter.cpp b/src/common/arrow/arrow_converter.cpp index d57bcc471bcb..72249ba20a65 100644 --- a/src/common/arrow/arrow_converter.cpp +++ b/src/common/arrow/arrow_converter.cpp @@ -226,6 +226,23 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co } break; } + case LogicalTypeId::ARRAY: { + auto array_size = ArrayType::GetSize(type); + auto child_type = ArrayType::GetChildType(type); + auto format = "+w:" + to_string(array_size); + root_holder.owned_type_names.push_back(AddName(format)); + child.format = root_holder.owned_type_names.back().get(); + + child.n_children = 1; + root_holder.nested_children.emplace_back(); + root_holder.nested_children.back().resize(1); + root_holder.nested_children_ptr.emplace_back(); + root_holder.nested_children_ptr.back().push_back(&root_holder.nested_children.back()[0]); + InitializeChild(root_holder.nested_children.back()[0], root_holder); + child.children = &root_holder.nested_children_ptr.back()[0]; + SetArrowFormat(root_holder, **child.children, child_type, options); + break; + } case LogicalTypeId::MAP: { SetArrowMapFormat(root_holder, child, type, options); break; diff --git a/src/common/types/vector.cpp b/src/common/types/vector.cpp index 512a2535ba31..fb7d21c37f20 100644 --- a/src/common/types/vector.cpp +++ b/src/common/types/vector.cpp @@ -889,14 +889,14 @@ void Vector::Flatten(idx_t count) { break; } case PhysicalType::ARRAY: { - auto &child = ArrayVector::GetEntry(*this); + auto &original_child = ArrayVector::GetEntry(*this); auto array_size = ArrayType::GetSize(GetType()); auto flattened_buffer = make_uniq(GetType(), count); auto &new_child = flattened_buffer->GetChild(); // Make sure to initialize a validity mask for the new child vector with the correct size - if (!child.validity.AllValid()) { + if (!original_child.validity.AllValid()) { new_child.validity.Initialize(array_size * count); } @@ -909,7 +909,8 @@ void Vector::Flatten(idx_t count) { // | 2 | // ... - child.Flatten(count * array_size); + auto child_vec = make_uniq(original_child); + child_vec->Flatten(count * array_size); // Create a selection vector SelectionVector sel(count * array_size); @@ -917,7 +918,7 @@ void Vector::Flatten(idx_t count) { for (idx_t elem_idx = 0; elem_idx < array_size; elem_idx++) { auto position = array_idx * array_size + elem_idx; // Broadcast the validity - if (FlatVector::IsNull(child, elem_idx)) { + if (FlatVector::IsNull(*child_vec, elem_idx)) { FlatVector::SetNull(new_child, position, true); } sel.set_index(position, elem_idx); @@ -925,7 +926,7 @@ void Vector::Flatten(idx_t count) { } // Copy over the data to the new buffer - VectorOperations::Copy(child, new_child, sel, count * array_size, 0, 0); + VectorOperations::Copy(*child_vec, new_child, sel, count * array_size, 0, 0); auxiliary = shared_ptr(flattened_buffer.release()); } break; @@ -1067,6 +1068,7 @@ void Vector::Sequence(int64_t start, int64_t increment, idx_t count) { auxiliary.reset(); } +// FIXME: This should ideally be const void Vector::Serialize(Serializer &serializer, idx_t count) { auto &logical_type = GetType(); diff --git a/src/common/vector_operations/vector_copy.cpp b/src/common/vector_operations/vector_copy.cpp index 7921743aecda..3823cd39e302 100644 --- a/src/common/vector_operations/vector_copy.cpp +++ b/src/common/vector_operations/vector_copy.cpp @@ -203,11 +203,11 @@ void VectorOperations::Copy(const Vector &source_p, Vector &target, const Select auto array_size = ArrayType::GetSize(source->GetType()); // Create a selection vector for the child elements - SelectionVector child_sel(copy_count * array_size); + SelectionVector child_sel(source_count * array_size); for (idx_t i = 0; i < copy_count; i++) { auto source_idx = sel->get_index(source_offset + i); for (idx_t j = 0; j < array_size; j++) { - child_sel.set_index(i * array_size + j, source_idx * array_size + j); + child_sel.set_index((source_offset * array_size) + (i * array_size + j), source_idx * array_size + j); } } VectorOperations::Copy(source_child, target_child, child_sel, source_count * array_size, diff --git a/src/execution/expression_executor/execute_function.cpp b/src/execution/expression_executor/execute_function.cpp index 58207ed438b6..90442ce130a1 100644 --- a/src/execution/expression_executor/execute_function.cpp +++ b/src/execution/expression_executor/execute_function.cpp @@ -70,9 +70,9 @@ void ExpressionExecutor::Execute(const BoundFunctionExpression &expr, Expression } #endif } - arguments.Verify(); } arguments.SetCardinality(count); + arguments.Verify(); state->profiler.BeginSample(); D_ASSERT(expr.function.function); diff --git a/src/function/table/arrow.cpp b/src/function/table/arrow.cpp index 203ad8bfc317..e03865285c45 100644 --- a/src/function/table/arrow.cpp +++ b/src/function/table/arrow.cpp @@ -119,7 +119,7 @@ static unique_ptr GetArrowLogicalTypeNoDictionary(ArrowSchema &schema std::string parameters = format.substr(format.find(':') + 1); idx_t fixed_size = std::stoi(parameters); auto child_type = ArrowTableFunction::GetArrowLogicalType(*schema.children[0]); - auto list_type = make_uniq(LogicalType::LIST(child_type->GetDuckType()), fixed_size); + auto list_type = make_uniq(LogicalType::ARRAY(child_type->GetDuckType(), fixed_size), fixed_size); list_type->AddChild(std::move(child_type)); return list_type; } else if (format == "+s") { diff --git a/src/function/table/arrow_conversion.cpp b/src/function/table/arrow_conversion.cpp index d3289f475574..717951145135 100644 --- a/src/function/table/arrow_conversion.cpp +++ b/src/function/table/arrow_conversion.cpp @@ -129,20 +129,7 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanS SetValidityMask(vector, array, scan_state, size, parent_offset, nested_offset); idx_t start_offset = 0; idx_t cur_offset = 0; - if (size_type == ArrowVariableSizeType::FIXED_SIZE) { - auto fixed_size = arrow_type.FixedSize(); - //! Have to check validity mask before setting this up - idx_t offset = GetEffectiveOffset(array, parent_offset, scan_state, nested_offset) * fixed_size; - start_offset = offset; - auto list_data = FlatVector::GetData(vector); - for (idx_t i = 0; i < size; i++) { - auto &le = list_data[i]; - le.offset = cur_offset; - le.length = fixed_size; - cur_offset += fixed_size; - } - list_size = start_offset + cur_offset; - } else if (size_type == ArrowVariableSizeType::NORMAL) { + if (size_type == ArrowVariableSizeType::NORMAL) { auto offsets = ArrowBufferData(array, 1) + GetEffectiveOffset(array, parent_offset, scan_state, nested_offset); start_offset = offsets[0]; @@ -209,6 +196,61 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanS } } +static void ArrowToDuckDBArray(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size, + const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask, + int64_t parent_offset) { + + D_ASSERT(arrow_type.GetSizeType() == ArrowVariableSizeType::FIXED_SIZE); + auto &scan_state = array_state.state; + auto array_size = arrow_type.FixedSize(); + auto child_count = array_size * size; + auto child_offset = GetEffectiveOffset(array, parent_offset, scan_state, nested_offset) * array_size; + + SetValidityMask(vector, array, scan_state, size, parent_offset, nested_offset); + + auto &child_vector = ArrayVector::GetEntry(vector); + SetValidityMask(child_vector, *array.children[0], scan_state, child_count, array.offset, child_offset); + + auto &array_mask = FlatVector::Validity(vector); + if (parent_mask) { + //! Since this List is owned by a struct we must guarantee their validity map matches on Null + if (!parent_mask->AllValid()) { + for (idx_t i = 0; i < size; i++) { + if (!parent_mask->RowIsValid(i)) { + array_mask.SetInvalid(i); + } + } + } + } + + // Broadcast the validity mask to the child vector + if (!array_mask.AllValid()) { + auto &child_validity_mask = FlatVector::Validity(child_vector); + for (idx_t i = 0; i < size; i++) { + if (!array_mask.RowIsValid(i)) { + for (idx_t j = 0; j < array_size; j++) { + child_validity_mask.SetInvalid(i * array_size + j); + } + } + } + } + + auto &child_state = array_state.GetChild(0); + auto &child_array = *array.children[0]; + auto &child_type = arrow_type[0]; + if (child_count == 0 && child_offset == 0) { + D_ASSERT(!child_array.dictionary); + ColumnArrowToDuckDB(child_vector, child_array, child_state, child_count, child_type, -1); + } else { + if (child_array.dictionary) { + ColumnArrowToDuckDBDictionary(child_vector, child_array, child_state, child_count, child_type, + child_offset); + } else { + ColumnArrowToDuckDB(child_vector, child_array, child_state, child_count, child_type, child_offset); + } + } +} + static void ArrowToDuckDBBlob(Vector &vector, ArrowArray &array, const ArrowScanLocalState &scan_state, idx_t size, const ArrowType &arrow_type, int64_t nested_offset, int64_t parent_offset) { auto size_type = arrow_type.GetSizeType(); @@ -833,6 +875,10 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask, parent_offset); break; } + case LogicalTypeId::ARRAY: { + ArrowToDuckDBArray(vector, array, array_state, size, arrow_type, nested_offset, parent_mask, parent_offset); + break; + } case LogicalTypeId::MAP: { ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask, parent_offset); ArrowToDuckDBMapVerify(vector, size); diff --git a/src/function/table/system/test_all_types.cpp b/src/function/table/system/test_all_types.cpp index 53e41672a1c6..94450345fd90 100644 --- a/src/function/table/system/test_all_types.cpp +++ b/src/function/table/system/test_all_types.cpp @@ -212,6 +212,70 @@ vector TestAllTypesFun::GetTestTypes(bool use_large_enum) { const Value &max = Value::UNION(members, 1, Value::SMALLINT(5)); result.emplace_back(union_type, "union", min, max); + // fixed int array + auto fixed_int_array_type = LogicalType::ARRAY(LogicalType::INTEGER, 3); + auto fixed_int_min_array_value = Value::ARRAY({Value(LogicalType::INTEGER), 2, 3}); + auto fixed_int_max_array_value = Value::ARRAY({4, 5, 6}); + result.emplace_back(fixed_int_array_type, "fixed_int_array", fixed_int_min_array_value, fixed_int_max_array_value); + + // fixed varchar array + auto fixed_varchar_array_type = LogicalType::ARRAY(LogicalType::VARCHAR, 3); + auto fixed_varchar_min_array_value = Value::ARRAY({Value("a"), Value(LogicalType::VARCHAR), Value("c")}); + auto fixed_varchar_max_array_value = Value::ARRAY({Value("d"), Value("e"), Value("f")}); + result.emplace_back(fixed_varchar_array_type, "fixed_varchar_array", fixed_varchar_min_array_value, + fixed_varchar_max_array_value); + + // fixed nested int array + auto fixed_nested_int_array_type = LogicalType::ARRAY(fixed_int_array_type, 3); + auto fixed_nested_int_min_array_value = + Value::ARRAY({fixed_int_min_array_value, Value(fixed_int_array_type), fixed_int_min_array_value}); + auto fixed_nested_int_max_array_value = + Value::ARRAY({fixed_int_max_array_value, fixed_int_min_array_value, fixed_int_max_array_value}); + result.emplace_back(fixed_nested_int_array_type, "fixed_nested_int_array", fixed_nested_int_min_array_value, + fixed_nested_int_max_array_value); + + // fixed nested varchar array + auto fixed_nested_varchar_array_type = LogicalType::ARRAY(fixed_varchar_array_type, 3); + auto fixed_nested_varchar_min_array_value = + Value::ARRAY({fixed_varchar_min_array_value, Value(fixed_varchar_array_type), fixed_varchar_min_array_value}); + auto fixed_nested_varchar_max_array_value = + Value::ARRAY({fixed_varchar_max_array_value, fixed_varchar_min_array_value, fixed_varchar_max_array_value}); + result.emplace_back(fixed_nested_varchar_array_type, "fixed_nested_varchar_array", + fixed_nested_varchar_min_array_value, fixed_nested_varchar_max_array_value); + + // fixed array of structs + auto fixed_struct_array_type = LogicalType::ARRAY(struct_type, 3); + auto fixed_struct_min_array_value = Value::ARRAY({min_struct_val, max_struct_val, min_struct_val}); + auto fixed_struct_max_array_value = Value::ARRAY({max_struct_val, min_struct_val, max_struct_val}); + result.emplace_back(fixed_struct_array_type, "fixed_struct_array", fixed_struct_min_array_value, + fixed_struct_max_array_value); + + // struct of fixed array + auto struct_of_fixed_array_type = + LogicalType::STRUCT({{"a", fixed_int_array_type}, {"b", fixed_varchar_array_type}}); + auto struct_of_fixed_array_min_value = + Value::STRUCT({{"a", fixed_int_min_array_value}, {"b", fixed_varchar_min_array_value}}); + auto struct_of_fixed_array_max_value = + Value::STRUCT({{"a", fixed_int_max_array_value}, {"b", fixed_varchar_max_array_value}}); + result.emplace_back(struct_of_fixed_array_type, "struct_of_fixed_array", struct_of_fixed_array_min_value, + struct_of_fixed_array_max_value); + + // fixed array of list of int + auto fixed_array_of_list_of_int_type = LogicalType::ARRAY(LogicalType::LIST(LogicalType::INTEGER), 3); + auto fixed_array_of_list_of_int_min_value = Value::ARRAY({empty_int_list, int_list, empty_int_list}); + auto fixed_array_of_list_of_int_max_value = Value::ARRAY({int_list, empty_int_list, int_list}); + result.emplace_back(fixed_array_of_list_of_int_type, "fixed_array_of_int_list", + fixed_array_of_list_of_int_min_value, fixed_array_of_list_of_int_max_value); + + // list of fixed array of int + auto list_of_fixed_array_of_int_type = LogicalType::LIST(fixed_int_array_type); + auto list_of_fixed_array_of_int_min_value = + Value::LIST({fixed_int_min_array_value, fixed_int_max_array_value, fixed_int_min_array_value}); + auto list_of_fixed_array_of_int_max_value = + Value::LIST({fixed_int_max_array_value, fixed_int_min_array_value, fixed_int_max_array_value}); + result.emplace_back(list_of_fixed_array_of_int_type, "list_of_fixed_int_array", + list_of_fixed_array_of_int_min_value, list_of_fixed_array_of_int_max_value); + return result; } diff --git a/src/include/duckdb.h b/src/include/duckdb.h index 802221963e09..6398f63a173a 100644 --- a/src/include/duckdb.h +++ b/src/include/duckdb.h @@ -123,6 +123,8 @@ typedef enum DUCKDB_TYPE { DUCKDB_TYPE_STRUCT = 25, // map type, only useful as logical type DUCKDB_TYPE_MAP = 26, + // duckdb_array, only useful as logical type + DUCKDB_TYPE_ARRAY = 33, // duckdb_hugeint DUCKDB_TYPE_UUID = 27, // union type, only useful as logical type @@ -1626,6 +1628,16 @@ Creates a list value from a type and an array of values of length `value_count` */ DUCKDB_API duckdb_value duckdb_create_list_value(duckdb_logical_type type, duckdb_value *values, idx_t value_count); +/*! +Creates a array value from a type and an array of values of length `value_count` + +* type: The type of the array +* values: The values for the array +* value_count: The number of values in the array +* returns: The value. This must be destroyed with `duckdb_destroy_value`. +*/ +DUCKDB_API duckdb_value duckdb_create_array_value(duckdb_logical_type type, duckdb_value *values, idx_t value_count); + /*! Obtains a string representation of the given value. The result must be destroyed with `duckdb_free`. @@ -1676,6 +1688,16 @@ The resulting type should be destroyed with `duckdb_destroy_logical_type`. */ DUCKDB_API duckdb_logical_type duckdb_create_list_type(duckdb_logical_type type); +/*! +Creates a array type from its child type. +The resulting type should be destroyed with `duckdb_destroy_logical_type`. + +* type: The child type of array type to create. +* array_size: The number of elements in the array. +* returns: The logical type. +*/ +DUCKDB_API duckdb_logical_type duckdb_create_array_type(duckdb_logical_type type, idx_t array_size); + /*! Creates a map type from its key type and value type. The resulting type should be destroyed with `duckdb_destroy_logical_type`. @@ -1798,6 +1820,24 @@ The result must be freed with `duckdb_destroy_logical_type`. */ DUCKDB_API duckdb_logical_type duckdb_list_type_child_type(duckdb_logical_type type); +/*! +Retrieves the child type of the given array type. + +The result must be freed with `duckdb_destroy_logical_type`. + +* type: The logical type object +* returns: The child type of the array type. Must be destroyed with `duckdb_destroy_logical_type`. +*/ +DUCKDB_API duckdb_logical_type duckdb_array_type_child_type(duckdb_logical_type type); + +/*! +Retrieves the array size of the given array type. + +* type: The logical type object +* returns: The fixed number of elements the values of this array type can store. +*/ +DUCKDB_API idx_t duckdb_array_type_array_size(duckdb_logical_type type); + /*! Retrieves the key type of the given map type. @@ -2073,6 +2113,17 @@ The resulting vector is valid as long as the parent vector is valid. */ DUCKDB_API duckdb_vector duckdb_struct_vector_get_child(duckdb_vector vector, idx_t index); +/*! +Retrieves the child vector of a array vector. + +The resulting vector is valid as long as the parent vector is valid. +The resulting vector has the size of the parent vector multiplied by the array size. + +* vector: The vector +* returns: The child vector +*/ +DUCKDB_API duckdb_vector duckdb_array_vector_get_child(duckdb_vector vector); + //===--------------------------------------------------------------------===// // Validity Mask Functions //===--------------------------------------------------------------------===// diff --git a/src/include/duckdb/common/arrow/appender/fixed_size_list_data.hpp b/src/include/duckdb/common/arrow/appender/fixed_size_list_data.hpp new file mode 100644 index 000000000000..4ad609441393 --- /dev/null +++ b/src/include/duckdb/common/arrow/appender/fixed_size_list_data.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include "duckdb/common/arrow/appender/append_data.hpp" + +namespace duckdb { + +struct ArrowFixedSizeListData { +public: + static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity); + static void Append(ArrowAppendData &append_data, Vector &input, idx_t from, idx_t to, idx_t input_size); + static void Finalize(ArrowAppendData &append_data, const LogicalType &type, ArrowArray *result); +}; + +} // namespace duckdb diff --git a/src/include/duckdb/common/arrow/appender/list.hpp b/src/include/duckdb/common/arrow/appender/list.hpp index 48b94def2465..54cc34dd4dab 100644 --- a/src/include/duckdb/common/arrow/appender/list.hpp +++ b/src/include/duckdb/common/arrow/appender/list.hpp @@ -1,5 +1,6 @@ #include "duckdb/common/arrow/appender/bool_data.hpp" #include "duckdb/common/arrow/appender/enum_data.hpp" +#include "duckdb/common/arrow/appender/fixed_size_list_data.hpp" #include "duckdb/common/arrow/appender/list_data.hpp" #include "duckdb/common/arrow/appender/map_data.hpp" #include "duckdb/common/arrow/appender/scalar_data.hpp" diff --git a/src/include/duckdb/function/scalar/list/contains_or_position.hpp b/src/include/duckdb/function/scalar/list/contains_or_position.hpp index c05e98a53f3a..7941da81c332 100644 --- a/src/include/duckdb/function/scalar/list/contains_or_position.hpp +++ b/src/include/duckdb/function/scalar/list/contains_or_position.hpp @@ -130,6 +130,7 @@ void ListContainsOrPosition(DataChunk &args, Vector &result) { break; case PhysicalType::STRUCT: case PhysicalType::LIST: + case PhysicalType::ARRAY: TemplatedContainsOrPosition(args, result, true); break; default: diff --git a/src/main/capi/data_chunk-c.cpp b/src/main/capi/data_chunk-c.cpp index fb744197a56f..255b343ebd70 100644 --- a/src/main/capi/data_chunk-c.cpp +++ b/src/main/capi/data_chunk-c.cpp @@ -155,6 +155,14 @@ duckdb_vector duckdb_struct_vector_get_child(duckdb_vector vector, idx_t index) return reinterpret_cast(duckdb::StructVector::GetEntries(*v)[index].get()); } +duckdb_vector duckdb_array_vector_get_child(duckdb_vector vector) { + if (!vector) { + return nullptr; + } + auto v = reinterpret_cast(vector); + return reinterpret_cast(&duckdb::ArrayVector::GetEntry(*v)); +} + bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) { if (!validity) { return true; diff --git a/src/main/capi/duckdb_value-c.cpp b/src/main/capi/duckdb_value-c.cpp index c66184b897ad..3ef5a92c65fb 100644 --- a/src/main/capi/duckdb_value-c.cpp +++ b/src/main/capi/duckdb_value-c.cpp @@ -103,3 +103,30 @@ duckdb_value duckdb_create_list_value(duckdb_logical_type type, duckdb_value *va } return WrapValue(list_value); } + +duckdb_value duckdb_create_array_value(duckdb_logical_type type, duckdb_value *values, idx_t value_count) { + if (!type || !values) { + return nullptr; + } + if (value_count >= duckdb::ArrayType::MAX_ARRAY_SIZE) { + return nullptr; + } + auto <ype = UnwrapType(type); + duckdb::vector unwrapped_values; + + for (idx_t i = 0; i < value_count; i++) { + auto value = values[i]; + if (!value) { + return nullptr; + } + unwrapped_values.push_back(UnwrapValue(value)); + } + duckdb::Value *array_value = new duckdb::Value; + try { + *array_value = duckdb::Value::ARRAY(ltype, unwrapped_values); + } catch (...) { + delete array_value; + return nullptr; + } + return WrapValue(array_value); +} diff --git a/src/main/capi/helper-c.cpp b/src/main/capi/helper-c.cpp index b9caf94df38c..39cacbf2628f 100644 --- a/src/main/capi/helper-c.cpp +++ b/src/main/capi/helper-c.cpp @@ -126,6 +126,8 @@ duckdb_type ConvertCPPTypeToC(const LogicalType &sql_type) { return DUCKDB_TYPE_UNION; case LogicalTypeId::UUID: return DUCKDB_TYPE_UUID; + case LogicalTypeId::ARRAY: + return DUCKDB_TYPE_ARRAY; default: // LCOV_EXCL_START D_ASSERT(0); return DUCKDB_TYPE_INVALID; diff --git a/src/main/capi/logical_types-c.cpp b/src/main/capi/logical_types-c.cpp index 6a50fba7abed..bd349b55f9d3 100644 --- a/src/main/capi/logical_types-c.cpp +++ b/src/main/capi/logical_types-c.cpp @@ -35,6 +35,18 @@ duckdb_logical_type duckdb_create_list_type(duckdb_logical_type type) { return reinterpret_cast(ltype); } +duckdb_logical_type duckdb_create_array_type(duckdb_logical_type type, idx_t array_size) { + if (!type) { + return nullptr; + } + if (array_size >= duckdb::ArrayType::MAX_ARRAY_SIZE) { + return nullptr; + } + duckdb::LogicalType *ltype = new duckdb::LogicalType; + *ltype = duckdb::LogicalType::ARRAY(*reinterpret_cast(type), array_size); + return reinterpret_cast(ltype); +} + duckdb_logical_type duckdb_create_union_type(duckdb_logical_type *member_types_p, const char **member_names, idx_t member_count) { if (!member_types_p || !member_names) { @@ -204,6 +216,28 @@ duckdb_logical_type duckdb_list_type_child_type(duckdb_logical_type type) { return reinterpret_cast(new duckdb::LogicalType(duckdb::ListType::GetChildType(ltype))); } +duckdb_logical_type duckdb_array_type_child_type(duckdb_logical_type type) { + if (!AssertLogicalTypeId(type, duckdb::LogicalTypeId::ARRAY)) { + return nullptr; + } + auto <ype = *(reinterpret_cast(type)); + if (ltype.id() != duckdb::LogicalTypeId::ARRAY) { + return nullptr; + } + return reinterpret_cast(new duckdb::LogicalType(duckdb::ArrayType::GetChildType(ltype))); +} + +idx_t duckdb_array_type_array_size(duckdb_logical_type type) { + if (!AssertLogicalTypeId(type, duckdb::LogicalTypeId::ARRAY)) { + return 0; + } + auto <ype = *(reinterpret_cast(type)); + if (ltype.id() != duckdb::LogicalTypeId::ARRAY) { + return 0; + } + return duckdb::ArrayType::GetSize(ltype); +} + duckdb_logical_type duckdb_map_type_key_type(duckdb_logical_type type) { if (!AssertLogicalTypeId(type, duckdb::LogicalTypeId::MAP)) { return nullptr; diff --git a/src/planner/binder/statement/bind_export.cpp b/src/planner/binder/statement/bind_export.cpp index 380145eb53ac..1a0fa9489611 100644 --- a/src/planner/binder/statement/bind_export.cpp +++ b/src/planner/binder/statement/bind_export.cpp @@ -128,6 +128,11 @@ static LogicalType AlterLogicalType(const LogicalType &original, copy_supports_t auto child = AlterLogicalType(ListType::GetChildType(original), type_check); return LogicalType::LIST(child); } + case LogicalTypeId::ARRAY: { + // Attempt to convert the array to a list + auto child = ArrayType::GetChildType(original); + return AlterLogicalType(LogicalType::LIST(child), type_check); + } case LogicalTypeId::STRUCT: { auto &original_children = StructType::GetChildTypes(original); child_list_t new_children; diff --git a/test/api/capi/test_capi_complex_types.cpp b/test/api/capi/test_capi_complex_types.cpp index d5334dd881d7..ff84948beb93 100644 --- a/test/api/capi/test_capi_complex_types.cpp +++ b/test/api/capi/test_capi_complex_types.cpp @@ -465,3 +465,41 @@ TEST_CASE("Test Infinite Dates", "[capi]") { REQUIRE(ts.micros > 0); } } + +TEST_CASE("Array type construction") { + CAPITester tester; + REQUIRE(tester.OpenDatabase(nullptr)); + + auto child_type = duckdb_create_logical_type(DUCKDB_TYPE_INTEGER); + auto array_type = duckdb_create_array_type(child_type, 3); + + REQUIRE(duckdb_array_type_array_size(array_type) == 3); + + auto get_child_type = duckdb_array_type_child_type(array_type); + REQUIRE(duckdb_get_type_id(get_child_type) == DUCKDB_TYPE_INTEGER); + duckdb_destroy_logical_type(&get_child_type); + + duckdb_destroy_logical_type(&child_type); + duckdb_destroy_logical_type(&array_type); +} + +TEST_CASE("Array value construction") { + CAPITester tester; + REQUIRE(tester.OpenDatabase(nullptr)); + + auto child_type = duckdb_create_logical_type(DUCKDB_TYPE_INTEGER); + + duckdb::vector values; + values.push_back(duckdb_create_int64(42)); + values.push_back(duckdb_create_int64(43)); + values.push_back(duckdb_create_int64(44)); + + auto array_value = duckdb_create_array_value(child_type, values.data(), values.size()); + REQUIRE(array_value); + + duckdb_destroy_logical_type(&child_type); + for (auto &val : values) { + duckdb_destroy_value(&val); + } + duckdb_destroy_value(&array_value); +} diff --git a/test/api/capi/test_capi_data_chunk.cpp b/test/api/capi/test_capi_data_chunk.cpp index 7c518e83bfa6..9d5e82aa38b8 100644 --- a/test/api/capi/test_capi_data_chunk.cpp +++ b/test/api/capi/test_capi_data_chunk.cpp @@ -431,3 +431,30 @@ TEST_CASE("Test DataChunk populate ListVector in C API", "[capi]") { duckdb_destroy_logical_type(&list_type); duckdb_destroy_logical_type(&elem_type); } + +TEST_CASE("Test DataChunk populate ArrayVector in C API", "[capi]") { + + auto elem_type = duckdb_create_logical_type(duckdb_type::DUCKDB_TYPE_INTEGER); + auto array_type = duckdb_create_array_type(elem_type, 3); + duckdb_logical_type schema[] = {array_type}; + auto chunk = duckdb_create_data_chunk(schema, 1); + duckdb_data_chunk_set_size(chunk, 2); + auto array_vector = duckdb_data_chunk_get_vector(chunk, 0); + + auto child = duckdb_array_vector_get_child(array_vector); + for (int i = 0; i < 6; i++) { + ((int *)duckdb_vector_get_data(child))[i] = i; + } + + auto vec = (Vector &)(*array_vector); + for (int i = 0; i < 2; i++) { + auto child_vals = ArrayValue::GetChildren(vec.GetValue(i)); + for (int j = 0; j < 3; j++) { + REQUIRE(child_vals[j].GetValue() == i * 3 + j); + } + } + + duckdb_destroy_data_chunk(&chunk); + duckdb_destroy_logical_type(&array_type); + duckdb_destroy_logical_type(&elem_type); +} diff --git a/test/sql/storage_version/storage_version.db b/test/sql/storage_version/storage_version.db index 1d6077c5b886..c816c3c219ae 100644 Binary files a/test/sql/storage_version/storage_version.db and b/test/sql/storage_version/storage_version.db differ diff --git a/test/sql/types/test_all_types.test_slow b/test/sql/types/test_all_types.test_slow index 18fb121eefb0..7d6facc7a373 100644 --- a/test/sql/types/test_all_types.test_slow +++ b/test/sql/types/test_all_types.test_slow @@ -5,18 +5,18 @@ statement ok PRAGMA enable_verification -query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII +query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SELECT * FROM test_all_types(); ---- -False -128 -32768 -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 0 0 0 0 0 5877642-06-25 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 1677-09-21 00:12:43.145225 00:00:00+15:59:59 290309-12-22 (BC) 00:00:00+00 -340282346638528859811704183484516925440.000000 -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 -999.9 -99999.9999 -999999999999.999999 -9999999999999999999999999999.9999999999 00000000-0000-0000-0000-000000000000 00:00:00 🦆🦆🦆🦆🦆🦆 thisisalongblob\x00withnullbytes 0010001001011100010101011010111 DUCK_DUCK_ENUM enum_0 enum_0 [] [] [] [] [] [] [] {'a': NULL, 'b': NULL} {'a': NULL, 'b': NULL} [] {} Frank -True 127 32767 2147483647 9223372036854775807 170141183460469231731687303715884105727 340282366920938463463374607431768211455 255 65535 4294967295 18446744073709551615 5881580-07-10 24:00:00 294247-01-10 04:00:54.775806 294247-01-10 04:00:54 294247-01-10 04:00:54.775 2262-04-11 23:47:16.854775 24:00:00-15:59:59 294247-01-10 04:00:54.775806+00 340282346638528859811704183484516925440.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 999.9 99999.9999 999999999999.999999 9999999999999999999999999999.9999999999 ffffffff-ffff-ffff-ffff-ffffffffffff 83 years 3 months 999 days 00:16:39.999999 goo\0se \x00\x00\x00a 10101 GOOSE enum_299 enum_69999 [42, 999, NULL, NULL, -42] [42.0, nan, inf, -inf, NULL, -42.0] [1970-01-01, infinity, -infinity, NULL, 2022-05-12] [1970-01-01 00:00:00, infinity, -infinity, NULL, 2022-05-12 16:23:45] [1970-01-01 00:00:00+00, infinity, -infinity, NULL, 2022-05-12 23:23:45+00] [🦆🦆🦆🦆🦆🦆, goose, NULL, ] [[], [42, 999, NULL, NULL, -42], NULL, [], [42, 999, NULL, NULL, -42]] {'a': 42, 'b': 🦆🦆🦆🦆🦆🦆} {'a': [42, 999, NULL, NULL, -42], 'b': [🦆🦆🦆🦆🦆🦆, goose, NULL, ]} [{'a': NULL, 'b': NULL}, {'a': 42, 'b': 🦆🦆🦆🦆🦆🦆}, NULL] {key1=🦆🦆🦆🦆🦆🦆, key2=goose} 5 -NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +False -128 -32768 -2147483648 -9223372036854775808 -170141183460469231731687303715884105728 0 0 0 0 0 5877642-06-25 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 290309-12-22 (BC) 00:00:00 1677-09-21 00:12:43.145225 00:00:00+15:59:59 290309-12-22 (BC) 00:00:00+00 -340282346638528859811704183484516925440.000000 -179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 -999.9 -99999.9999 -999999999999.999999 -9999999999999999999999999999.9999999999 00000000-0000-0000-0000-000000000000 00:00:00 🦆🦆🦆🦆🦆🦆 thisisalongblob\x00withnullbytes 0010001001011100010101011010111 DUCK_DUCK_ENUM enum_0 enum_0 [] [] [] [] [] [] [] {'a': NULL, 'b': NULL} {'a': NULL, 'b': NULL} [] {} Frank [NULL, 2, 3] [a, NULL, c] [[NULL, 2, 3], NULL, [NULL, 2, 3]] [[a, NULL, c], NULL, [a, NULL, c]] [{'a': NULL, 'b': NULL}, {'a': 42, 'b': 🦆🦆🦆🦆🦆🦆}, {'a': NULL, 'b': NULL}] {'a': [NULL, 2, 3], 'b': [a, NULL, c]} [[], [42, 999, NULL, NULL, -42], []] [[NULL, 2, 3], [4, 5, 6], [NULL, 2, 3]] +True 127 32767 2147483647 9223372036854775807 170141183460469231731687303715884105727 340282366920938463463374607431768211455 255 65535 4294967295 18446744073709551615 5881580-07-10 24:00:00 294247-01-10 04:00:54.775806 294247-01-10 04:00:54 294247-01-10 04:00:54.775 2262-04-11 23:47:16.854775 24:00:00-15:59:59 294247-01-10 04:00:54.775806+00 340282346638528859811704183484516925440.000000 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000 999.9 99999.9999 999999999999.999999 9999999999999999999999999999.9999999999 ffffffff-ffff-ffff-ffff-ffffffffffff 83 years 3 months 999 days 00:16:39.999999 goo\0se \x00\x00\x00a 10101 GOOSE enum_299 enum_69999 [42, 999, NULL, NULL, -42] [42.0, nan, inf, -inf, NULL, -42.0] [1970-01-01, infinity, -infinity, NULL, 2022-05-12] [1970-01-01 00:00:00, infinity, -infinity, NULL, 2022-05-12 16:23:45] [1970-01-01 00:00:00+00, infinity, -infinity, NULL, 2022-05-12 23:23:45+00] [🦆🦆🦆🦆🦆🦆, goose, NULL, ] [[], [42, 999, NULL, NULL, -42], NULL, [], [42, 999, NULL, NULL, -42]] {'a': 42, 'b': 🦆🦆🦆🦆🦆🦆} {'a': [42, 999, NULL, NULL, -42], 'b': [🦆🦆🦆🦆🦆🦆, goose, NULL, ]} [{'a': NULL, 'b': NULL}, {'a': 42, 'b': 🦆🦆🦆🦆🦆🦆}, NULL] {key1=🦆🦆🦆🦆🦆🦆, key2=goose} 5 [4, 5, 6] [d, e, f] [[4, 5, 6], [NULL, 2, 3], [4, 5, 6]] [[d, e, f], [a, NULL, c], [d, e, f]] [{'a': 42, 'b': 🦆🦆🦆🦆🦆🦆}, {'a': NULL, 'b': NULL}, {'a': 42, 'b': 🦆🦆🦆🦆🦆🦆}] {'a': [4, 5, 6], 'b': [d, e, f]} [[42, 999, NULL, NULL, -42], [], [42, 999, NULL, NULL, -42]] [[4, 5, 6], [NULL, 2, 3], [4, 5, 6]] +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL statement ok CREATE TABLE all_types AS SELECT * FROM test_all_types(); -query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII nosort r1 +query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII nosort r1 SELECT * FROM test_all_types(); -query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII nosort r1 +query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII nosort r1 SELECT * FROM all_types; diff --git a/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java b/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java index 583173369288..888e55cc34e7 100644 --- a/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java +++ b/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java @@ -3585,7 +3585,11 @@ public static void test_map() throws Exception { public static void test_getColumnClassName() throws Exception { try (Connection conn = DriverManager.getConnection(JDBC_URL); Statement s = conn.createStatement();) { - try (ResultSet rs = s.executeQuery("select * from test_all_types()")) { + try (ResultSet rs = s.executeQuery( + "select * exclude(fixed_int_array, fixed_varchar_array" + + + ", fixed_nested_int_array, fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, " + + "fixed_array_of_int_list, list_of_fixed_int_array) from test_all_types()")) { ResultSetMetaData rsmd = rs.getMetaData(); rs.next(); for (int i = 1; i <= rsmd.getColumnCount(); i++) { @@ -3773,7 +3777,10 @@ static Map mapOf(Object... pairs) { public static void test_all_types() throws Exception { Logger logger = Logger.getAnonymousLogger(); String sql = - "select * EXCLUDE(time, time_tz)" + "select * EXCLUDE(time, time_tz, " + + + "fixed_int_array, fixed_varchar_array, fixed_nested_int_array, fixed_nested_varchar_array, fixed_struct_array," + + "struct_of_fixed_array, fixed_array_of_int_list, list_of_fixed_int_array)" + "\n , CASE WHEN time = '24:00:00'::TIME THEN '23:59:59.999999'::TIME ELSE time END AS time" + "\n , CASE WHEN time_tz = '24:00:00-15:59:59'::TIMETZ THEN '23:59:59.999999-15:59:59'::TIMETZ ELSE time_tz END AS time_tz" diff --git a/tools/juliapkg/test/test_all_types.jl b/tools/juliapkg/test/test_all_types.jl index ece579fd151b..c2e0614ff8e9 100644 --- a/tools/juliapkg/test/test_all_types.jl +++ b/tools/juliapkg/test/test_all_types.jl @@ -9,7 +9,9 @@ df = DataFrame( DBInterface.execute( con, - """SELECT * EXCLUDE(time, time_tz) + """SELECT * EXCLUDE(time, time_tz, fixed_int_array, fixed_varchar_array, fixed_nested_int_array, + fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, fixed_array_of_int_list, + list_of_fixed_int_array) , CASE WHEN time = '24:00:00'::TIME THEN '23:59:59.999999'::TIME ELSE time END AS time , CASE WHEN time_tz = '24:00:00-15:59:59'::TIMETZ THEN '23:59:59.999999-15:59:59'::TIMETZ ELSE time_tz END AS time_tz FROM test_all_types() diff --git a/tools/pythonpkg/duckdb-stubs/__init__.pyi b/tools/pythonpkg/duckdb-stubs/__init__.pyi index 39c997782748..891a57717b08 100644 --- a/tools/pythonpkg/duckdb-stubs/__init__.pyi +++ b/tools/pythonpkg/duckdb-stubs/__init__.pyi @@ -319,8 +319,8 @@ class DuckDBPyConnection: def string_type(self, collation: str = "") -> DuckDBPyType: ... def enum_type(self, name: str, type: DuckDBPyType, values: List[Any]) -> DuckDBPyType: ... def decimal_type(self, width: int, scale: int) -> DuckDBPyType: ... - def array_type(self, type: DuckDBPyType) -> DuckDBPyType: ... def list_type(self, type: DuckDBPyType) -> DuckDBPyType: ... + def array_type(self, type: DuckDBPyType, size: int) -> DuckDBPyType: ... def map_type(self, key: DuckDBPyType, value: DuckDBPyType) -> DuckDBPyType: ... def __enter__(self) -> DuckDBPyConnection: ... def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... @@ -712,6 +712,6 @@ def union_type(members: Union[Dict[str, DuckDBPyType], List[str]], connection: D def string_type(collation: str = "", connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... def enum_type(name: str, type: DuckDBPyType, values: List[Any], connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... def decimal_type(width: int, scale: int, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... -def array_type(type: DuckDBPyType, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... +def array_type(type: DuckDBPyType, size: int, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... def list_type(type: DuckDBPyType, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... def map_type(key: DuckDBPyType, value: DuckDBPyType, connection: DuckDBPyConnection = ...) -> DuckDBPyType: ... diff --git a/tools/pythonpkg/duckdb_python.cpp b/tools/pythonpkg/duckdb_python.cpp index c420274af31a..5ee2f7addb4d 100644 --- a/tools/pythonpkg/duckdb_python.cpp +++ b/tools/pythonpkg/duckdb_python.cpp @@ -96,9 +96,10 @@ static void InitializeConnectionMethods(py::module_ &m) { py::arg("type"), py::arg("values"), py::arg("connection") = py::none()) .def("decimal_type", &PyConnectionWrapper::DecimalType, "Create a decimal type with 'width' and 'scale'", py::arg("width"), py::arg("scale"), py::arg("connection") = py::none()); - DefineMethod({"array_type", "list_type"}, m, &PyConnectionWrapper::ArrayType, - "Create an array type object of 'type'", py::arg("type").none(false), - py::arg("connection") = py::none()); + m.def("array_type", &PyConnectionWrapper::ArrayType, "Create an array type object of 'type'", + py::arg("type").none(false), py::arg("size").none(false), py::arg("connection") = py::none()); + m.def("list_type", &PyConnectionWrapper::ListType, "Create a list type object of 'type'", + py::arg("type").none(false), py::arg("connection") = py::none()); m.def("map_type", &PyConnectionWrapper::MapType, "Create a map type object from 'key_type' and 'value_type'", py::arg("key").none(false), py::arg("value").none(false), py::arg("connection") = py::none()) .def("execute", &PyConnectionWrapper::Execute, diff --git a/tools/pythonpkg/src/include/duckdb_python/connection_wrapper.hpp b/tools/pythonpkg/src/include/duckdb_python/connection_wrapper.hpp index 34c6a93b2e2a..a6246f2f5f76 100644 --- a/tools/pythonpkg/src/include/duckdb_python/connection_wrapper.hpp +++ b/tools/pythonpkg/src/include/duckdb_python/connection_wrapper.hpp @@ -40,8 +40,10 @@ class PyConnectionWrapper { static shared_ptr UnregisterUDF(const string &name, shared_ptr conn = nullptr); - static shared_ptr ArrayType(const shared_ptr &type, + static shared_ptr ArrayType(const shared_ptr &type, idx_t size, shared_ptr conn = nullptr); + static shared_ptr ListType(const shared_ptr &type, + shared_ptr conn = nullptr); static shared_ptr MapType(const shared_ptr &key, const shared_ptr &value, shared_ptr conn = nullptr); static shared_ptr StructType(const py::object &fields, diff --git a/tools/pythonpkg/src/include/duckdb_python/pyconnection/pyconnection.hpp b/tools/pythonpkg/src/include/duckdb_python/pyconnection/pyconnection.hpp index b862bc780900..23aabdd56bb6 100644 --- a/tools/pythonpkg/src/include/duckdb_python/pyconnection/pyconnection.hpp +++ b/tools/pythonpkg/src/include/duckdb_python/pyconnection/pyconnection.hpp @@ -90,7 +90,8 @@ struct DuckDBPyConnection : public std::enable_shared_from_this MapType(const shared_ptr &key_type, const shared_ptr &value_type); shared_ptr StructType(const py::object &fields); - shared_ptr ArrayType(const shared_ptr &type); + shared_ptr ListType(const shared_ptr &type); + shared_ptr ArrayType(const shared_ptr &type, idx_t size); shared_ptr UnionType(const py::object &members); shared_ptr EnumType(const string &name, const shared_ptr &type, const py::list &values_p); diff --git a/tools/pythonpkg/src/native/python_conversion.cpp b/tools/pythonpkg/src/native/python_conversion.cpp index cfee943ab438..77993b88b41b 100644 --- a/tools/pythonpkg/src/native/python_conversion.cpp +++ b/tools/pythonpkg/src/native/python_conversion.cpp @@ -210,6 +210,29 @@ Value TransformListValue(py::handle ele, const LogicalType &target_type = Logica return Value::LIST(element_type, values); } +Value TransformArrayValue(py::handle ele, const LogicalType &target_type = LogicalType::UNKNOWN) { + auto size = py::len(ele); + + if (size == 0) { + return Value::EMPTYARRAY(LogicalType::SQLNULL, size); + } + + vector values; + values.reserve(size); + + bool array_target = target_type.id() == LogicalTypeId::ARRAY; + auto &child_type = array_target ? ArrayType::GetChildType(target_type) : LogicalType::UNKNOWN; + + LogicalType element_type = LogicalType::SQLNULL; + for (idx_t i = 0; i < size; i++) { + Value new_value = TransformPythonValue(ele.attr("__getitem__")(i), child_type); + element_type = LogicalType::ForceMaxLogicalType(element_type, new_value.type()); + values.push_back(std::move(new_value)); + } + + return Value::ARRAY(element_type, values); +} + Value TransformDictionary(const PyDictionary &dict) { //! DICT -> MAP FORMAT // keys() = [key, value] @@ -518,7 +541,11 @@ Value TransformPythonValue(py::handle ele, const LogicalType &target_type, bool } } case PythonObjectType::List: - return TransformListValue(ele, target_type); + if (target_type.id() == LogicalTypeId::ARRAY) { + return TransformArrayValue(ele, target_type); + } else { + return TransformListValue(ele, target_type); + } case PythonObjectType::Dict: { PyDictionary dict = PyDictionary(py::reinterpret_borrow(ele)); switch (target_type.id()) { @@ -537,6 +564,8 @@ Value TransformPythonValue(py::handle ele, const LogicalType &target_type, bool case LogicalTypeId::UNKNOWN: case LogicalTypeId::LIST: return TransformListValue(ele, target_type); + case LogicalTypeId::ARRAY: + return TransformArrayValue(ele, target_type); default: throw InvalidInputException("Can't convert tuple to a Value of type %s", target_type.ToString()); } diff --git a/tools/pythonpkg/src/native/python_objects.cpp b/tools/pythonpkg/src/native/python_objects.cpp index 4c5d7879cdea..02f0d2378c3d 100644 --- a/tools/pythonpkg/src/native/python_objects.cpp +++ b/tools/pythonpkg/src/native/python_objects.cpp @@ -510,6 +510,16 @@ py::object PythonObject::FromValue(const Value &val, const LogicalType &type, } return std::move(list); } + case LogicalTypeId::ARRAY: { + auto &array_values = ArrayValue::GetChildren(val); + auto array_size = ArrayType::GetSize(type); + auto &child_type = ArrayType::GetChildType(type); + py::tuple arr(array_size); + for (idx_t elem_idx = 0; elem_idx < array_size; elem_idx++) { + arr[elem_idx] = FromValue(array_values[elem_idx], child_type, client_properties); + } + return std::move(arr); + } case LogicalTypeId::MAP: { auto &list_values = ListValue::GetChildren(val); diff --git a/tools/pythonpkg/src/numpy/array_wrapper.cpp b/tools/pythonpkg/src/numpy/array_wrapper.cpp index 016544481bb8..a731276c1148 100644 --- a/tools/pythonpkg/src/numpy/array_wrapper.cpp +++ b/tools/pythonpkg/src/numpy/array_wrapper.cpp @@ -285,6 +285,22 @@ struct ListConvert { } }; +struct ArrayConvert { + static py::tuple ConvertValue(Vector &input, idx_t chunk_offset, const ClientProperties &client_properties) { + auto val = input.GetValue(chunk_offset); + auto &array_values = ArrayValue::GetChildren(val); + auto &array_type = input.GetType(); + auto array_size = ArrayType::GetSize(array_type); + auto &child_type = ArrayType::GetChildType(array_type); + + py::tuple arr(array_size); + for (idx_t elem_idx = 0; elem_idx < array_size; elem_idx++) { + arr[elem_idx] = PythonObject::FromValue(array_values[elem_idx], child_type, client_properties); + } + return arr; + } +}; + struct StructConvert { static py::dict ConvertValue(Vector &input, idx_t chunk_offset, const ClientProperties &client_properties) { py::dict py_struct; @@ -658,6 +674,9 @@ void ArrayWrapper::Append(idx_t current_offset, Vector &input, idx_t count) { case LogicalTypeId::LIST: may_have_null = ConvertNested(append_data); break; + case LogicalTypeId::ARRAY: + may_have_null = ConvertNested(append_data); + break; case LogicalTypeId::MAP: may_have_null = ConvertNested(append_data); break; diff --git a/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp b/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp index 8838bd1c85af..5d73685b20e6 100644 --- a/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp +++ b/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp @@ -13,43 +13,32 @@ namespace duckdb { -RawArrayWrapper::RawArrayWrapper(const LogicalType &type) : data(nullptr), type(type), count(0) { +static idx_t GetNumpyTypeWidth(const LogicalType &type) { switch (type.id()) { case LogicalTypeId::BOOLEAN: - type_width = sizeof(bool); - break; + return sizeof(bool); case LogicalTypeId::UTINYINT: - type_width = sizeof(uint8_t); - break; + return sizeof(uint8_t); case LogicalTypeId::USMALLINT: - type_width = sizeof(uint16_t); - break; + return sizeof(uint16_t); case LogicalTypeId::UINTEGER: - type_width = sizeof(uint32_t); - break; + return sizeof(uint32_t); case LogicalTypeId::UBIGINT: - type_width = sizeof(uint64_t); - break; + return sizeof(uint64_t); case LogicalTypeId::TINYINT: - type_width = sizeof(int8_t); - break; + return sizeof(int8_t); case LogicalTypeId::SMALLINT: - type_width = sizeof(int16_t); - break; + return sizeof(int16_t); case LogicalTypeId::INTEGER: - type_width = sizeof(int32_t); - break; + return sizeof(int32_t); case LogicalTypeId::BIGINT: - type_width = sizeof(int64_t); - break; + return sizeof(int64_t); case LogicalTypeId::FLOAT: - type_width = sizeof(float); - break; + return sizeof(float); case LogicalTypeId::HUGEINT: case LogicalTypeId::DOUBLE: case LogicalTypeId::DECIMAL: - type_width = sizeof(double); - break; + return sizeof(double); case LogicalTypeId::TIMESTAMP: case LogicalTypeId::TIMESTAMP_SEC: case LogicalTypeId::TIMESTAMP_MS: @@ -57,8 +46,7 @@ RawArrayWrapper::RawArrayWrapper(const LogicalType &type) : data(nullptr), type( case LogicalTypeId::DATE: case LogicalTypeId::INTERVAL: case LogicalTypeId::TIMESTAMP_TZ: - type_width = sizeof(int64_t); - break; + return sizeof(int64_t); case LogicalTypeId::TIME: case LogicalTypeId::TIME_TZ: case LogicalTypeId::VARCHAR: @@ -70,13 +58,17 @@ RawArrayWrapper::RawArrayWrapper(const LogicalType &type) : data(nullptr), type( case LogicalTypeId::STRUCT: case LogicalTypeId::UNION: case LogicalTypeId::UUID: - type_width = sizeof(PyObject *); - break; + case LogicalTypeId::ARRAY: + return sizeof(PyObject *); default: throw NotImplementedException("Unsupported type \"%s\" for DuckDB -> NumPy conversion", type.ToString()); } } +RawArrayWrapper::RawArrayWrapper(const LogicalType &type) : data(nullptr), type(type), count(0) { + type_width = GetNumpyTypeWidth(type); +} + string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) { switch (type.id()) { case LogicalTypeId::BOOLEAN: @@ -127,6 +119,7 @@ string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) { case LogicalTypeId::STRUCT: case LogicalTypeId::UNION: case LogicalTypeId::UUID: + case LogicalTypeId::ARRAY: return "object"; case LogicalTypeId::ENUM: { auto size = EnumType::GetSize(type); diff --git a/tools/pythonpkg/src/pyconnection.cpp b/tools/pythonpkg/src/pyconnection.cpp index 85b267704b9c..4fe5e1529c06 100644 --- a/tools/pythonpkg/src/pyconnection.cpp +++ b/tools/pythonpkg/src/pyconnection.cpp @@ -140,8 +140,11 @@ static void InitializeConnectionMethods(py::class_ DuckDBPyConnection::MapType(const shared_ptr(map_type); } -shared_ptr DuckDBPyConnection::ArrayType(const shared_ptr &type) { +shared_ptr DuckDBPyConnection::ListType(const shared_ptr &type) { auto array_type = LogicalType::LIST(type->Type()); return make_shared(array_type); } +shared_ptr DuckDBPyConnection::ArrayType(const shared_ptr &type, idx_t size) { + auto array_type = LogicalType::ARRAY(type->Type(), size); + return make_shared(array_type); +} + static child_list_t GetChildList(const py::object &container) { child_list_t types; if (py::isinstance(container)) { diff --git a/tools/pythonpkg/src/pyduckdb/connection_wrapper.cpp b/tools/pythonpkg/src/pyduckdb/connection_wrapper.cpp index 060cf9a8fd41..f150b4cc844f 100644 --- a/tools/pythonpkg/src/pyduckdb/connection_wrapper.cpp +++ b/tools/pythonpkg/src/pyduckdb/connection_wrapper.cpp @@ -33,12 +33,20 @@ shared_ptr PyConnectionWrapper::StringType(const string &collation return conn->StringType(collation); } -shared_ptr PyConnectionWrapper::ArrayType(const shared_ptr &type, +shared_ptr PyConnectionWrapper::ArrayType(const shared_ptr &type, idx_t size, shared_ptr conn) { if (!conn) { conn = DuckDBPyConnection::DefaultConnection(); } - return conn->ArrayType(type); + return conn->ArrayType(type, size); +} + +shared_ptr PyConnectionWrapper::ListType(const shared_ptr &type, + shared_ptr conn) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->ListType(type); } shared_ptr PyConnectionWrapper::MapType(const shared_ptr &key, diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_list.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_list.py index a4860a15c399..51e0e1eb1641 100644 --- a/tools/pythonpkg/tests/fast/arrow/test_arrow_list.py +++ b/tools/pythonpkg/tests/fast/arrow/test_arrow_list.py @@ -92,7 +92,7 @@ def test_fixedsize_list(self): ) create_and_register_comparison_result( [ - ('a', 'FLOAT[]', data), + ('a', f'FLOAT[{list_size}]', data), ], duckdb_conn, ) diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_offsets.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_offsets.py index e83c8c3794a1..fe34a7e1c06a 100644 --- a/tools/pythonpkg/tests/fast/arrow/test_arrow_offsets.py +++ b/tools/pythonpkg/tests/fast/arrow/test_arrow_offsets.py @@ -480,9 +480,9 @@ def test_struct_of_fixed_size_list(self, duckdb_cursor, col1_null, col2_null): if col2_null: res2 = None elif col1_null: - res2 = [None, None, None] + res2 = (None, None, None) else: - res2 = ['131072', '131072', '131072'] + res2 = ('131072', '131072', '131072') assert res == [(res1, res2)] @test_nulls() @@ -511,9 +511,9 @@ def test_struct_of_fixed_size_blob(self, duckdb_cursor, col1_null, col2_null): if col2_null: res2 = None elif col1_null: - res2 = [None, None, None] + res2 = (None, None, None) else: - res2 = [b'131072', b'131073', b'131074'] + res2 = (b'131072', b'131073', b'131074') assert res == [(res1, res2)] @test_nulls() diff --git a/tools/pythonpkg/tests/fast/arrow/test_nested_arrow.py b/tools/pythonpkg/tests/fast/arrow/test_nested_arrow.py index 113e3cc32fc5..592778146835 100644 --- a/tools/pythonpkg/tests/fast/arrow/test_nested_arrow.py +++ b/tools/pythonpkg/tests/fast/arrow/test_nested_arrow.py @@ -58,7 +58,7 @@ def test_list_types(self, duckdb_cursor): arrow_table = pa.Table.from_arrays([data], ['a']) rel = duckdb.from_arrow(arrow_table) res = rel.execute().fetchall() - assert res == [([1],), (None,), ([2],)] + assert res == [((1,),), (None,), ((2,),)] # Complex nested structures with different list types data = [ @@ -69,7 +69,7 @@ def test_list_types(self, duckdb_cursor): arrow_table = pa.Table.from_arrays([data[0], data[1], data[2]], ['a', 'b', 'c']) rel = duckdb.from_arrow(arrow_table) res = rel.project('a').execute().fetchall() - assert res == [([1],), (None,), ([2],)] + assert res == [((1,),), (None,), ((2,),)] res = rel.project('b').execute().fetchall() assert res == [([1],), (None,), ([2],)] res = rel.project('c').execute().fetchall() @@ -81,9 +81,9 @@ def test_list_types(self, duckdb_cursor): rel = duckdb.from_arrow(arrow_table) res = rel.execute().fetchall() assert res == [ - ({'fixed': [1], 'large': [1], 'normal': [1, 2, 3]},), + ({'fixed': (1,), 'large': [1], 'normal': [1, 2, 3]},), ({'fixed': None, 'large': None, 'normal': None},), - ({'fixed': [2], 'large': [2], 'normal': [2, 1]},), + ({'fixed': (2,), 'large': [2], 'normal': [2, 1]},), ] def test_lists_roundtrip(self, duckdb_cursor): diff --git a/tools/pythonpkg/tests/fast/spark/test_spark_types.py b/tools/pythonpkg/tests/fast/spark/test_spark_types.py index f900820d4905..5fac33b3120d 100644 --- a/tools/pythonpkg/tests/fast/spark/test_spark_types.py +++ b/tools/pythonpkg/tests/fast/spark/test_spark_types.py @@ -46,7 +46,15 @@ def test_all_types_schema(self, spark): small_enum, medium_enum, large_enum, - 'union' + 'union', + fixed_int_array, + fixed_varchar_array, + fixed_nested_int_array, + fixed_nested_varchar_array, + fixed_struct_array, + struct_of_fixed_array, + fixed_array_of_int_list, + list_of_fixed_int_array ) from test_all_types() """ ) diff --git a/tools/pythonpkg/tests/fast/test_all_types.py b/tools/pythonpkg/tests/fast/test_all_types.py index 5b904fdfab7e..89a0f67b1267 100644 --- a/tools/pythonpkg/tests/fast/test_all_types.py +++ b/tools/pythonpkg/tests/fast/test_all_types.py @@ -89,6 +89,14 @@ def recursive_equality(o1, o2): "array_of_structs", "map", "union", + "fixed_int_array", + "fixed_varchar_array", + "fixed_nested_int_array", + "fixed_nested_varchar_array", + "fixed_struct_array", + "struct_of_fixed_array", + "fixed_array_of_int_list", + "list_of_fixed_int_array", ] @@ -220,6 +228,38 @@ def test_fetchall(self, cur_type): 'timestamp_ms': [(datetime.datetime(1990, 1, 1, 0, 0),)], 'timestamp_tz': [(datetime.datetime(1990, 1, 1, 0, 0, tzinfo=pytz.UTC),)], 'union': [('Frank',), (5,), (None,)], + 'fixed_int_array': [((None, 2, 3),), ((4, 5, 6),), (None,)], + 'fixed_varchar_array': [(('a', None, 'c'),), (('d', 'e', 'f'),), (None,)], + 'fixed_nested_int_array': [ + (((None, 2, 3), None, (None, 2, 3)),), + (((4, 5, 6), (None, 2, 3), (4, 5, 6)),), + (None,), + ], + 'fixed_nested_varchar_array': [ + ((('a', None, 'c'), None, ('a', None, 'c')),), + ((('d', 'e', 'f'), ('a', None, 'c'), ('d', 'e', 'f')),), + (None,), + ], + 'fixed_struct_array': [ + (({'a': None, 'b': None}, {'a': 42, 'b': '🦆🦆🦆🦆🦆🦆'}, {'a': None, 'b': None}),), + (({'a': 42, 'b': '🦆🦆🦆🦆🦆🦆'}, {'a': None, 'b': None}, {'a': 42, 'b': '🦆🦆🦆🦆🦆🦆'}),), + (None,), + ], + 'struct_of_fixed_array': [ + ({'a': (None, 2, 3), 'b': ('a', None, 'c')},), + ({'a': (4, 5, 6), 'b': ('d', 'e', 'f')},), + (None,), + ], + 'fixed_array_of_int_list': [ + (([], [42, 999, None, None, -42], []),), + (([42, 999, None, None, -42], [], [42, 999, None, None, -42]),), + (None,), + ], + 'list_of_fixed_int_array': [ + ([(None, 2, 3), (4, 5, 6), (None, 2, 3)],), + ([(4, 5, 6), (None, 2, 3), (4, 5, 6)],), + (None,), + ], } if cur_type in replacement_values: result = conn.execute("select " + replacement_values[cur_type]).fetchall() diff --git a/tools/pythonpkg/tests/fast/test_type.py b/tools/pythonpkg/tests/fast/test_type.py index 2655fabb0f71..9350a4088149 100644 --- a/tools/pythonpkg/tests/fast/test_type.py +++ b/tools/pythonpkg/tests/fast/test_type.py @@ -71,10 +71,14 @@ def test_primitive_types(self): assert str(BIT) == 'BIT' assert str(INTERVAL) == 'INTERVAL' - def test_array_type(self): - type = duckdb.array_type(BIGINT) + def test_list_type(self): + type = duckdb.list_type(BIGINT) assert str(type) == 'BIGINT[]' + def test_array_type(self): + type = duckdb.array_type(BIGINT, 3) + assert str(type) == 'BIGINT[3]' + def test_struct_type(self): type = duckdb.struct_type({'a': BIGINT, 'b': BOOLEAN}) assert str(type) == 'STRUCT(a BIGINT, b BOOLEAN)' diff --git a/tools/pythonpkg/tests/fast/test_type_explicit.py b/tools/pythonpkg/tests/fast/test_type_explicit.py new file mode 100644 index 000000000000..23dcddc31a5a --- /dev/null +++ b/tools/pythonpkg/tests/fast/test_type_explicit.py @@ -0,0 +1,20 @@ +import duckdb + + +class TestMap(object): + + def test_array_list_tuple_ambiguity(self): + con = duckdb.connect() + res = con.sql("SELECT $arg", params={'arg': (1, 2)}).fetchall()[0][0] + assert res == [1, 2] + + # By using an explicit duckdb.Value with an array type, we should convert the input as an array + # and get an array (tuple) back + typ = duckdb.array_type(duckdb.typing.BIGINT, 2) + val = duckdb.Value((1, 2), typ) + res = con.sql("SELECT $arg", params={'arg': val}).fetchall()[0][0] + assert res == (1, 2) + + val = duckdb.Value([3, 4], typ) + res = con.sql("SELECT $arg", params={'arg': val}).fetchall()[0][0] + assert res == (3, 4)