Preserve MapType field names in MapBuilder (ARROW-13735).

The MapBuilder constructors now record, from the MapType they are given, the
entries/key/item field names and the item field's nullability. type() uses
those values to rebuild the MapType instead of calling map(), which was passed
only the key/item types and keys_sorted_.

Adds a C++ test (TestMapArray.BuildingWithFieldNames) and a Python test
(test_map_labelled).

NOTE(review): the leading whitespace and the template arguments on the lines
below were restored from a whitespace-collapsed copy of this patch - confirm
the context lines against the target tree, or apply with
`git apply --ignore-whitespace` if a hunk is rejected.

diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc
index 930702e7cb8d2..def01379e0634 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -716,6 +716,30 @@ TEST_F(TestMapArray, BuildingStringToInt) {
   ASSERT_ARRAYS_EQUAL(*actual, expected);
 }
 
+TEST_F(TestMapArray, BuildingWithFieldNames) {
+  // Builder should preserve field names in output Array
+  ASSERT_OK_AND_ASSIGN(auto map_type,
+                       MapType::Make(field("some_entries",
+                                           struct_({field("some_key", int16(), false),
+                                                    field("some_value", int16())}),
+                                           false)));
+
+  auto key_builder = std::make_shared<Int16Builder>();
+  auto item_builder = std::make_shared<Int16Builder>();
+  MapBuilder map_builder(default_memory_pool(), key_builder, item_builder, map_type);
+
+  std::shared_ptr<Array> actual;
+  ASSERT_OK(map_builder.Append());
+  ASSERT_OK(key_builder->AppendValues({0, 1, 2, 3, 4, 5}));
+  ASSERT_OK(item_builder->AppendValues({1, 1, 2, 3, 5, 8}));
+  ASSERT_OK(map_builder.AppendNull());
+  ASSERT_OK(map_builder.Finish(&actual));
+  ASSERT_OK(actual->ValidateFull());
+
+  ASSERT_EQ(actual->type()->ToString(), map_type->ToString());
+  ASSERT_EQ(map_builder.type()->ToString(), map_type->ToString());
+}
+
 TEST_F(TestMapArray, ValidateErrorNullStruct) {
   ASSERT_OK_AND_ASSIGN(
       auto values,
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index a3bcde0381a4a..fbba1fd056430 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -38,6 +38,10 @@ MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& ke
                        const std::shared_ptr<DataType>& type)
     : ArrayBuilder(pool), key_builder_(key_builder), item_builder_(item_builder) {
   auto map_type = internal::checked_cast<const MapType*>(type.get());
+  entries_name_ = map_type->field(0)->name();
+  key_name_ = map_type->key_field()->name();
+  item_name_ = map_type->item_field()->name();
+  item_nullable_ = map_type->item_field()->nullable();
   keys_sorted_ = map_type->keys_sorted();
 
   std::vector<std::shared_ptr<ArrayBuilder>> child_builders{key_builder, item_builder};
@@ -59,6 +63,10 @@ MapBuilder::MapBuilder(MemoryPool* pool,
                        const std::shared_ptr<DataType>& type)
     : ArrayBuilder(pool) {
   auto map_type = internal::checked_cast<const MapType*>(type.get());
+  entries_name_ = map_type->field(0)->name();
+  key_name_ = map_type->key_field()->name();
+  item_name_ = map_type->item_field()->name();
+  item_nullable_ = map_type->item_field()->nullable();
   keys_sorted_ = map_type->keys_sorted();
   key_builder_ = struct_builder->child_builder(0);
   item_builder_ = struct_builder->child_builder(1);
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index 9e3a4458d8da1..78efc0e2cbb9a 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -330,7 +330,14 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
   ArrayBuilder* value_builder() const { return list_builder_->value_builder(); }
 
   std::shared_ptr<DataType> type() const override {
-    return map(key_builder_->type(), item_builder_->type(), keys_sorted_);
+    // Key and Item builder may update types, but they don't contain the field names,
+    // so we need to reconstruct the type. (See ARROW-13735.)
+    return std::make_shared<MapType>(
+        field(entries_name_,
+              struct_({field(key_name_, key_builder_->type(), false),
+                       field(item_name_, item_builder_->type(), item_nullable_)}),
+              false),
+        keys_sorted_);
   }
 
   Status ValidateOverflow(int64_t new_elements) {
@@ -342,6 +349,10 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
 
  protected:
   bool keys_sorted_ = false;
+  bool item_nullable_ = false;
+  std::string entries_name_;
+  std::string key_name_;
+  std::string item_name_;
   std::shared_ptr<ListBuilder> list_builder_;
   std::shared_ptr<ArrayBuilder> key_builder_;
   std::shared_ptr<ArrayBuilder> item_builder_;
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 6b87a0ba9b0e3..aeeb6314c83d1 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -907,6 +907,15 @@ def test_list_from_arrays(list_array_type, list_type_factory):
     result.validate(full=True)
 
 
+def test_map_labelled():
+    # ARROW-13735
+    t = pa.map_(pa.field("name", "string", nullable=False), "int64")
+    arr = pa.array([[('a', 1), ('b', 2)], [('c', 3)]], type=t)
+    assert arr.type.key_field == pa.field("name", pa.utf8(), nullable=False)
+    assert arr.type.item_field == pa.field("value", pa.int64())
+    assert len(arr) == 2
+
+
 def test_map_from_arrays():
     offsets_arr = np.array([0, 2, 5, 8], dtype='i4')
     offsets = pa.array(offsets_arr, type='int32')