From c725394497a63fb9c4eba6ee0985599c0eceec57 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Nov 2024 09:59:41 -0800 Subject: [PATCH] Fix handling for schema mismatching in case of `column_view` input Signed-off-by: Nghia Truong --- src/main/cpp/src/from_json_to_structs.cu | 25 ++++++++++-------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/main/cpp/src/from_json_to_structs.cu b/src/main/cpp/src/from_json_to_structs.cu index fa5d0c266..ddfdcc4c4 100644 --- a/src/main/cpp/src/from_json_to_structs.cu +++ b/src/main/cpp/src/from_json_to_structs.cu @@ -832,12 +832,8 @@ std::unique_ptr convert_data_type(InputType&& input, std::vector> new_children; new_children.emplace_back( std::move(input_content.children[cudf::lists_column_view::offsets_column_index])); - new_children.emplace_back(convert_data_type(std::move(child), - schema.child_types.front().second, - allow_nonnumeric_numbers, - is_us_locale, - stream, - mr)); + new_children.emplace_back(convert_data_type( + std::move(child), child_schema, allow_nonnumeric_numbers, is_us_locale, stream, mr)); // Do not use `cudf::make_lists_column` since we do not need to call `purge_nonempty_nulls` // on the child column as it does not have non-empty nulls. @@ -875,18 +871,19 @@ std::unique_ptr convert_data_type(InputType&& input, auto const num_children = input.num_children(); if (schema.type.id() == cudf::type_id::LIST) { - CUDF_EXPECTS(d_type == cudf::type_id::LIST, "Input column should be LIST."); + auto const& child_schema = schema.child_types.front().second; + auto const child = input.child(cudf::lists_column_view::child_column_index); + + // Handle mismatched child schema. + if (cudf::is_nested(child_schema.type) && (child_schema.type.id() != child.type().id())) { + return make_all_nulls_column(schema, num_rows, stream, mr); + } std::vector> new_children; new_children.emplace_back( std::make_unique(input.child(cudf::lists_column_view::offsets_column_index))); new_children.emplace_back( - convert_data_type(input.child(cudf::lists_column_view::child_column_index), - schema.child_types.front().second, - allow_nonnumeric_numbers, - is_us_locale, - stream, - mr)); + convert_data_type(child, child_schema, allow_nonnumeric_numbers, is_us_locale, stream, mr)); // Do not use `cudf::make_lists_column` since we do not need to call `purge_nonempty_nulls` // on the child column as it does not have non-empty nulls. @@ -899,8 +896,6 @@ std::unique_ptr convert_data_type(InputType&& input, } if (schema.type.id() == cudf::type_id::STRUCT) { - CUDF_EXPECTS(d_type == cudf::type_id::STRUCT, "Input column should be STRUCT."); - std::vector> new_children; new_children.reserve(num_children); for (cudf::size_type i = 0; i < num_children; ++i) {