From 1e7b22d73168dbb6092d5066816963ad201c7c70 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 22 Sep 2023 02:13:30 +0530 Subject: [PATCH 1/2] fix missing null mask for parse error in string column names --- cpp/src/io/json/json_column.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 5d7fb9d6b43..06c097ecced 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -347,13 +347,15 @@ std::vector copy_strings_to_host(device_span input, cudf::io::parse_options_view options_view{}; options_view.quotechar = '\0'; // no quotes options_view.keepquotes = true; + auto nulls = cudf::detail::create_null_mask( + num_strings, mask_state::ALL_VALID, stream, rmm::mr::get_current_device_resource()); auto d_offset_length_it = thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()); auto d_column_names = parse_data(input.data(), d_offset_length_it, num_strings, data_type{type_id::STRING}, - rmm::device_buffer{}, + std::move(nulls), 0, options_view, stream, From aadf7c3dddbbcca228eb8956cf900fed51c8f8c3 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 22 Sep 2023 15:56:07 +0530 Subject: [PATCH 2/2] fix missing null mask if not passed in parse_data --- cpp/src/io/json/json_column.cu | 4 +--- cpp/src/io/utilities/data_casting.cu | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 06c097ecced..5d7fb9d6b43 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -347,15 +347,13 @@ std::vector copy_strings_to_host(device_span input, cudf::io::parse_options_view options_view{}; options_view.quotechar = '\0'; // no quotes options_view.keepquotes = true; - auto nulls = cudf::detail::create_null_mask( - num_strings, mask_state::ALL_VALID, stream, rmm::mr::get_current_device_resource()); auto d_offset_length_it = thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()); auto d_column_names = parse_data(input.data(), d_offset_length_it, num_strings, data_type{type_id::STRING}, - std::move(nulls), + rmm::device_buffer{}, 0, options_view, stream, diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index 1772e5e43fa..d16237d7afe 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -924,6 +924,9 @@ std::unique_ptr parse_data( if (col_size == 0) { return make_empty_column(col_type); } auto d_null_count = rmm::device_scalar(null_count, stream); auto null_count_data = d_null_count.data(); + if (null_mask.is_empty()) { + null_mask = cudf::detail::create_null_mask(col_size, mask_state::ALL_VALID, stream, mr); + } // Prepare iterator that returns (string_ptr, string_length)-pairs needed by type conversion auto str_tuples = thrust::make_transform_iterator(offset_length_begin, to_string_view_pair{data});