From 426f5bcc15c119a96f377ac77cbf04dadbdd20cf Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Tue, 5 Mar 2024 19:20:03 +0000 Subject: [PATCH 1/2] applying patch for mixed type perf improvement --- cpp/src/io/json/json_column.cu | 3 +++ cpp/src/io/json/nested_json.hpp | 2 ++ 2 files changed, 5 insertions(+) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 56da1095b81..a15b39029fc 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -606,6 +606,7 @@ void make_device_json_column(device_span input, reinitialize_as_string(old_col_id, col); // all its children (which are already inserted) are ignored later. } + col.forced_as_string_column = true; columns.try_emplace(this_col_id, columns.at(old_col_id)); continue; } @@ -856,6 +857,8 @@ std::pair, std::vector> device_json_co : "n/a"); #endif target_type = schema.value().type; + } else if (json_col.forced_as_string_column) { + target_type = data_type{type_id::STRING}; } // Infer column type, if we don't have an explicit type for it else { diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index c13daf9b9f5..96992706b11 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -156,6 +156,8 @@ struct device_json_column { std::vector column_order; // Counting the current number of items in this column row_offset_t num_rows = 0; + // Force as string column + bool forced_as_string_column{false}; /** * @brief Construct a new d json column object From 3b3a1433ca893acf7d9430f5d6ec15747fb9e016 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Fri, 8 Mar 2024 13:41:57 +0530 Subject: [PATCH 2/2] fix merge issue. --- cpp/src/io/json/json_column.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index ed060555188..6576d41dd72 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -674,12 +674,10 @@ void make_device_json_column(device_span input, reinitialize_as_string(old_col_id, col); // all its children (which are already inserted) are ignored later. } + col.forced_as_string_column = true; columns.try_emplace(this_col_id, columns.at(old_col_id)); continue; } - col.forced_as_string_column = true; - columns.try_emplace(this_col_id, columns.at(old_col_id)); - continue; } if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) {