From 073582fc65534aa16aa9d8ff7b189da1a154e4bc Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 25 Apr 2024 13:01:24 -0400 Subject: [PATCH] Use experimental make_strings_children for json/csv writers --- cpp/benchmarks/json/json.cu | 9 +++++---- cpp/src/io/csv/writer_impl.cu | 11 ++++++----- cpp/src/io/json/write_json.cu | 13 +++++++------ 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu index a54d7d48dc4..c65db187f42 100644 --- a/cpp/benchmarks/json/json.cu +++ b/cpp/benchmarks/json/json.cu @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include @@ -77,8 +77,9 @@ struct json_benchmark_row_builder { cudf::column_device_view const d_book_pct; // Book percentage cudf::column_device_view const d_misc_order; // Misc-Store order cudf::column_device_view const d_store_order; // Books-Bicycles order - int32_t* d_offsets{}; + cudf::size_type* d_sizes{}; char* d_chars{}; + cudf::detail::input_offsetalator d_offsets; thrust::minstd_rand rng{5236}; thrust::uniform_int_distribution dist{}; @@ -155,7 +156,7 @@ struct json_benchmark_row_builder { output_str += Misc; } output_str += brace2; - if (!output_str.ptr) d_offsets[idx] = output_str.bytes; + if (!output_str.ptr) { d_sizes[idx] = output_str.bytes; } } }; @@ -177,7 +178,7 @@ auto build_json_string_column(int desired_bytes, int num_rows) auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2)); json_benchmark_row_builder jb{ desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order}; - auto [offsets, chars] = cudf::strings::detail::make_strings_children( + auto [offsets, chars] = cudf::strings::detail::experimental::make_strings_children( jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); return cudf::make_strings_column(num_rows, std::move(offsets), chars.release(), 0, {}); } diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 335ce77e3e3..58a74654405 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include @@ -75,8 +75,9 @@ namespace { struct escape_strings_fn { column_device_view const d_column; string_view const d_delimiter; // check for column delimiter - size_type* d_offsets{}; + size_type* d_sizes{}; char* d_chars{}; + cudf::detail::input_offsetalator d_offsets; __device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes) { @@ -89,7 +90,7 @@ struct escape_strings_fn { __device__ void operator()(size_type idx) { if (d_column.is_null(idx)) { - if (!d_chars) d_offsets[idx] = 0; + if (!d_chars) { d_sizes[idx] = 0; } return; } @@ -115,7 +116,7 @@ struct escape_strings_fn { } if (quote_row) write_char(quote, d_buffer, bytes); - if (!d_chars) d_offsets[idx] = bytes; + if (!d_chars) { d_sizes[idx] = bytes; } } }; @@ -182,7 +183,7 @@ struct column_to_strings_fn { auto d_column = column_device_view::create(column_v, stream_); escape_strings_fn fn{*d_column, delimiter.value(stream_)}; auto [offsets_column, chars] = - cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_); + cudf::strings::detail::experimental::make_strings_children(fn, column_v.size(), stream_, mr_); return make_strings_column(column_v.size(), std::move(offsets_column), diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index 596b3381eaf..cac7149dabe 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -78,8 +78,9 @@ namespace { struct escape_strings_fn { column_device_view const d_column; bool const append_colon{false}; - size_type* d_offsets{}; + size_type* d_sizes{}; char* d_chars{}; + cudf::detail::input_offsetalator d_offsets; __device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes) { @@ -123,7 +124,7 @@ struct escape_strings_fn { __device__ void operator()(size_type idx) { if (d_column.is_null(idx)) { - if (!d_chars) d_offsets[idx] = 0; + if (!d_chars) { d_sizes[idx] = 0; } return; } @@ -163,15 +164,15 @@ struct escape_strings_fn { constexpr char_utf8 const colon = ':'; // append colon if (append_colon) write_char(colon, d_buffer, bytes); - if (!d_chars) d_offsets[idx] = bytes; + if (!d_chars) { d_sizes[idx] = bytes; } } std::unique_ptr get_escaped_strings(column_view const& column_v, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto [offsets_column, chars] = - cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr); + auto [offsets_column, chars] = cudf::strings::detail::experimental::make_strings_children( + *this, column_v.size(), stream, mr); return make_strings_column(column_v.size(), std::move(offsets_column),