Use experimental make_strings_children for json/csv writers (#15599)
Updates the JSON and CSV writer functions to use the new experimental make_strings_children.
Also updates the JSON_BENCH benchmark for get_json_object to use the new utility.

Reference #15579
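
For reference, below is a minimal sketch of the contract the experimental make_strings_children expects from its functor: it is invoked once with d_chars == nullptr so each row's byte count can be recorded in d_sizes, and again with d_chars and d_offsets populated so the row bytes can be written in place. The copy_strings_fn and copy_strings names are hypothetical, the header paths are best-effort assumptions, and nulls are simply written as empty rows for brevity; consult the cudf sources for the exact interface.

// Hypothetical sketch (not part of this commit) of the two-pass functor pattern
// used by the experimental make_strings_children: pass 1 records row sizes,
// pass 2 writes the row bytes at the computed offsets.
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/offsets_iterator.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/resource_ref.hpp>

struct copy_strings_fn {
  cudf::column_device_view const d_column;     // input strings
  cudf::size_type* d_sizes{};                  // filled during the sizing pass
  char* d_chars{};                             // non-null only during the writing pass
  cudf::detail::input_offsetalator d_offsets;  // row offsets for the writing pass

  __device__ void operator()(cudf::size_type idx)
  {
    if (d_column.is_null(idx)) {
      if (!d_chars) { d_sizes[idx] = 0; }  // nulls written as empty rows in this sketch
      return;
    }
    auto const d_str = d_column.element<cudf::string_view>(idx);
    if (!d_chars) {
      d_sizes[idx] = d_str.size_bytes();  // sizing pass: report this row's byte count
    } else {
      // writing pass: copy the bytes into the output at this row's offset
      memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
    }
  }
};

// Hypothetical helper showing how the functor is driven and the result assembled.
std::unique_ptr<cudf::column> copy_strings(cudf::strings_column_view const& input,
                                           rmm::cuda_stream_view stream,
                                           rmm::device_async_resource_ref mr)
{
  auto const d_column = cudf::column_device_view::create(input.parent(), stream);
  copy_strings_fn fn{*d_column};
  auto [offsets_column, chars] =
    cudf::strings::detail::experimental::make_strings_children(fn, input.size(), stream, mr);
  return cudf::make_strings_column(input.size(), std::move(offsets_column), chars.release(), 0, {});
}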

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Bradley Dice (https://github.com/bdice)

URL: #15599
davidwendt authored May 1, 2024
1 parent f5c7778 commit 4aabf51
Showing 3 changed files with 18 additions and 15 deletions.
9 changes: 5 additions & 4 deletions cpp/benchmarks/json/json.cu
@@ -22,7 +22,7 @@
 
 #include <cudf/column/column_factories.hpp>
 #include <cudf/json/json.hpp>
-#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/strings/detail/strings_children_ex.cuh>
 #include <cudf/strings/detail/utilities.cuh>
 #include <cudf/strings/string_view.hpp>
 #include <cudf/strings/strings_column_view.hpp>
@@ -77,8 +77,9 @@ struct json_benchmark_row_builder {
   cudf::column_device_view const d_book_pct;    // Book percentage
   cudf::column_device_view const d_misc_order;  // Misc-Store order
   cudf::column_device_view const d_store_order; // Books-Bicycles order
-  int32_t* d_offsets{};
+  cudf::size_type* d_sizes{};
   char* d_chars{};
+  cudf::detail::input_offsetalator d_offsets;
   thrust::minstd_rand rng{5236};
   thrust::uniform_int_distribution<int> dist{};
 
@@ -155,7 +156,7 @@ struct json_benchmark_row_builder {
       output_str += Misc;
     }
     output_str += brace2;
-    if (!output_str.ptr) d_offsets[idx] = output_str.bytes;
+    if (!output_str.ptr) { d_sizes[idx] = output_str.bytes; }
   }
 };
 
@@ -177,7 +178,7 @@ auto build_json_string_column(int desired_bytes, int num_rows)
   auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
   json_benchmark_row_builder jb{
     desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
-  auto [offsets, chars] = cudf::strings::detail::make_strings_children(
+  auto [offsets, chars] = cudf::strings::detail::experimental::make_strings_children(
     jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   return cudf::make_strings_column(num_rows, std::move(offsets), chars.release(), 0, {});
 }
11 changes: 6 additions & 5 deletions cpp/src/io/csv/writer_impl.cu
@@ -33,7 +33,7 @@
 #include <cudf/strings/detail/combine.hpp>
 #include <cudf/strings/detail/converters.hpp>
 #include <cudf/strings/detail/replace.hpp>
-#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/strings/detail/strings_children_ex.cuh>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/utilities/error.hpp>
@@ -75,8 +75,9 @@ namespace {
 struct escape_strings_fn {
   column_device_view const d_column;
   string_view const d_delimiter;  // check for column delimiter
-  size_type* d_offsets{};
+  size_type* d_sizes{};
   char* d_chars{};
+  cudf::detail::input_offsetalator d_offsets;
 
   __device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes)
   {
@@ -89,7 +90,7 @@ struct escape_strings_fn {
   __device__ void operator()(size_type idx)
   {
     if (d_column.is_null(idx)) {
-      if (!d_chars) d_offsets[idx] = 0;
+      if (!d_chars) { d_sizes[idx] = 0; }
       return;
     }
 
@@ -115,7 +116,7 @@ struct escape_strings_fn {
     }
     if (quote_row) write_char(quote, d_buffer, bytes);
 
-    if (!d_chars) d_offsets[idx] = bytes;
+    if (!d_chars) { d_sizes[idx] = bytes; }
   }
 };
 
@@ -182,7 +183,7 @@ struct column_to_strings_fn {
     auto d_column = column_device_view::create(column_v, stream_);
     escape_strings_fn fn{*d_column, delimiter.value(stream_)};
     auto [offsets_column, chars] =
-      cudf::strings::detail::make_strings_children(fn, column_v.size(), stream_, mr_);
+      cudf::strings::detail::experimental::make_strings_children(fn, column_v.size(), stream_, mr_);
 
     return make_strings_column(column_v.size(),
                                std::move(offsets_column),
13 changes: 7 additions & 6 deletions cpp/src/io/json/write_json.cu
@@ -36,7 +36,7 @@
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/detail/combine.hpp>
 #include <cudf/strings/detail/converters.hpp>
-#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/strings/detail/strings_children_ex.cuh>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/structs/structs_column_view.hpp>
 #include <cudf/table/table.hpp>
@@ -78,8 +78,9 @@ namespace {
 struct escape_strings_fn {
   column_device_view const d_column;
   bool const append_colon{false};
-  size_type* d_offsets{};
+  size_type* d_sizes{};
   char* d_chars{};
+  cudf::detail::input_offsetalator d_offsets;
 
   __device__ void write_char(char_utf8 chr, char*& d_buffer, size_type& bytes)
   {
@@ -123,7 +124,7 @@ struct escape_strings_fn {
   __device__ void operator()(size_type idx)
   {
     if (d_column.is_null(idx)) {
-      if (!d_chars) d_offsets[idx] = 0;
+      if (!d_chars) { d_sizes[idx] = 0; }
       return;
     }
 
@@ -163,15 +164,15 @@ struct escape_strings_fn {
     constexpr char_utf8 const colon = ':';  // append colon
     if (append_colon) write_char(colon, d_buffer, bytes);
 
-    if (!d_chars) d_offsets[idx] = bytes;
+    if (!d_chars) { d_sizes[idx] = bytes; }
   }
 
   std::unique_ptr<column> get_escaped_strings(column_view const& column_v,
                                               rmm::cuda_stream_view stream,
                                               rmm::device_async_resource_ref mr)
   {
-    auto [offsets_column, chars] =
-      cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr);
+    auto [offsets_column, chars] = cudf::strings::detail::experimental::make_strings_children(
+      *this, column_v.size(), stream, mr);
 
     return make_strings_column(column_v.size(),
                                std::move(offsets_column),
