Skip to content

Commit

Permalink
Fix async synchronization issues in json_column.cu (#15497)
Browse files Browse the repository at this point in the history
Fixes #15390 
This change fixes async synchronization issues in json_column.cu. 
The related file json_tree.cu does not have async synchronization issues.

Summary of changes:
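Changed the debug-print device-to-host copies in print_tree from async to sync.
Added stream.synchronize() after groups of multiple async copies.
Changed the h_chars copy to async, since it is followed by a synchronization before the data is used (this also helps because the chars array is usually large).
Changed the is_str_column_all_nulls copy to sync.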

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Nghia Truong (https://github.com/ttnghia)

URL: #15497
  • Loading branch information
karthikeyann authored Apr 12, 2024
1 parent ff22a7a commit f19d4eb
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions cpp/src/io/json/json_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,16 @@ void print_tree(host_span<SymbolT const> input,
tree_meta_t const& d_gpu_tree,
rmm::cuda_stream_view stream)
{
print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.node_categories, stream),
print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.node_categories, stream),
"node_categories",
to_cat);
print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.parent_node_ids, stream),
print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.parent_node_ids, stream),
"parent_node_ids",
to_int);
print_vec(
cudf::detail::make_std_vector_async(d_gpu_tree.node_levels, stream), "node_levels", to_int);
auto node_range_begin = cudf::detail::make_std_vector_async(d_gpu_tree.node_range_begin, stream);
auto node_range_end = cudf::detail::make_std_vector_async(d_gpu_tree.node_range_end, stream);
cudf::detail::make_std_vector_sync(d_gpu_tree.node_levels, stream), "node_levels", to_int);
auto node_range_begin = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_begin, stream);
auto node_range_end = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_end, stream);
print_vec(node_range_begin, "node_range_begin", to_int);
print_vec(node_range_end, "node_range_end", to_int);
for (int i = 0; i < int(node_range_begin.size()); i++) {
Expand Down Expand Up @@ -333,10 +333,11 @@ rmm::device_uvector<NodeIndexT> get_values_column_indices(TreeDepthT const row_a
* @param stream CUDA stream
* @return Vector of strings
*/
std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
device_span<SymbolOffsetT const> node_range_begin,
device_span<SymbolOffsetT const> node_range_end,
rmm::cuda_stream_view stream)
std::vector<std::string> copy_strings_to_host_sync(
device_span<SymbolT const> input,
device_span<SymbolOffsetT const> node_range_begin,
device_span<SymbolOffsetT const> node_range_end,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
auto const num_strings = node_range_begin.size();
Expand Down Expand Up @@ -371,12 +372,13 @@ std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
auto to_host = [stream](auto const& col) {
if (col.is_empty()) return std::vector<std::string>{};
auto const scv = cudf::strings_column_view(col);
auto const h_chars = cudf::detail::make_std_vector_sync<char>(
auto const h_chars = cudf::detail::make_std_vector_async<char>(
cudf::device_span<char const>(scv.chars_begin(stream), scv.chars_size(stream)), stream);
auto const h_offsets = cudf::detail::make_std_vector_sync(
auto const h_offsets = cudf::detail::make_std_vector_async(
cudf::device_span<cudf::size_type const>(scv.offsets().data<cudf::size_type>() + scv.offset(),
scv.size() + 1),
stream);
stream.synchronize();

// build std::string vector from chars and offsets
std::vector<std::string> host_data;
Expand Down Expand Up @@ -528,15 +530,17 @@ void make_device_json_column(device_span<SymbolT const> input,
auto column_range_beg =
cudf::detail::make_std_vector_async(d_column_tree.node_range_begin, stream);
auto max_row_offsets = cudf::detail::make_std_vector_async(d_max_row_offsets, stream);
std::vector<std::string> column_names = copy_strings_to_host(
std::vector<std::string> column_names = copy_strings_to_host_sync(
input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream);
stream.synchronize();
// array of arrays column names
if (is_array_of_arrays) {
TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2;
auto values_column_indices =
get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream);
auto h_values_column_indices =
cudf::detail::make_std_vector_async(values_column_indices, stream);
stream.synchronize();
std::transform(unique_col_ids.begin(),
unique_col_ids.end(),
column_names.begin(),
Expand Down Expand Up @@ -609,7 +613,7 @@ void make_device_json_column(device_span<SymbolT const> input,

std::vector<uint8_t> is_str_column_all_nulls{};
if (is_enabled_mixed_types_as_string) {
is_str_column_all_nulls = cudf::detail::make_std_vector_async(
is_str_column_all_nulls = cudf::detail::make_std_vector_sync(
is_all_nulls_each_column(input, d_column_tree, tree, col_ids, options, stream), stream);
}

Expand Down

0 comments on commit f19d4eb

Please sign in to comment.