Skip to content

Commit

Permalink
Performance improvement in JSON Tree traversal (#11919)
Browse files Browse the repository at this point in the history
This PR improves the performance of JSON tree traversal, mainly in the creation of column ids.
- Replaced per-level processing with a two-level hash algorithm
- Reduced memory usage of the hash map (reduced oversubscription)

Other changes:
- Fail during tree generation if the token stream contains an error token
- Created a device_span version of device_parse_nested_json

Reaches 2 GB/s on a GV100 with a 128 MB JSON input.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Nghia Truong (https://github.com/ttnghia)

URL: #11919
  • Loading branch information
karthikeyann authored Oct 28, 2022
1 parent c915523 commit aaf251d
Show file tree
Hide file tree
Showing 4 changed files with 296 additions and 315 deletions.
20 changes: 15 additions & 5 deletions cpp/src/io/json/json_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -722,24 +722,22 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co
}
}

table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
table_with_metadata device_parse_nested_json(device_span<SymbolT const> d_input,
cudf::io::json_reader_options const& options,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();

// Allocate device memory for the JSON input & copy over to device
rmm::device_uvector<SymbolT> d_input = cudf::detail::make_device_uvector_async(input, stream);

auto gpu_tree = [&]() {
// Parse the JSON and get the token stream
const auto [tokens_gpu, token_indices_gpu] = get_token_stream(d_input, options, stream);
// gpu tree generation
return get_tree_representation(tokens_gpu, token_indices_gpu, stream);
}(); // IILE used to free memory of token data.
#ifdef NJP_DEBUG_PRINT
print_tree(input, gpu_tree, stream);
auto h_input = cudf::detail::make_host_vector_async(d_input, stream);
print_tree(h_input, gpu_tree, stream);
#endif

auto [gpu_col_id, gpu_row_offsets] = records_orient_tree_traversal(d_input, gpu_tree, stream);
Expand Down Expand Up @@ -841,5 +839,17 @@ table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
{{}, out_column_names}};
}

/**
 * @brief Host-input convenience overload of `device_parse_nested_json`.
 *
 * Copies the host-resident JSON input into a device buffer and forwards a
 * read-only view of that buffer to the `device_span` overload, which performs
 * the actual parsing.
 *
 * @param input   JSON input symbols residing in host memory
 * @param options JSON reader options, forwarded unchanged
 * @param stream  CUDA stream used for the host-to-device copy and forwarded to the parser
 * @param mr      Device memory resource, forwarded unchanged to the parser
 * @return The table (with metadata) produced by the `device_span` overload
 */
table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
                                             cudf::io::json_reader_options const& options,
                                             rmm::cuda_stream_view stream,
                                             rmm::mr::device_memory_resource* mr)
{
  CUDF_FUNC_RANGE();

  // Stage the JSON input on the device: allocate a device buffer and copy the host data into it.
  rmm::device_uvector<SymbolT> device_json = cudf::detail::make_device_uvector_async(input, stream);

  // The device overload only needs a non-owning, read-only view; `device_json`
  // keeps owning the storage until this function returns.
  device_span<SymbolT const> const device_json_view{device_json};

  return device_parse_nested_json(device_json_view, options, stream, mr);
}
} // namespace detail
} // namespace cudf::io::json
Loading

0 comments on commit aaf251d

Please sign in to comment.