From 204a09cb9de58485df2c931db9a56d19a6eda356 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Thu, 22 Sep 2022 21:47:36 +0200
Subject: [PATCH] Reduces memory requirements in JSON parser and adds bytes/s
 and peak memory usage to benchmarks (#11732)

This PR reduces memory requirements in the new nested JSON parser and adds
`bytes_per_second` and `peak_memory_usage` metrics to its benchmarks.

Authors:
  - Elias Stehle (https://github.com/elstehle)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Karthikeyan (https://github.com/karthikeyann)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/11732
---
 cpp/benchmarks/io/json/nested_json.cpp |  9 +++++-
 cpp/src/io/json/nested_json_gpu.cu     | 38 ++++++++++++++------------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp
index 1e84d0ee7c5..e2d4c3b77d8 100644
--- a/cpp/benchmarks/io/json/nested_json.cpp
+++ b/cpp/benchmarks/io/json/nested_json.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <benchmarks/fixture/rmm_pool_raii.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
 #include <nvbench/nvbench.cuh>
 
 #include <io/json/nested_json.hpp>
@@ -71,15 +72,21 @@ void BM_NESTED_JSON(nvbench::state& state)
   state.add_element_count(input.size());
 
   // Run algorithm
+  auto const mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     // Allocate device-side temporary storage & run algorithm
     cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
   });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(string_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
 }
 
 NVBENCH_BENCH(BM_NESTED_JSON)
   .set_name("nested_json_gpu_parser")
-  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 31, 1));
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));
 
 }  // namespace cudf
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 7e567aae9fe..552cd1e6167 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1079,26 +1079,25 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
   // Range of encapsulating function that parses to internal columnar data representation
   CUDF_FUNC_RANGE();
 
-  rmm::device_uvector<PdaTokenT> tokens{json_in.size(), stream, mr};
-  rmm::device_uvector<SymbolOffsetT> tokens_indices{json_in.size(), stream, mr};
-  rmm::device_scalar<SymbolOffsetT> num_written_tokens{stream, mr};
-
   auto const new_line_delimited_json = options.is_enabled_lines();
 
-  // Memory holding the top-of-stack stack context for the input
-  rmm::device_uvector<StackSymbolT> stack_op_indices{json_in.size(), stream};
-
-  // Identify what is the stack context for each input character (is it: JSON-root, struct, or list)
-  get_stack_context(json_in, stack_op_indices.data(), stream);
-
   // Prepare for PDA transducer pass, merging input symbols with stack symbols
-  rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids{json_in.size(), stream};
-  auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data());
-  thrust::transform(rmm::exec_policy(stream),
-                    zip_in,
-                    zip_in + json_in.size(),
-                    pda_sgids.data(),
-                    tokenizer_pda::PdaSymbolToSymbolGroupId{});
+  rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids = [json_in, stream]() {
+    rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids{json_in.size(), stream};
+    // Memory holding the top-of-stack stack context for the input
+    rmm::device_uvector<StackSymbolT> stack_op_indices{json_in.size(), stream};
+
+    // Identify what is the stack context for each input character (JSON-root, struct, or list)
+    get_stack_context(json_in, stack_op_indices.data(), stream);
+
+    auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data());
+    thrust::transform(rmm::exec_policy(stream),
+                      zip_in,
+                      zip_in + json_in.size(),
+                      pda_sgids.data(),
+                      tokenizer_pda::PdaSymbolToSymbolGroupId{});
+    return pda_sgids;
+  }();
 
   // PDA transducer alias
   using ToTokenStreamFstT =
@@ -1118,6 +1117,9 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
                                        stream};
 
   // Perform a PDA-transducer pass
+  rmm::device_scalar<SymbolOffsetT> num_written_tokens{stream};
+  rmm::device_uvector<PdaTokenT> tokens{json_in.size(), stream, mr};
+  rmm::device_uvector<SymbolOffsetT> tokens_indices{json_in.size(), stream, mr};
   json_to_tokens_fst.Transduce(pda_sgids.begin(),
                                static_cast<SymbolOffsetT>(json_in.size()),
                                tokens.data(),
@@ -1126,7 +1128,7 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
                                tokenizer_pda::start_state,
                                stream);
 
-  auto num_total_tokens = num_written_tokens.value(stream);
+  auto const num_total_tokens = num_written_tokens.value(stream);
   tokens.resize(num_total_tokens, stream);
   tokens_indices.resize(num_total_tokens, stream);
 
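
Note on the parser change: the reduction in peak device memory comes from a
scoping pattern. The scratch buffers needed only to compute `pda_sgids`
(notably `stack_op_indices`) are confined to an immediately-invoked lambda,
so they are freed before the `tokens`/`tokens_indices` output buffers are
allocated. Below is a minimal host-side sketch of that idiom, using
`std::vector` in place of `rmm::device_uvector`; all names in it are
illustrative stand-ins, not cudf API.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<int32_t> make_symbol_group_ids(std::vector<uint8_t> const& input)
    {
      // Immediately-invoked lambda: `scratch` lives only inside it, so peak
      // memory is max(scratch, later outputs) rather than their sum.
      std::vector<int32_t> sgids = [&input]() {
        std::vector<int32_t> out(input.size());
        std::vector<uint8_t> scratch(input.size());  // stand-in for stack_op_indices
        for (std::size_t i = 0; i < input.size(); ++i) {
          scratch[i] = input[i] % 3;                      // stand-in for get_stack_context
          out[i]     = static_cast<int32_t>(scratch[i]);  // stand-in for the zip/transform
        }
        return out;  // moved out of the lambda; the result is not copied
      }();
      // Any buffers allocated from here on no longer coexist with `scratch`.
      return sgids;
    }

On the benchmark side, throughput is derived from nvbench's measured mean GPU
time (the `nv/cold/time/gpu/mean` summary) rather than a manual timer, and peak
memory is reported by cudf's benchmark helper `memory_stats_logger`, which
tracks allocations made through the current device memory resource.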