Commit 204a09c: Reduces memory requirements in JSON parser and adds bytes/s and peak memory usage to benchmarks (#11732)

This PR reduces memory requirements in the new nested JSON parser and adds `bytes_per_second` and `peak_memory_usage` to benchmarks.
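For quick orientation, here is a minimal, self-contained sketch of the nvbench reporting pattern this PR applies. The `BM_example` benchmark, its `set_name` string, and the `num_bytes` axis are hypothetical; the summary key and nvbench calls mirror the diff below:

#include <nvbench/main.cuh>
#include <nvbench/nvbench.cuh>

// Hypothetical benchmark showing where a derived throughput metric plugs in
void BM_example(nvbench::state& state)
{
  auto const num_bytes = static_cast<std::size_t>(state.get_int64("num_bytes"));

  // Run the workload under test; exec_tag::sync signals that the body synchronizes
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { /* workload under test */ });

  // Derive throughput from nvbench's mean GPU time and attach it as a custom summary
  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
  state.add_element_count(static_cast<double>(num_bytes) / time, "bytes_per_second");
}

NVBENCH_BENCH(BM_example)
  .set_name("example_bench")
  .add_int64_power_of_two_axis("num_bytes", nvbench::range(20, 30, 1));

NVBENCH_MAIN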

Authors:
  - Elias Stehle (https://github.com/elstehle)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Karthikeyan (https://github.com/karthikeyann)
  - Yunsong Wang (https://github.com/PointKernel)

URL: #11732
elstehle authored Sep 22, 2022
1 parent 25e5e17 commit 204a09c
Showing 2 changed files with 28 additions and 19 deletions.
cpp/benchmarks/io/json/nested_json.cpp (9 changes: 8 additions & 1 deletion)
@@ -15,6 +15,7 @@
  */
 
 #include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
 #include <benchmarks/fixture/rmm_pool_raii.hpp>
 
 #include <nvbench/nvbench.cuh>
@@ -71,15 +72,21 @@ void BM_NESTED_JSON(nvbench::state& state)
   state.add_element_count(input.size());
 
   // Run algorithm
+  auto const mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     // Allocate device-side temporary storage & run algorithm
     cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
   });
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(string_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
 }
 
 NVBENCH_BENCH(BM_NESTED_JSON)
   .set_name("nested_json_gpu_parser")
-  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 31, 1));
+  .add_int64_power_of_two_axis("string_size", nvbench::range(20, 30, 1));
 
 }  // namespace cudf
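Note: `cudf::memory_stats_logger` itself is not shown in this diff; it comes from the benchmark fixture header added above. A rough sketch of how such a helper can be built on RMM's `statistics_resource_adaptor` follows (the class below is hypothetical, not cudf's actual implementation):

#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/statistics_resource_adaptor.hpp>

#include <cstddef>

// Hypothetical RAII helper: routes all device allocations through a counting
// adaptor so the high-water mark can be queried after the benchmark runs.
class peak_memory_tracker {
 public:
  peak_memory_tracker()
    : upstream_{rmm::mr::get_current_device_resource()}, adaptor_{upstream_}
  {
    // All subsequent allocations on this device go through the adaptor
    rmm::mr::set_current_device_resource(&adaptor_);
  }
  ~peak_memory_tracker() { rmm::mr::set_current_device_resource(upstream_); }

  // Largest number of bytes simultaneously allocated since construction
  std::size_t peak_memory_usage() { return adaptor_.get_bytes_counter().peak; }

 private:
  rmm::mr::device_memory_resource* upstream_;
  rmm::mr::statistics_resource_adaptor<rmm::mr::device_memory_resource> adaptor_;
};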
cpp/src/io/json/nested_json_gpu.cu (38 changes: 20 additions & 18 deletions)
@@ -1079,26 +1079,25 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
   // Range of encapsulating function that parses to internal columnar data representation
   CUDF_FUNC_RANGE();
 
-  rmm::device_uvector<PdaTokenT> tokens{json_in.size(), stream, mr};
-  rmm::device_uvector<SymbolOffsetT> tokens_indices{json_in.size(), stream, mr};
-  rmm::device_scalar<SymbolOffsetT> num_written_tokens{stream, mr};
-
   auto const new_line_delimited_json = options.is_enabled_lines();
 
-  // Memory holding the top-of-stack stack context for the input
-  rmm::device_uvector<StackSymbolT> stack_op_indices{json_in.size(), stream};
-
-  // Identify what is the stack context for each input character (is it: JSON-root, struct, or list)
-  get_stack_context(json_in, stack_op_indices.data(), stream);
-
-  // Prepare for PDA transducer pass, merging input symbols with stack symbols
-  rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids{json_in.size(), stream};
-  auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data());
-  thrust::transform(rmm::exec_policy(stream),
-                    zip_in,
-                    zip_in + json_in.size(),
-                    pda_sgids.data(),
-                    tokenizer_pda::PdaSymbolToSymbolGroupId{});
+  rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids = [json_in, stream]() {
+    rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids{json_in.size(), stream};
+    // Memory holding the top-of-stack stack context for the input
+    rmm::device_uvector<StackSymbolT> stack_op_indices{json_in.size(), stream};
+
+    // Identify what is the stack context for each input character (JSON-root, struct, or list)
+    get_stack_context(json_in, stack_op_indices.data(), stream);
+
+    auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data());
+    thrust::transform(rmm::exec_policy(stream),
+                      zip_in,
+                      zip_in + json_in.size(),
+                      pda_sgids.data(),
+                      tokenizer_pda::PdaSymbolToSymbolGroupId{});
+    return pda_sgids;
+  }();
 
   // PDA transducer alias
   using ToTokenStreamFstT =
@@ -1118,6 +1117,9 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
     stream};
 
   // Perform a PDA-transducer pass
+  rmm::device_scalar<SymbolOffsetT> num_written_tokens{stream};
+  rmm::device_uvector<PdaTokenT> tokens{json_in.size(), stream, mr};
+  rmm::device_uvector<SymbolOffsetT> tokens_indices{json_in.size(), stream, mr};
   json_to_tokens_fst.Transduce(pda_sgids.begin(),
                                static_cast<SymbolOffsetT>(json_in.size()),
                                tokens.data(),
@@ -1126,7 +1128,7 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
                                tokenizer_pda::start_state,
                                stream);
 
-  auto num_total_tokens = num_written_tokens.value(stream);
+  auto const num_total_tokens = num_written_tokens.value(stream);
   tokens.resize(num_total_tokens, stream);
   tokens_indices.resize(num_total_tokens, stream);
 
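The memory reduction in this file comes from two scoping moves: the stack-context temporaries now live inside an immediately invoked lambda and are freed as soon as `pda_sgids` is produced, and the output buffers (`tokens`, `tokens_indices`, `num_written_tokens`) are allocated only right before the transducer writes them, so they never overlap with the temporaries. A minimal sketch of the same pattern, with hypothetical names and an int payload rather than cudf's types:

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cstddef>

// Illustrative helper: the immediately invoked lambda bounds the lifetime of
// `scratch`, so its device memory is released (stream-ordered) before any
// later allocation, lowering the peak of simultaneously live memory.
rmm::device_uvector<int> compute(std::size_t n, rmm::cuda_stream_view stream)
{
  auto result = [&] {
    rmm::device_uvector<int> out{n, stream};
    rmm::device_uvector<int> scratch{n, stream};  // temporary working buffer
    // ... kernels that fill `out` using `scratch` would run here ...
    return out;  // `scratch` is destroyed on return, before later allocations
  }();

  // This allocation no longer overlaps with `scratch`'s lifetime
  rmm::device_uvector<int> extra{n, stream};
  // ... use `result` and `extra` ...
  return result;
}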