Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ingest_raw_data performance issue in Nested JSON reader due to RVO #12070

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 36 additions & 10 deletions cpp/src/io/json/experimental/read_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,64 @@
#include <io/comp/io_uncomp.hpp>
#include <io/json/nested_json.hpp>

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/utilities/error.hpp>

#include <numeric>

namespace cudf::io::detail::json::experimental {

std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
compression_type compression)
size_t sources_size(host_span<std::unique_ptr<datasource>> const sources,
size_t range_offset,
size_t range_size)
{
auto const total_source_size =
std::accumulate(sources.begin(), sources.end(), 0ul, [](size_t sum, auto& source) {
return sum + source->size();
});
auto buffer = std::vector<uint8_t>(total_source_size);
return std::accumulate(sources.begin(), sources.end(), 0ul, [=](size_t sum, auto& source) {
auto const size = source->size();
// TODO take care of 0, 0, or *, 0 case.
return sum +
(range_size == 0 or range_offset + range_size > size ? size - range_offset : range_size);
});
}

std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> const& sources,
compression_type compression,
size_t range_offset,
size_t range_size)
{
CUDF_FUNC_RANGE();
// Iterate through the user defined sources and read the contents into the local buffer
auto const total_source_size = sources_size(sources, range_offset, range_size);
auto buffer = std::vector<uint8_t>(total_source_size);

size_t bytes_read = 0;
for (const auto& source : sources) {
bytes_read += source->host_read(0, source->size(), buffer.data() + bytes_read);
if (!source->is_empty()) {
auto data_size = (range_size != 0) ? range_size : source->size();
auto destination = buffer.data() + bytes_read;
bytes_read += source->host_read(range_offset, data_size, destination);
}
}

return (compression == compression_type::NONE) ? buffer : decompress(compression, buffer);
if (compression == compression_type::NONE) {
return buffer;
} else {
return decompress(compression, buffer);
}
Comment on lines +60 to +64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, I also didn't know that ternary expression could prevent copy elision
Ref: https://stackoverflow.com/questions/22078029/why-does-the-ternary-operator-prevent-return-value-optimization

If possible, can you make a simple example in goldbolt (class printing to stdout when constructed/copied) to demonstrate that, please?

Copy link
Contributor Author

@karthikeyann karthikeyann Nov 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://godbolt.org/z/1vzPqqzs4
Here is a sample code.
tertiary operator calls vector(vector const&) for buffer return
but if() return buffer; calls vector(vector&&).

Reason for this could be - for tertiary operator, one is lvalue (buffer), and the other is rvalue (return value of another function).
So, for lvalue, it makes call to copy constructor.
For Rvalue, it called move constructor.

}

table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
json_reader_options const& reader_opts,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(reader_opts.get_byte_range_offset() == 0 and reader_opts.get_byte_range_size() == 0,
"specifying a byte range is not yet supported");

auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
auto const buffer = ingest_raw_input(sources,
reader_opts.get_compression(),
reader_opts.get_byte_range_offset(),
reader_opts.get_byte_range_size());
auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());

try {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/io/json/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <io/utilities/type_conversion.hpp>

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/detail/utilities/visitor_overload.hpp>
#include <cudf/groupby.hpp>
Expand Down Expand Up @@ -222,6 +223,7 @@ std::vector<uint8_t> ingest_raw_input(std::vector<std::unique_ptr<datasource>> c
size_t range_size,
size_t range_size_padded)
{
CUDF_FUNC_RANGE();
// Iterate through the user defined sources and read the contents into the local buffer
size_t total_source_size = 0;
for (const auto& source : sources) {
Expand Down Expand Up @@ -313,6 +315,7 @@ rmm::device_uvector<char> upload_data_to_device(json_reader_options const& reade
rmm::device_uvector<uint64_t>& rec_starts,
rmm::cuda_stream_view stream)
{
CUDF_FUNC_RANGE();
size_t end_offset = h_data.size();

// Trim lines that are outside range
Expand Down Expand Up @@ -592,6 +595,7 @@ table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
if (reader_opts.is_enabled_experimental()) {
return experimental::read_json(sources, reader_opts, stream, mr);
}
Expand Down