Skip to content

Commit

Permalink
change device_uvector to device_buffer
Browse files (browse the repository at this point in the history)
  • Loading branch information
karthikeyann committed Aug 9, 2024
1 parent 2c8de62 commit 7188095
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 20 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/detail/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void write_json(data_sink* sink,
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
void normalize_single_quotes(datasource::owning_buffer<rmm::device_uvector<char>>& indata,
void normalize_single_quotes(datasource::owning_buffer<rmm::device_buffer>& indata,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

Expand All @@ -72,7 +72,7 @@ void normalize_single_quotes(datasource::owning_buffer<rmm::device_uvector<char>
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
void normalize_whitespace(datasource::owning_buffer<rmm::device_uvector<char>>& indata,
void normalize_whitespace(datasource::owning_buffer<rmm::device_buffer>& indata,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
} // namespace io::json::detail
Expand Down
23 changes: 13 additions & 10 deletions cpp/src/io/json/json_normalization.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "io/fst/lookup_tables.cuh"

#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/io/detail/json.hpp>
#include <cudf/types.hpp>

Expand Down Expand Up @@ -298,10 +299,11 @@ struct TransduceToNormalizedWS {

namespace detail {

void normalize_single_quotes(datasource::owning_buffer<rmm::device_uvector<SymbolT>>& indata,
void normalize_single_quotes(datasource::owning_buffer<rmm::device_buffer>& indata,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
static constexpr std::int32_t min_out = 0;
static constexpr std::int32_t max_out = 2;
auto parser =
Expand All @@ -311,25 +313,26 @@ void normalize_single_quotes(datasource::owning_buffer<rmm::device_uvector<Symbo
normalize_quotes::TransduceToNormalizedQuotes{}),
stream);

rmm::device_uvector<SymbolT> outbuf(indata.size() * 2, stream, mr);
rmm::device_buffer outbuf(indata.size() * 2, stream, mr);
rmm::device_scalar<SymbolOffsetT> outbuf_size(stream, mr);
parser.Transduce(indata.data(),
parser.Transduce(reinterpret_cast<SymbolT const*>(indata.data()),
static_cast<SymbolOffsetT>(indata.size()),
outbuf.data(),
static_cast<SymbolT*>(outbuf.data()),
thrust::make_discard_iterator(),
outbuf_size.data(),
normalize_quotes::start_state,
stream);

outbuf.resize(outbuf_size.value(stream), stream);
datasource::owning_buffer<rmm::device_uvector<SymbolT>> outdata(std::move(outbuf));
datasource::owning_buffer<rmm::device_buffer> outdata(std::move(outbuf));
std::swap(indata, outdata);
}

void normalize_whitespace(datasource::owning_buffer<rmm::device_uvector<SymbolT>>& indata,
void normalize_whitespace(datasource::owning_buffer<rmm::device_buffer>& indata,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
static constexpr std::int32_t min_out = 0;
static constexpr std::int32_t max_out = 2;
auto parser =
Expand All @@ -339,18 +342,18 @@ void normalize_whitespace(datasource::owning_buffer<rmm::device_uvector<SymbolT>
normalize_whitespace::TransduceToNormalizedWS{}),
stream);

rmm::device_uvector<SymbolT> outbuf(indata.size(), stream, mr);
rmm::device_buffer outbuf(indata.size(), stream, mr);
rmm::device_scalar<SymbolOffsetT> outbuf_size(stream, mr);
parser.Transduce(indata.data(),
parser.Transduce(reinterpret_cast<SymbolT const*>(indata.data()),
static_cast<SymbolOffsetT>(indata.size()),
outbuf.data(),
static_cast<SymbolT*>(outbuf.data()),
thrust::make_discard_iterator(),
outbuf_size.data(),
normalize_whitespace::start_state,
stream);

outbuf.resize(outbuf_size.value(stream), stream);
datasource::owning_buffer<rmm::device_uvector<SymbolT>> outdata(std::move(outbuf));
datasource::owning_buffer<rmm::device_buffer> outdata(std::move(outbuf));
std::swap(indata, outdata);
}

Expand Down
16 changes: 8 additions & 8 deletions cpp/src/io/json/read_json.cu
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ size_t estimate_size_per_subchunk(size_t chunk_size)
* @param stream CUDA stream used for device memory operations and kernel launches
* @returns Data source owning buffer enclosing the bytes read
*/
datasource::owning_buffer<rmm::device_uvector<char>> get_record_range_raw_input(
datasource::owning_buffer<rmm::device_buffer> get_record_range_raw_input(
host_span<std::unique_ptr<datasource>> sources,
json_reader_options const& reader_opts,
rmm::cuda_stream_view stream)
Expand Down Expand Up @@ -200,8 +200,8 @@ datasource::owning_buffer<rmm::device_uvector<char>> get_record_range_raw_input(
? total_source_size * estimated_compression_ratio + header_size
: std::min(total_source_size, chunk_size + num_subchunks_prealloced * size_per_subchunk) +
num_extra_delimiters;
rmm::device_uvector<char> buffer(buffer_size, stream);
device_span<char> bufspan(buffer);
rmm::device_buffer buffer(buffer_size, stream);
device_span<char> bufspan(reinterpret_cast<char*>(buffer.data()), buffer.size());

// Offset within buffer indicating first read position
std::int64_t buffer_offset = 0;
Expand All @@ -213,8 +213,8 @@ datasource::owning_buffer<rmm::device_uvector<char>> get_record_range_raw_input(
chunk_offset == 0 ? 0 : find_first_delimiter(readbufspan, '\n', stream);
if (first_delim_pos == -1) {
// return empty owning datasource buffer
auto empty_buf = rmm::device_uvector<char>(0, stream);
return datasource::owning_buffer<rmm::device_uvector<char>>(std::move(empty_buf));
auto empty_buf = rmm::device_buffer(0, stream);
return datasource::owning_buffer<rmm::device_buffer>(std::move(empty_buf));
} else if (!should_load_all_sources) {
// Find next delimiter
std::int64_t next_delim_pos = -1;
Expand All @@ -232,12 +232,12 @@ datasource::owning_buffer<rmm::device_uvector<char>> get_record_range_raw_input(
}
if (next_delim_pos < buffer_offset) next_delim_pos = buffer_offset + readbufspan.size();

return datasource::owning_buffer<rmm::device_uvector<char>>(
return datasource::owning_buffer<rmm::device_buffer>(
std::move(buffer),
reinterpret_cast<uint8_t*>(buffer.data()) + first_delim_pos + shift_for_nonzero_offset,
next_delim_pos - first_delim_pos - shift_for_nonzero_offset);
}
return datasource::owning_buffer<rmm::device_uvector<char>>(
return datasource::owning_buffer<rmm::device_buffer>(
std::move(buffer),
reinterpret_cast<uint8_t*>(buffer.data()) + first_delim_pos + shift_for_nonzero_offset,
readbufspan.size() - first_delim_pos - shift_for_nonzero_offset);
Expand All @@ -249,7 +249,7 @@ table_with_metadata read_batch(host_span<std::unique_ptr<datasource>> sources,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
datasource::owning_buffer<rmm::device_uvector<char>> bufview =
datasource::owning_buffer<rmm::device_buffer> bufview =
get_record_range_raw_input(sources, reader_opts, stream);

// If input JSON buffer has single quotes and option to normalize single quotes is enabled,
Expand Down

0 comments on commit 7188095

Please sign in to comment.