Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup hostdevice_vector and add more APIs #15252

Merged
merged 17 commits on Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions cpp/src/io/orc/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,11 @@ rmm::device_buffer decompress_stripe_data(
rmm::cuda_stream_view stream)
{
// Parse the columns' compressed info
cudf::detail::hostdevice_vector<gpu::CompressedStreamInfo> compinfo(
0, stream_info.size(), stream);
for (auto const& info : stream_info) {
compinfo.push_back(gpu::CompressedStreamInfo(
static_cast<uint8_t const*>(stripe_data[info.stripe_idx].data()) + info.dst_pos,
info.length));
cudf::detail::hostdevice_vector<gpu::CompressedStreamInfo> compinfo(stream_info.size(), stream);
for (std::size_t idx = 0; idx < stream_info.size(); ++idx) {
auto const& info = stream_info[idx];
compinfo[idx] = gpu::CompressedStreamInfo(
static_cast<uint8_t const*>(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you kidding me, this was it?!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's wrong with it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing, I just thought (feared) bigger changes were needed to remove push_back.

}
compinfo.host_to_device_async(stream);

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/parquet/reader_impl_chunking.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1392,7 +1392,7 @@ void reader::impl::setup_next_subpass(bool uses_custom_row_bounds)
// copy the appropriate subset of pages from each column and store the mapping back to the source
// (pass) pages
else {
subpass.page_buf = cudf::detail::hostdevice_vector<PageInfo>(total_pages, total_pages, _stream);
subpass.page_buf = cudf::detail::hostdevice_vector<PageInfo>(total_pages, _stream);
subpass.page_src_index = rmm::device_uvector<size_t>(total_pages, _stream);
auto iter = thrust::make_counting_iterator(0);
rmm::device_uvector<size_t> dst_offsets(num_columns + 1, _stream);
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/io/parquet/reader_impl_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -514,8 +514,7 @@ cudf::detail::hostdevice_vector<PageInfo> sort_pages(device_span<PageInfo const>
page_keys.end(),
sort_indices.begin(),
thrust::less<int>());
auto pass_pages =
cudf::detail::hostdevice_vector<PageInfo>(unsorted_pages.size(), unsorted_pages.size(), stream);
auto pass_pages = cudf::detail::hostdevice_vector<PageInfo>(unsorted_pages.size(), stream);
thrust::transform(
rmm::exec_policy_nosync(stream),
sort_indices.begin(),
Expand Down
38 changes: 10 additions & 28 deletions cpp/src/io/utilities/hostdevice_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,9 @@
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/host/host_memory_resource.hpp>

#include <thrust/host_vector.h>

#include <variant>

namespace cudf::detail {

/**
Expand All @@ -52,31 +48,12 @@ class hostdevice_vector {
hostdevice_vector() : hostdevice_vector(0, cudf::get_default_stream()) {}

explicit hostdevice_vector(size_t size, rmm::cuda_stream_view stream)
: hostdevice_vector(size, size, stream)
{
}

explicit hostdevice_vector(size_t initial_size, size_t max_size, rmm::cuda_stream_view stream)
: h_data({cudf::io::get_host_memory_resource(), stream}), d_data(0, stream)
{
CUDF_EXPECTS(initial_size <= max_size, "initial_size cannot be larger than max_size");

h_data.reserve(max_size);
h_data.resize(initial_size);

current_size = initial_size;
d_data.resize(max_size, stream);
}

void push_back(T const& data)
: h_data{size, rmm_host_allocator<T>{cudf::io::get_host_memory_resource(), stream}},
d_data{size, stream}
{
CUDF_EXPECTS(size() < capacity(),
"Cannot insert data into hostdevice_vector because capacity has been exceeded.");
h_data[current_size++] = data;
}

[[nodiscard]] size_t capacity() const noexcept { return d_data.size(); }
[[nodiscard]] size_t size() const noexcept { return current_size; }
[[nodiscard]] size_t size() const noexcept { return h_data.size(); }
[[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }
[[nodiscard]] bool empty() const noexcept { return size() == 0; }

Expand All @@ -92,6 +69,12 @@ class hostdevice_vector {
[[nodiscard]] T* end() { return host_ptr(size()); }
[[nodiscard]] T const* end() const { return host_ptr(size()); }

[[nodiscard]] T& front() { return h_data.front(); }
[[nodiscard]] T const& front() const { return front(); }

[[nodiscard]] T& back() { return h_data.back(); }
[[nodiscard]] T const& back() const { return back(); }

[[nodiscard]] T* device_ptr(size_t offset = 0) { return d_data.data() + offset; }
[[nodiscard]] T const* device_ptr(size_t offset = 0) const { return d_data.data() + offset; }

Expand Down Expand Up @@ -175,7 +158,6 @@ class hostdevice_vector {

private:
cudf::detail::rmm_host_vector<T> h_data;
size_t current_size = 0;
rmm::device_uvector<T> d_data;
};

Expand Down
Loading