Skip to content

Commit

Permalink
Make device_buffer streams explicit and enforce move construction (#8280
Browse files Browse the repository at this point in the history
)

In preparation for rapidsai/rmm#775, this PR has many changes to ensure streams on which device_buffers are allocated and copied are explicit. As a consequence, this PR also finds and fixes many places where buffers were copied that should have been moved.  In addition, it adds copy constructors that take an optional stream to all `cudf::scalar` classes.

This can and should be merged before rapidsai/rmm#775 to ensure no breakage of the build by that PR.

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Devavret Makkar (https://github.com/devavret)
  - Jason Lowe (https://github.com/jlowe)
  - Nghia Truong (https://github.com/ttnghia)

URL: #8280
  • Loading branch information
harrism authored May 26, 2021
1 parent cbbcba7 commit fa6e7e0
Show file tree
Hide file tree
Showing 43 changed files with 507 additions and 259 deletions.
2 changes: 1 addition & 1 deletion cpp/benchmarks/iterator/iterator_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite
nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init);

// Allocate temporary storage
rmm::device_buffer d_temp_storage(temp_storage_bytes);
rmm::device_buffer d_temp_storage(temp_storage_bytes, rmm::cuda_stream_default);

// Run reduction
cub::DeviceReduce::Reduce(
Expand Down
9 changes: 6 additions & 3 deletions cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

#include <type_traits>

Expand Down Expand Up @@ -186,10 +187,12 @@ void type_dispatcher_benchmark(::benchmark::State& state)
cudf::mutable_table_view source_table{source_columns};

// For no dispatching
std::vector<rmm::device_buffer> h_vec(n_cols,
rmm::device_buffer(source_size * sizeof(TypeParam)));
std::vector<rmm::device_buffer> h_vec(n_cols);
std::vector<TypeParam*> h_vec_p(n_cols);
for (int c = 0; c < n_cols; c++) { h_vec_p[c] = static_cast<TypeParam*>(h_vec[c].data()); }
std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& col) {
col.resize(source_size * sizeof(TypeParam), rmm::cuda_stream_default);
return static_cast<TypeParam*>(col.data());
});
rmm::device_uvector<TypeParam*> d_vec(n_cols, rmm::cuda_stream_default);

if (dispatching_type == NO_DISPATCHING) {
Expand Down
16 changes: 10 additions & 6 deletions cpp/docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -414,21 +414,25 @@ Allocates a specified number of bytes of untyped, uninitialized device memory us
`device_memory_resource`. If no resource is explicitly provided, uses
`rmm::mr::get_current_device_resource()`.
`rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s
device memory, whereas a move moves ownership of the device memory from one `device_buffer` to
another.
`rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the
`device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the
device memory from one `device_buffer` to another.
```c++
// Allocates at least 100 bytes of uninitialized device memory
// using the specified resource and stream
rmm::device_buffer buff(100, stream, mr);
void * raw_data = buff.data(); // Raw pointer to underlying device memory
rmm::device_buffer copy(buff); // Deep copies `buff` into `copy`
rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to`
// Deep copies `buff` into `copy` on `stream`
rmm::device_buffer copy(buff, stream);
// Moves contents of `buff` into `moved_to`
rmm::device_buffer moved_to(std::move(buff));
custom_memory_resource *mr...;
rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource
// Allocates 100 bytes from the custom_memory_resource
rmm::device_buffer custom_buff(100, mr, stream);
```

#### `rmm::device_scalar<T>`
Expand Down
34 changes: 15 additions & 19 deletions cpp/include/cudf/column/column.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,13 +49,6 @@ class column {
column& operator=(column const& other) = delete;
column& operator=(column&& other) = delete;

/**
* @brief Construct a new column by deep copying the contents of `other`.
*
* @param other The column to copy
*/
column(column const& other);

/**
* @brief Construct a new column object by deep copying the contents of
*`other`.
Expand All @@ -68,7 +61,7 @@ class column {
* @param mr Device memory resource to use for all device memory allocations
*/
column(column const& other,
rmm::cuda_stream_view stream,
rmm::cuda_stream_view stream = rmm::cuda_stream_view{},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -165,18 +158,21 @@ class column {
/**
* @brief Sets the column's null value indicator bitmask to `new_null_mask`.
*
* @throws cudf::logic_error if new_null_count is larger than 0 and the size
* of `new_null_mask` does not match the size of this column.
*
* @param new_null_mask New null value indicator bitmask (lvalue overload &
* copied) to set the column's null value indicator mask. May be empty if
* `new_null_count` is 0 or `UNKOWN_NULL_COUNT`.
* @param new_null_count Optional, the count of null elements. If unknown,
* specify `UNKNOWN_NULL_COUNT` to indicate that the null count should be
* computed on the first invocation of `null_count()`.
* @throws cudf::logic_error if new_null_count is larger than 0 and the size of `new_null_mask`
* does not match the size of this column.
*
* @param new_null_mask New null value indicator bitmask (lvalue overload & copied) to set the
* column's null value indicator mask. May be empty if `new_null_count` is 0 or
* `UNKOWN_NULL_COUNT`.
* @param new_null_count Optional, the count of null elements. If unknown, specify
* `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on the first invocation
* of `null_count()`.
* @param stream The stream on which to perform the allocation and copy. Uses the default CUDA
* stream if none is specified.
*/
void set_null_mask(rmm::device_buffer const& new_null_mask,
size_type new_null_count = UNKNOWN_NULL_COUNT);
size_type new_null_count = UNKNOWN_NULL_COUNT,
rmm::cuda_stream_view stream = rmm::cuda_stream_view{});

/**
* @brief Updates the count of null elements.
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -306,15 +306,15 @@ struct list_child_constructor {
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

auto const child_null_mask =
auto child_null_mask =
source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable()
? construct_child_nullmask(
list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr)
: std::make_pair(rmm::device_buffer{}, 0);

auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(),
num_child_rows,
child_null_mask.first,
std::move(child_null_mask.first),
child_null_mask.second,
stream,
mr);
Expand Down Expand Up @@ -595,7 +595,7 @@ struct list_child_constructor {
std::make_unique<column>(structs_list_offsets, stream, mr),
std::make_unique<column>(structs_member, stream, mr),
structs_list_null_count,
rmm::device_buffer(structs_list_nullmask),
rmm::device_buffer(structs_list_nullmask, stream),
stream,
mr);
};
Expand Down
Loading

0 comments on commit fa6e7e0

Please sign in to comment.