Skip to content

Commit

Permalink
Update thrust/RMM deprecated calls (#1789)
Browse files Browse the repository at this point in the history
This PR essentially backports the fixes in #1707 for the deprecated RMM exec policy, and unpins the RMM nightly to unblock CI and local development. Conflicts with the bigger PR should be minimal, and mainly revolve around a single change:

```
rmm::exec_policy(handle.get_stream())
```

to 

```
handle.get_thrust_policy()
```

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #1789
  • Loading branch information
dantegd authored Aug 25, 2021
1 parent fc0aef7 commit 80a3459
Show file tree
Hide file tree
Showing 83 changed files with 357 additions and 381 deletions.
5 changes: 2 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,12 @@ gpuci_logger "Activate conda env"
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids

# FIXME: return librmm and RMM to ${MINOR_VERSION}
gpuci_logger "Install dependencies"
gpuci_mamba_retry install -y \
"libcudf=${MINOR_VERSION}" \
"cudf=${MINOR_VERSION}" \
"librmm=21.10.00a210812" \
"rmm=21.10.00a210813" \
"librmm=${MINOR_VERSION}" \
"rmm=${MINOR_VERSION}" \
"cudatoolkit=$CUDA_REL" \
"dask-cudf=${MINOR_VERSION}" \
"dask-cuda=${MINOR_VERSION}" \
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cugraph_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ dependencies:
- cudatoolkit=11.0
- cudf=21.10.*
- libcudf=21.10.*
- rmm=21.10.00a210813
- librmm=21.10.00a210812
- rmm=21.10.*
- librmm=21.10.*
- dask>=2021.6.0
- distributed>=2021.6.0
- dask-cuda=21.10.*
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cugraph_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ dependencies:
- cudatoolkit=11.2
- cudf=21.10.*
- libcudf=21.10.*
- rmm=21.10.00a210813
- librmm=21.10.00a210812
- rmm=21.10.*
- librmm=21.10.*
- dask>=2021.6.0
- distributed>=2021.6.0
- dask-cuda=21.10.*
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cugraph_dev_cuda11.4.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ dependencies:
- cudatoolkit=11.4
- cudf=21.10.*
- libcudf=21.10.*
- rmm=21.10.00a210813
- librmm=21.10.00a210812
- rmm=21.10.*
- librmm=21.10.*
- dask>=2021.6.0
- distributed>=2021.6.0
- dask-cuda=21.10.*
Expand Down
3 changes: 1 addition & 2 deletions conda/recipes/libcugraph/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,11 @@ build:
- CMAKE_CXX_COMPILER_LAUNCHER
- CMAKE_CUDA_COMPILER_LAUNCHER

# FIXME: return librmm to {{ minor_version }}.*
requirements:
build:
- cmake>=3.20.1
- cudatoolkit {{ cuda_version }}.*
- librmm=21.10.00a210812
- librmm {{ minor_version }}.*
- boost-cpp>=1.66
- nccl>=2.9.9
- ucx-proc=*=gpu
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_cuhornet.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function(find_and_configure_cuhornet)
FetchContent_Declare(
cuhornet
GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git
GIT_TAG 261399356e62bd76fa7628880f1a847aee713eed
GIT_TAG 4a1daa18405c0242370e16ce302dfa7eb5d9e857
SOURCE_SUBDIR hornet
)
FetchContent_GetProperties(cuhornet)
Expand Down
6 changes: 2 additions & 4 deletions cpp/cmake/thirdparty/get_rmm.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,14 @@ function(find_and_configure_rmm VERSION)
return()
endif()

# FIXME: turn GIT_SHALLOW back to TRUE when changing GIT_TAG back
# to branch-${MAJOR_AND_MINOR}
rapids_cpm_find(rmm ${VERSION}
GLOBAL_TARGETS rmm::rmm
BUILD_EXPORT_SET cugraph-exports
INSTALL_EXPORT_SET cugraph-exports
CPM_ARGS
GIT_REPOSITORY https://github.com/rapidsai/rmm.git
GIT_TAG 23bbe745af1d988224b5498f7b8e3fe3720532d4
GIT_SHALLOW FALSE
GIT_TAG branch-${MAJOR_AND_MINOR}
GIT_SHALLOW TRUE
OPTIONS "BUILD_TESTS OFF"
"BUILD_BENCHMARKS OFF"
"CUDA_STATIC_RUNTIME ${CUDA_STATIC_RUNTIME}"
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cugraph/compute_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <cugraph/graph.hpp>

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/exec_policy.hpp>

namespace cugraph {
namespace detail {
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cugraph/detail/graph_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <cugraph/utilities/dataframe_buffer.cuh>
#include <cugraph/utilities/device_comm.cuh>

#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sort.h>
#include <thrust/tabulate.h>
Expand Down Expand Up @@ -78,19 +78,19 @@ rmm::device_uvector<edge_t> compute_major_degrees(
[(detail::num_sparse_segments_per_vertex_partition + 2) * i +
detail::num_sparse_segments_per_vertex_partition]
: major_last;
thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::transform(rmm::exec_policy(handle.get_stream()),
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(major_hypersparse_first - major_first),
local_degrees.begin(),
[p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; });
if (use_dcs) {
auto p_dcs_nzd_vertices = (*adj_matrix_partition_dcs_nzd_vertices)[i];
auto dcs_nzd_vertex_count = (*adj_matrix_partition_dcs_nzd_vertex_counts)[i];
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(rmm::exec_policy(handle.get_stream()),
local_degrees.begin() + (major_hypersparse_first - major_first),
local_degrees.begin() + (major_last - major_first),
edge_t{0});
thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::for_each(rmm::exec_policy(handle.get_stream()),
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(dcs_nzd_vertex_count),
[p_offsets,
Expand Down Expand Up @@ -123,7 +123,7 @@ rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const& handle,
vertex_t number_of_vertices)
{
rmm::device_uvector<edge_t> degrees(number_of_vertices, handle.get_stream());
thrust::tabulate(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::tabulate(rmm::exec_policy(handle.get_stream()),
degrees.begin(),
degrees.end(),
[offsets] __device__(auto i) { return offsets[i + 1] - offsets[i]; });
Expand Down
22 changes: 11 additions & 11 deletions cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#include <cugraph/utilities/thrust_tuple_utils.cuh>
#include <cugraph/vertex_partition_device_view.cuh>

#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/copy.h>
#include <thrust/execution_policy.h>
Expand Down Expand Up @@ -98,7 +98,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed
? graph_view.get_number_of_local_adj_matrix_partition_cols()
: graph_view.get_number_of_local_adj_matrix_partition_rows());
thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::copy(rmm::exec_policy(handle.get_stream()),
vertex_value_input_first,
vertex_value_input_first + graph_view.get_number_of_local_vertices(),
matrix_major_value_output_first);
Expand Down Expand Up @@ -169,7 +169,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
});
// FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a
// permutation iterator (and directly gathers to the internal buffer)
thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::gather(rmm::exec_policy(handle.get_stream()),
map_first,
map_first + thrust::distance(vertex_first, vertex_last),
vertex_value_input_first,
Expand All @@ -190,7 +190,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -203,7 +203,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -226,7 +226,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
? graph_view.get_number_of_local_adj_matrix_partition_cols()
: graph_view.get_number_of_local_adj_matrix_partition_rows());
auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first);
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
val_first,
val_first + thrust::distance(vertex_first, vertex_last),
vertex_first,
Expand Down Expand Up @@ -290,7 +290,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed
? graph_view.get_number_of_local_adj_matrix_partition_rows()
: graph_view.get_number_of_local_adj_matrix_partition_cols());
thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::copy(rmm::exec_policy(handle.get_stream()),
vertex_value_input_first,
vertex_value_input_first + graph_view.get_number_of_local_vertices(),
matrix_minor_value_output_first);
Expand Down Expand Up @@ -360,7 +360,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
});
// FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a
// permutation iterator (and directly gathers to the internal buffer)
thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::gather(rmm::exec_policy(handle.get_stream()),
map_first,
map_first + thrust::distance(vertex_first, vertex_last),
vertex_value_input_first,
Expand All @@ -380,7 +380,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
});
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -392,7 +392,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
});
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -414,7 +414,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
assert(graph_view.get_number_of_local_vertices() ==
graph_view.get_number_of_local_adj_matrix_partition_rows());
auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first);
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
val_first,
val_first + thrust::distance(vertex_first, vertex_last),
vertex_first,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#include <cugraph/utilities/host_barrier.hpp>

#include <raft/cudart_utils.h>
#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/distance.h>
#include <thrust/functional.h>
Expand Down Expand Up @@ -439,12 +439,12 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
}

if (GraphViewType::is_multi_gpu) {
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(rmm::exec_policy(handle.get_stream()),
minor_buffer_first,
minor_buffer_first + minor_tmp_buffer_size,
minor_init);
} else {
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(rmm::exec_policy(handle.get_stream()),
vertex_value_output_first,
vertex_value_output_first + graph_view.get_number_of_local_vertices(),
minor_init);
Expand Down Expand Up @@ -546,7 +546,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
if constexpr (update_major) { // this is necessary as we don't visit every vertex in the
// hypersparse segment in
// for_all_major_for_all_nbr_hypersparse
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(rmm::exec_policy(handle.get_stream()),
output_buffer_first + (*segment_offsets)[3],
output_buffer_first + (*segment_offsets)[4],
major_init);
Expand Down
Loading

0 comments on commit 80a3459

Please sign in to comment.