Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Apply modifications to account for RAFT changes #1707

Merged
merged 21 commits into from
Aug 27, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/cmake/thirdparty/get_cuhornet.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ function(find_and_configure_cuhornet)
# or to use CPM
FetchContent_Declare(
cuhornet
GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git
GIT_TAG 261399356e62bd76fa7628880f1a847aee713eed
GIT_REPOSITORY https://github.com/viclafargue/cuhornet.git
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
GIT_TAG 004aa8a75ad131558c158bd4311dee1987e793c3
SOURCE_SUBDIR hornet
)
FetchContent_GetProperties(cuhornet)
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cugraph/compute_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <cugraph/graph.hpp>

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/exec_policy.hpp>
viclafargue marked this conversation as resolved.
Show resolved Hide resolved

namespace cugraph {
namespace detail {
Expand Down
11 changes: 6 additions & 5 deletions cpp/include/cugraph/detail/graph_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <cugraph/utilities/dataframe_buffer.cuh>
#include <cugraph/utilities/device_comm.cuh>

#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/sort.h>
#include <thrust/tabulate.h>
Expand Down Expand Up @@ -78,19 +78,20 @@ rmm::device_uvector<edge_t> compute_major_degrees(
[(detail::num_sparse_segments_per_vertex_partition + 2) * i +
detail::num_sparse_segments_per_vertex_partition]
: major_last;
thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
rmm::exec_policy execution_policy = handle.get_thrust_policy();
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
thrust::transform(execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(major_hypersparse_first - major_first),
local_degrees.begin(),
[p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; });
if (use_dcs) {
auto p_dcs_nzd_vertices = (*adj_matrix_partition_dcs_nzd_vertices)[i];
auto dcs_nzd_vertex_count = (*adj_matrix_partition_dcs_nzd_vertex_counts)[i];
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(execution_policy,
local_degrees.begin() + (major_hypersparse_first - major_first),
local_degrees.begin() + (major_last - major_first),
edge_t{0});
thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::for_each(execution_policy,
thrust::make_counting_iterator(vertex_t{0}),
thrust::make_counting_iterator(dcs_nzd_vertex_count),
[p_offsets,
Expand Down Expand Up @@ -123,7 +124,7 @@ rmm::device_uvector<edge_t> compute_major_degrees(raft::handle_t const& handle,
vertex_t number_of_vertices)
{
rmm::device_uvector<edge_t> degrees(number_of_vertices, handle.get_stream());
thrust::tabulate(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::tabulate(rmm::exec_policy(handle.get_stream()),
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
degrees.begin(),
degrees.end(),
[offsets] __device__(auto i) { return offsets[i + 1] - offsets[i]; });
Expand Down
73 changes: 73 additions & 0 deletions cpp/include/cugraph/prims/any_of_adj_matrix_row.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
viclafargue marked this conversation as resolved.
Show resolved Hide resolved
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cugraph/experimental/graph_view.hpp>
#include <cugraph/utilities/error.hpp>
#include <cugraph/utilities/host_scalar_comm.cuh>

#include <raft/handle.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/count.h>
#include <thrust/execution_policy.h>

namespace cugraph {
namespace experimental {

/**
* @brief Check if any of the graph adjacency matrix row properties satisfy the given predicate.
*
* Returns true if @p row_op returns true at least once (in any process in multi-GPU), and returns
* false otherwise. This function is inspired by thrust::any_of().
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row
* input properties.
* @tparam RowOp Type of the unary predicate operator.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row properties
* for the first (inclusive) row (assigned to this process in multi-GPU).
* `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first +
* @p graph_view.get_number_of_local_adj_matrix_partition_rows().
* @param row_op Unary predicate operator that takes *(@p adj_matrix_row_value_input_first + i)
* (where i = [0, @p graph_view.get_number_of_local_adj_matrix_partition_rows()) and returns either
* true or false.
* @return true If the predicate returns true at least once (in any process in multi-GPU).
* @return false If the predicate never returns true (in any process in multi-GPU).
*/
template <typename GraphViewType, typename AdjMatrixRowValueInputIterator, typename RowOp>
bool any_of_adj_matrix_row(raft::handle_t const& handle,
                           GraphViewType const& graph_view,
                           AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first,
                           RowOp row_op)
{
  // Number of adjacency-matrix rows assigned to this process; the input iterator range is
  // [first, first + num_local_rows).
  auto const num_local_rows = graph_view.get_number_of_local_adj_matrix_partition_rows();

  // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved
  auto num_matches = thrust::count_if(rmm::exec_policy(handle.get_stream()),
                                      adj_matrix_row_value_input_first,
                                      adj_matrix_row_value_input_first + num_local_rows,
                                      row_op);
  if (GraphViewType::is_multi_gpu) {
    // Sum the per-GPU match counts so every rank returns the same answer.
    num_matches = host_scalar_allreduce(handle.get_comms(), num_matches, handle.get_stream());
  }
  return num_matches > 0;
}

} // namespace experimental
} // namespace cugraph
22 changes: 11 additions & 11 deletions cpp/include/cugraph/prims/copy_to_adj_matrix_row_col.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#include <cugraph/utilities/thrust_tuple_utils.cuh>
#include <cugraph/vertex_partition_device_view.cuh>

#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/copy.h>
#include <thrust/execution_policy.h>
Expand Down Expand Up @@ -98,7 +98,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed
? graph_view.get_number_of_local_adj_matrix_partition_cols()
: graph_view.get_number_of_local_adj_matrix_partition_rows());
thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::copy(rmm::exec_policy(handle.get_stream()),
vertex_value_input_first,
vertex_value_input_first + graph_view.get_number_of_local_vertices(),
matrix_major_value_output_first);
Expand Down Expand Up @@ -169,7 +169,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
});
// FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a
// permutation iterator (and directly gathers to the internal buffer)
thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::gather(rmm::exec_policy(handle.get_stream()),
map_first,
map_first + thrust::distance(vertex_first, vertex_last),
vertex_value_input_first,
Expand All @@ -190,7 +190,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -203,7 +203,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(
rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -226,7 +226,7 @@ void copy_to_matrix_major(raft::handle_t const& handle,
? graph_view.get_number_of_local_adj_matrix_partition_cols()
: graph_view.get_number_of_local_adj_matrix_partition_rows());
auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first);
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
val_first,
val_first + thrust::distance(vertex_first, vertex_last),
vertex_first,
Expand Down Expand Up @@ -290,7 +290,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed
? graph_view.get_number_of_local_adj_matrix_partition_rows()
: graph_view.get_number_of_local_adj_matrix_partition_cols());
thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::copy(rmm::exec_policy(handle.get_stream()),
vertex_value_input_first,
vertex_value_input_first + graph_view.get_number_of_local_vertices(),
matrix_minor_value_output_first);
Expand Down Expand Up @@ -360,7 +360,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
});
// FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a
// permutation iterator (and directly gathers to the internal buffer)
thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::gather(rmm::exec_policy(handle.get_stream()),
map_first,
map_first + thrust::distance(vertex_first, vertex_last),
vertex_value_input_first,
Expand All @@ -380,7 +380,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
});
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -392,7 +392,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
});
// FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and
// directly scatters from the internal buffer)
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
rx_value_first,
rx_value_first + rx_counts[i],
map_first,
Expand All @@ -414,7 +414,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
assert(graph_view.get_number_of_local_vertices() ==
graph_view.get_number_of_local_adj_matrix_partition_rows());
auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first);
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::scatter(rmm::exec_policy(handle.get_stream()),
val_first,
val_first + thrust::distance(vertex_first, vertex_last),
vertex_first,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#include <cugraph/utilities/host_barrier.hpp>

#include <raft/cudart_utils.h>
#include <rmm/thrust_rmm_allocator.h>
#include <raft/handle.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/distance.h>
#include <thrust/functional.h>
Expand Down Expand Up @@ -439,12 +439,12 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
}

if (GraphViewType::is_multi_gpu) {
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(rmm::exec_policy(handle.get_stream()),
minor_buffer_first,
minor_buffer_first + minor_tmp_buffer_size,
minor_init);
} else {
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(rmm::exec_policy(handle.get_stream()),
vertex_value_output_first,
vertex_value_output_first + graph_view.get_number_of_local_vertices(),
minor_init);
Expand Down Expand Up @@ -546,7 +546,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
if constexpr (update_major) { // this is necessary as we don't visit every vertex in the
// hypersparse segment in
// for_all_major_for_all_nbr_hypersparse
thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
thrust::fill(handle.get_thrust_policy(),
output_buffer_first + (*segment_offsets)[3],
output_buffer_first + (*segment_offsets)[4],
major_init);
Expand Down
Loading