Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph primitives API updates #2220

Merged
merged 10 commits into from
Apr 19, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,

auto execution_policy = handle.get_thrust_policy();
if constexpr (GraphViewType::is_multi_gpu) {
minor_tmp_buffer.fill(minor_init, handle.get_stream());
minor_tmp_buffer.fill(handle, minor_init);
} else {
thrust::fill(execution_policy,
vertex_value_output_first,
Expand Down
71 changes: 29 additions & 42 deletions cpp/include/cugraph/prims/count_if_v.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,24 @@

namespace cugraph {

namespace detail {

template <typename vertex_t, typename VertexValueInputIterator, typename VertexOp>
struct count_if_call_v_op_t {
vertex_t local_vertex_partition_range_first{};
VertexValueInputIterator vertex_value_input_first{};
VertexOp v_op{};

__device__ bool operator()(vertex_t i)
{
return v_op(local_vertex_partition_range_first + i, *(vertex_value_input_first + i))
? vertex_t{1}
: vertex_t{0};
}
};

} // namespace detail

/**
* @brief Count the number of vertices that satisfies the given predicate.
*
Expand All @@ -42,8 +60,8 @@ namespace cugraph {
* @param vertex_value_input_first Iterator pointing to the vertex properties for the first
* (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive)
* is deduced as @p vertex_value_input_first + @p graph_view.local_vertex_partition_range_size().
* @param v_op Unary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p
* graph_view.local_vertex_partition_range_size())) and returns true if this vertex should be
* @param v_op Binary operator takes vertex ID and *(@p vertex_value_input_first + i) (where i is
* [0, @p graph_view.local_vertex_partition_range_size())) and returns true if this vertex should be
* included in the returned count.
* @return GraphViewType::vertex_type Number of times @p v_op returned true.
*/
Expand All @@ -53,47 +71,16 @@ typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle,
VertexValueInputIterator vertex_value_input_first,
VertexOp v_op)
{
auto count =
thrust::count_if(handle.get_thrust_policy(),
vertex_value_input_first,
vertex_value_input_first + graph_view.local_vertex_partition_range_size(),
v_op);
if (GraphViewType::is_multi_gpu) {
count =
host_scalar_allreduce(handle.get_comms(), count, raft::comms::op_t::SUM, handle.get_stream());
}
return count;
}
using vertex_t = typename GraphViewType::vertex_type;

/**
* @brief Count the number of vertices that satisfies the given predicate.
*
* This version (conceptually) iterates over only a subset of the graph vertices. This function
* actually works as thrust::count_if() on [@p input_first, @p input_last) (followed by
* inter-process reduction in multi-GPU). @p input_last - @p input_first (or the sum of @p
* input_last - @p input_first values in multi-GPU) should not overflow GraphViewType::vertex_type.
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam InputIterator Type of the iterator for input values.
* @tparam VertexOp VertexOp Type of the unary predicate operator.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param input_first Iterator pointing to the beginning (inclusive) of the values to be passed to
* @p v_op.
* @param input_last Iterator pointing to the end (exclusive) of the values to be passed to @p v_op.
* @param v_op Unary operator takes *(@p input_first + i) (where i is [0, @p input_last - @p
* input_first)) and returns true if this vertex should be included in the returned count.
* @return GraphViewType::vertex_type Number of times @p v_op returned true.
*/
template <typename GraphViewType, typename InputIterator, typename VertexOp>
typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle,
GraphViewType const& graph_view,
InputIterator input_first,
InputIterator input_last,
VertexOp v_op)
{
auto count = thrust::count_if(handle.get_thrust_policy(), input_first, input_last, v_op);
auto it = thrust::make_transform_iterator(
thrust::make_counting_iterator(vertex_t{0}),
detail::count_if_call_v_op_t<vertex_t, VertexValueInputIterator, VertexOp>{
graph_view.local_vertex_partition_range_first(), vertex_value_input_first, v_op});
auto count = thrust::reduce(handle.get_thrust_policy(),
it,
it + graph_view.local_vertex_partition_range_size(),
vertex_t{0});
if (GraphViewType::is_multi_gpu) {
count =
host_scalar_allreduce(handle.get_comms(), count, raft::comms::op_t::SUM, handle.get_stream());
Expand Down
22 changes: 14 additions & 8 deletions cpp/include/cugraph/prims/edge_partition_src_dst_property.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,12 @@ class edge_partition_major_property_t {
edge_partition_major_value_start_offsets_ = std::nullopt;
}

void fill(T value, rmm::cuda_stream_view stream)
void fill(raft::handle_t const& handle, T value)
{
thrust::fill(
rmm::exec_policy(stream), value_data(), value_data() + size_dataframe_buffer(buffer_), value);
thrust::fill(handle.get_thrust_policy(),
value_data(),
value_data() + size_dataframe_buffer(buffer_),
value);
}

auto key_first() { return key_first_; }
Expand All @@ -267,6 +269,7 @@ class edge_partition_major_property_t {
(*edge_partition_key_offsets_).back())
: std::nullopt;
}

auto value_data() { return get_dataframe_buffer_begin(buffer_); }

auto device_view() const
Expand Down Expand Up @@ -351,14 +354,17 @@ class edge_partition_minor_property_t {
shrink_to_fit_dataframe_buffer(buffer_, handle.get_stream());
}

void fill(T value, rmm::cuda_stream_view stream)
void fill(raft::handle_t const& handle, T value)
{
thrust::fill(
rmm::exec_policy(stream), value_data(), value_data() + size_dataframe_buffer(buffer_), value);
thrust::fill(handle.get_thrust_policy(),
value_data(),
value_data() + size_dataframe_buffer(buffer_),
value);
}

auto key_first() { return key_first_; }
auto key_last() { return key_last_; }

auto value_data() { return get_dataframe_buffer_begin(buffer_); }

auto device_view() const
Expand Down Expand Up @@ -480,7 +486,7 @@ class edge_partition_src_property_t {

void clear(raft::handle_t const& handle) { property_.clear(handle); }

void fill(T value, rmm::cuda_stream_view stream) { property_.fill(value, stream); }
void fill(raft::handle_t const& handle, T value) { property_.fill(handle, value); }

auto key_first() { return property_.key_first(); }
auto key_last() { return property_.key_last(); }
Expand Down Expand Up @@ -561,7 +567,7 @@ class edge_partition_dst_property_t {

void clear(raft::handle_t const& handle) { property_.clear(handle); }

void fill(T value, rmm::cuda_stream_view stream) { property_.fill(value, stream); }
void fill(raft::handle_t const& handle, T value) { property_.fill(handle, value); }

auto key_first() { return property_.key_first(); }
auto key_last() { return property_.key_last(); }
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cugraph/prims/property_op_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ struct property_op<thrust::tuple<Args...>, Op>

private:
template <typename T, std::size_t... Is>
__host__ __device__ constexpr auto sum_impl(T& t1, T& t2, std::index_sequence<Is...>)
__host__ __device__ constexpr auto binary_op_impl(T& t1, T& t2, std::index_sequence<Is...>)
{
return thrust::make_tuple((Op<typename thrust::tuple_element<Is, Type>::type>()(
thrust::get<Is>(t1), thrust::get<Is>(t2)))...);
Expand All @@ -200,7 +200,7 @@ struct property_op<thrust::tuple<Args...>, Op>
public:
__host__ __device__ constexpr auto operator()(const Type& t1, const Type& t2)
{
return sum_impl(t1, t2, std::make_index_sequence<thrust::tuple_size<Type>::value>());
return binary_op_impl(t1, t2, std::make_index_sequence<thrust::tuple_size<Type>::value>());
}
};

Expand Down
40 changes: 0 additions & 40 deletions cpp/include/cugraph/prims/reduce_v.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -68,44 +68,4 @@ T reduce_v(raft::handle_t const& handle,
return ret;
}

/**
* @brief Reduce the vertex properties.
*
* This version (conceptually) iterates over only a subset of the graph vertices. This function
* actually works as thrust::reduce() on [@p input_first, @p input_last) (followed by
* inter-process reduction in multi-GPU).
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam InputIterator Type of the iterator for input values.
* @tparam T Type of the initial value.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param input_first Iterator pointing to the beginning (inclusive) of the values to be reduced.
* @param input_last Iterator pointing to the end (exclusive) of the values to be reduced.
* @param init Initial value to be added to the reduced input vertex properties.
* @return T Reduction of the input vertex properties.
*/
template <typename GraphViewType, typename InputIterator, typename T>
T reduce_v(raft::handle_t const& handle,
GraphViewType const& graph_view,
InputIterator input_first,
InputIterator input_last,
T init = T{},
raft::comms::op_t op = raft::comms::op_t::SUM)
{
auto ret = op_dispatch<T>(op, [&handle, &graph_view, input_first, input_last, init](auto op) {
return thrust::reduce(
handle.get_thrust_policy(),
input_first,
input_last,
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? T{} : init,
op);
});
if constexpr (GraphViewType::is_multi_gpu) {
ret = host_scalar_allreduce(handle.get_comms(), ret, op, handle.get_stream());
}
return ret;
}

} // namespace cugraph
85 changes: 30 additions & 55 deletions cpp/include/cugraph/prims/transform_reduce_v.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,22 @@

namespace cugraph {

namespace detail {

template <typename vertex_t, typename VertexValueInputIterator, typename VertexOp, typename T>
struct transform_reduce_call_v_op_t {
vertex_t local_vertex_partition_range_first{};
VertexValueInputIterator vertex_value_input_first{};
VertexOp v_op{};

__device__ T operator()(vertex_t i)
{
return v_op(local_vertex_partition_range_first + i, *(vertex_value_input_first + i));
}
};

} // namespace detail

/**
* @brief Apply an operator to the vertex properties and reduce.
*
Expand All @@ -43,8 +59,9 @@ namespace cugraph {
* @param vertex_value_input_first Iterator pointing to the vertex properties for the first
* (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive)
* is deduced as @p vertex_value_input_first + @p graph_view.local_vertex_partition_range_size().
* @param v_op Unary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p
* graph_view.local_vertex_partition_range_size())) and returns a transformed value to be reduced.
* @param v_op Binary operator takes vertex ID and *(@p vertex_value_input_first + i) (where i is
* [0, @p graph_view.local_vertex_partition_range_size())) and returns a transformed value to be
* reduced.
* @param init Initial value to be added to the transform-reduced input vertex properties.
* @return T Reduction of the @p v_op outputs.
*/
Expand All @@ -56,61 +73,19 @@ T transform_reduce_v(raft::handle_t const& handle,
T init,
raft::comms::op_t op = raft::comms::op_t::SUM)
{
auto id = identity_element<T>(op);
auto ret =
op_dispatch<T>(op, [&handle, &graph_view, vertex_value_input_first, v_op, id, init](auto op) {
return thrust::transform_reduce(
handle.get_thrust_policy(),
vertex_value_input_first,
vertex_value_input_first + graph_view.local_vertex_partition_range_size(),
v_op,
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? id : init,
op);
});
if (GraphViewType::is_multi_gpu) {
ret = host_scalar_allreduce(handle.get_comms(), ret, op, handle.get_stream());
}
return ret;
}
using vertex_t = typename GraphViewType::vertex_type;

/**
* @brief Apply an operator to the vertex properties and reduce.
*
* This version (conceptually) iterates over only a subset of the graph vertices. This function
* actually works as thrust::transform_reduce() on [@p input_first, @p input_last) (followed by
* inter-process reduction in multi-GPU).
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam InputIterator Type of the iterator for input values.
* @tparam VertexOp
* @tparam T Type of the initial value.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param input_first Iterator pointing to the beginning (inclusive) of the values to be passed to
* @p v_op.
* @param input_last Iterator pointing to the end (exclusive) of the values to be passed to @p v_op.
* @param v_op Unary operator takes *(@p input_first + i) (where i is [0, @p input_last - @p
* input_first)) and returns a transformed value to be reduced.
* @param init Initial value to be added to the transform-reduced input vertex properties.
* @return T Reduction of the @p v_op outputs.
*/
template <typename GraphViewType, typename InputIterator, typename VertexOp, typename T>
T transform_reduce_v(raft::handle_t const& handle,
GraphViewType const& graph_view,
InputIterator input_first,
InputIterator input_last,
VertexOp v_op,
T init = T{},
raft::comms::op_t op = raft::comms::op_t::SUM)
{
auto ret = op_dispatch<T>(op, [&handle, input_first, input_last, v_op, init](auto op) {
return thrust::transform_reduce(
auto id = identity_element<T>(op);
auto it = thrust::make_transform_iterator(
thrust::make_counting_iterator(vertex_t{0}),
detail::transform_reduce_call_v_op_t<vertex_t, VertexValueInputIterator, VertexOp, T>{
graph_view.local_vertex_partition_range_first(), vertex_value_input_first, v_op});
auto ret = op_dispatch<T>(op, [&handle, &graph_view, it, id, init](auto op) {
return thrust::reduce(
handle.get_thrust_policy(),
input_first,
input_last,
v_op,
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? T{} : init,
it,
it + graph_view.local_vertex_partition_range_size(),
((GraphViewType::is_multi_gpu) && (handle.get_comms().get_rank() != 0)) ? id : init,
op);
});
if (GraphViewType::is_multi_gpu) {
Expand Down
Loading