Commit: Merge cfb29b0 into af47c0c
seunghwak authored Oct 10, 2022
2 parents af47c0c + cfb29b0 commit 7a13d6f
Showing 13 changed files with 1,074 additions and 64 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
@@ -254,6 +254,8 @@ set(CUGRAPH_SOURCES
src/structure/symmetrize_edgelist_mg.cu
src/community/triangle_count_sg.cu
src/community/triangle_count_mg.cu
+src/traversal/k_hop_nbrs_sg.cu
+src/traversal/k_hop_nbrs_mg.cu
)

if(USE_CUGRAPH_OPS)
30 changes: 30 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
@@ -1798,6 +1798,36 @@ rmm::device_uvector<weight_t> overlap_coefficients(
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
bool use_weights);

+/**
+ * @brief Enumerate K-hop neighbors
+ *
+ * Note that the number of K-hop neighbors (and memory footprint) can grow very fast if there are
+ * high-degree vertices. Limit the number of start vertices and @p k to avoid rapid increase in
+ * memory footprint.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param start_vertices Find K-hop neighbors from each vertex in @p start_vertices.
+ * @param k Number of hops to make to enumerate neighbors.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return Tuple of two arrays: offsets and K-hop neighbors. The size of the offset array is @p
+ * start_vertices.size() + 1. The i'th and (i+1)'th elements of the offset array demarcate the
+ * beginning (inclusive) and end (exclusive) of the K-hop neighbors of the i'th element of @p
+ * start_vertices, respectively.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<size_t>, rmm::device_uvector<vertex_t>> k_hop_nbrs(
+  raft::handle_t const& handle,
+  graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const& graph_view,
+  raft::device_span<vertex_t const> start_vertices,
+  size_t k,
+  bool do_expensive_check = false);

} // namespace cugraph

/**
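For context, a minimal usage sketch of the k_hop_nbrs API added above (editorial illustration, not part of this commit; the handle, graph view, and start-vertex setup are assumed, and include paths follow this release's raft/rmm layout):

#include <cugraph/algorithms.hpp>
#include <cugraph/graph_view.hpp>

#include <raft/core/device_span.hpp>
#include <rmm/device_uvector.hpp>

// Single-GPU instantiation: vertex_t = int32_t, edge_t = int32_t, weight_t = float.
void k_hop_nbrs_example(raft::handle_t const& handle,
                        cugraph::graph_view_t<int32_t, int32_t, float, false, false> const& graph_view,
                        rmm::device_uvector<int32_t> const& start_vertices)
{
  auto [offsets, nbrs] = cugraph::k_hop_nbrs<int32_t, int32_t, float, false>(
    handle,
    graph_view,
    raft::device_span<int32_t const>(start_vertices.data(), start_vertices.size()),
    size_t{2} /* k */);

  // offsets holds start_vertices.size() + 1 elements; the 2-hop neighbors of
  // start_vertices[i] are nbrs[offsets[i]] .. nbrs[offsets[i + 1] - 1].
  // Both vectors live in device memory; copy to host before inspecting.
}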
114 changes: 63 additions & 51 deletions cpp/include/cugraph/edge_partition_device_view.cuh
@@ -55,7 +55,7 @@ __device__ thrust::optional<vertex_t> major_hypersparse_idx_from_major_nocheck_impl(
: thrust::nullopt;
}

-template <typename vertex_t, typename edge_t, bool multi_gpu, bool use_dcs>
+template <typename vertex_t, typename edge_t, typename return_type_t, bool multi_gpu, bool use_dcs>
struct local_degree_op_t {
raft::device_span<edge_t const> offsets{};
std::conditional_t<multi_gpu, vertex_t, std::byte /* dummy */> major_range_first{};
@@ -64,30 +64,30 @@ struct local_degree_op_t {
dcs_nzd_vertices{};
std::conditional_t<use_dcs, vertex_t, std::byte /* dummy */> major_hypersparse_first{};

-__device__ edge_t operator()(vertex_t major) const
+__device__ return_type_t operator()(vertex_t major) const
{
if constexpr (multi_gpu) {
vertex_t idx{};
if constexpr (use_dcs) {
if (major < major_hypersparse_first) {
idx = major - major_range_first;
-return offsets[idx + 1] - offsets[idx];
+return static_cast<return_type_t>(offsets[idx + 1] - offsets[idx]);
} else {
auto major_hypersparse_idx =
major_hypersparse_idx_from_major_nocheck_impl(dcs_nzd_vertices, major);
if (major_hypersparse_idx) {
idx = (major_hypersparse_first - major_range_first) + *major_hypersparse_idx;
-return offsets[idx + 1] - offsets[idx];
+return static_cast<return_type_t>(offsets[idx + 1] - offsets[idx]);
} else {
-return edge_t{0};
+return return_type_t{0};
}
}
} else {
idx = major - major_range_first;
-return offsets[idx + 1] - offsets[idx];
+return static_cast<return_type_t>(offsets[idx + 1] - offsets[idx]);
}
} else {
-return offsets[major + 1] - offsets[major];
+return static_cast<return_type_t>(offsets[major + 1] - offsets[major]);
}
}
};
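The added return_type_t parameter lets callers pick the accumulator type: compute_number_of_edges (below) sums per-vertex degrees over an arbitrarily long majors list, so an edge_t total (often int32_t) can overflow even when every individual degree fits. A host-side sketch of the arithmetic (editorial illustration; the sizes are hypothetical):

#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

int main()
{
  // 70,000 majors, each of degree 40,000: every per-vertex degree fits in a
  // 32-bit edge_t, but the total (2.8 billion) exceeds INT32_MAX (~2.147 billion).
  std::vector<int32_t> degrees(70'000, 40'000);

  size_t total = 0;  // size_t accumulator, as in the patched compute_number_of_edges
  for (auto d : degrees) {
    total += static_cast<size_t>(d);
  }

  std::printf("total degree = %zu\n", total);
  std::printf("fits in int32_t edge_t? %s\n",
              total <= static_cast<size_t>(std::numeric_limits<int32_t>::max()) ? "yes" : "no");
}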
@@ -176,31 +176,39 @@ class edge_partition_device_view_t<vertex_t,
{
}

-edge_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
+size_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
rmm::cuda_stream_view stream) const
{
return dcs_nzd_vertices_ ? thrust::transform_reduce(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, true>{
-this->offsets_,
-major_range_first_,
-*dcs_nzd_vertices_,
-*major_hypersparse_first_},
-edge_t{0},
-thrust::plus<edge_t>())
+detail::local_degree_op_t<
+vertex_t,
+edge_t,
+size_t /* no limit on majors.size(), so edge_t can overflow */,
+multi_gpu,
+true>{this->offsets_,
+major_range_first_,
+*dcs_nzd_vertices_,
+*major_hypersparse_first_},
+size_t{0},
+thrust::plus<size_t>())
: thrust::transform_reduce(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{
-this->offsets_,
-major_range_first_,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */},
-edge_t{0},
-thrust::plus<edge_t>());
+detail::local_degree_op_t<
+vertex_t,
+edge_t,
+size_t /* no limit on majors.size(), so edge_t can overflow */,
+multi_gpu,
+false>{this->offsets_,
+major_range_first_,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */},
+size_t{0},
+thrust::plus<size_t>());
}

rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
@@ -212,7 +220,7 @@
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, true>{
+detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, true>{
this->offsets_,
major_range_first_,
*dcs_nzd_vertices_,
@@ -223,7 +231,7 @@
thrust::make_counting_iterator(this->major_range_first()),
thrust::make_counting_iterator(this->major_range_last()),
local_degrees.begin(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{
+detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
this->offsets_, major_range_first_, std::byte{0} /* dummy */, std::byte{0} /* dummy */});
}
return local_degrees;
@@ -239,7 +247,7 @@
majors.begin(),
majors.end(),
local_degrees.begin(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, true>{
+detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, true>{
this->offsets_,
major_range_first_,
dcs_nzd_vertices_.value(),
@@ -250,7 +258,7 @@
majors.begin(),
majors.end(),
local_degrees.begin(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{
+detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
this->offsets_, major_range_first_, std::byte{0} /* dummy */, std::byte{0} /* dummy */});
}
return local_degrees;
@@ -366,49 +374,53 @@ class edge_partition_device_view_t<vertex_t,
{
}

-edge_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
+size_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
rmm::cuda_stream_view stream) const
{
return thrust::transform_reduce(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{this->offsets_,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */},
-edge_t{0},
-thrust::plus<edge_t>());
+detail::local_degree_op_t<vertex_t,
+edge_t,
+size_t /* no limit on majors.size(), so edge_t can overflow */,
+multi_gpu,
+false>{this->offsets_,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */},
+size_t{0},
+thrust::plus<size_t>());
}

rmm::device_uvector<edge_t> compute_local_degrees(rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(this->major_range_size(), stream);
-thrust::transform(
-rmm::exec_policy(stream),
-thrust::make_counting_iterator(this->major_range_first()),
-thrust::make_counting_iterator(this->major_range_last()),
-local_degrees.begin(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{this->offsets_,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */});
+thrust::transform(rmm::exec_policy(stream),
+thrust::make_counting_iterator(this->major_range_first()),
+thrust::make_counting_iterator(this->major_range_last()),
+local_degrees.begin(),
+detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
+this->offsets_,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */});
return local_degrees;
}

rmm::device_uvector<edge_t> compute_local_degrees(raft::device_span<vertex_t const> majors,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(majors.size(), stream);
-thrust::transform(
-rmm::exec_policy(stream),
-majors.begin(),
-majors.end(),
-local_degrees.begin(),
-detail::local_degree_op_t<vertex_t, edge_t, multi_gpu, false>{this->offsets_,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */,
-std::byte{0} /* dummy */});
+thrust::transform(rmm::exec_policy(stream),
+majors.begin(),
+majors.end(),
+local_degrees.begin(),
+detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
+this->offsets_,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */,
+std::byte{0} /* dummy */});
return local_degrees;
}

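To make the compute_number_of_edges pattern above concrete, here is a standalone sketch of a device functor that reads CSR offsets and reduces into a size_t accumulator (editorial illustration to be compiled as a .cu file; the toy graph and names are not cuGraph's):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/transform_reduce.h>

#include <cstdint>
#include <cstdio>
#include <vector>

struct degree_op_t {
  int32_t const* offsets{nullptr};  // CSR offsets, length = #vertices + 1

  __device__ size_t operator()(int32_t major) const
  {
    // Widen before the reduction so the running sum cannot overflow edge_t.
    return static_cast<size_t>(offsets[major + 1] - offsets[major]);
  }
};

int main()
{
  std::vector<int32_t> h_offsets{0, 2, 5, 5, 9};  // 4 vertices, 9 edges
  std::vector<int32_t> h_majors{0, 1, 3};
  thrust::device_vector<int32_t> offsets(h_offsets.begin(), h_offsets.end());
  thrust::device_vector<int32_t> majors(h_majors.begin(), h_majors.end());

  size_t num_edges = thrust::transform_reduce(
    thrust::device,
    majors.begin(),
    majors.end(),
    degree_op_t{thrust::raw_pointer_cast(offsets.data())},
    size_t{0},
    thrust::plus<size_t>());

  std::printf("edges incident to the given majors: %zu\n", num_edges);  // 2 + 3 + 4 = 9
}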
4 changes: 2 additions & 2 deletions cpp/src/components/weakly_connected_components_impl.cuh
@@ -563,8 +563,8 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
}

auto max_pushes = GraphViewType::is_multi_gpu
-? compute_num_out_nbrs_from_frontier(
-handle, level_graph_view, vertex_frontier.bucket(bucket_idx_cur))
+? static_cast<edge_t>(compute_num_out_nbrs_from_frontier(
+handle, level_graph_view, vertex_frontier.bucket(bucket_idx_cur)))
: edge_count;

// FIXME: if we use cuco::static_map (no duplicates, ideally we need static_set), edge_buffer
10 changes: 4 additions & 6 deletions cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
@@ -210,10 +210,9 @@ auto sort_and_reduce_buffer_elements(
} // namespace detail

template <typename GraphViewType, typename VertexFrontierBucketType>
-typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier(
-raft::handle_t const& handle,
-GraphViewType const& graph_view,
-VertexFrontierBucketType const& frontier)
+size_t compute_num_out_nbrs_from_frontier(raft::handle_t const& handle,
+GraphViewType const& graph_view,
+VertexFrontierBucketType const& frontier)
{
static_assert(!GraphViewType::is_storage_transposed,
"GraphViewType should support the push model.");
@@ -223,7 +222,7 @@ typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier(
using weight_t = typename GraphViewType::weight_type;
using key_t = typename VertexFrontierBucketType::key_type;

-edge_t ret{0};
+size_t ret{0};

vertex_t const* local_frontier_vertex_first{nullptr};
if constexpr (std::is_same_v<key_t, vertex_t>) {
@@ -244,7 +243,6 @@ typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier(
edge_partition_device_view_t<vertex_t, edge_t, weight_t, GraphViewType::is_multi_gpu>(
graph_view.local_edge_partition_view(i));

-// FIXME: edge_partition.compute_number_of_edges()???
if constexpr (GraphViewType::is_multi_gpu) {
auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name());
auto const col_comm_rank = col_comm.get_rank();
5 changes: 3 additions & 2 deletions cpp/src/prims/vertex_frontier.cuh
@@ -210,8 +210,9 @@ class key_bucket_t {
tags_ = std::move(merged_tags);
} else {
auto cur_size = vertices_.size();
-vertices_.resize(cur_size + thrust::distance(key_first, key_last));
-tags_.resize(vertices_.size());
+vertices_.resize(cur_size + thrust::distance(key_first, key_last),
+handle_ptr_->get_stream());
+tags_.resize(vertices_.size(), handle_ptr_->get_stream());
thrust::copy(
handle_ptr_->get_thrust_policy(),
key_first,
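The vertex_frontier.cuh fix supplies the stream argument that rmm::device_uvector::resize requires; unlike std::vector::resize, there is no stream-less overload. A self-contained sketch of the pattern (editorial illustration, not from this commit):

#include <rmm/cuda_stream.hpp>
#include <rmm/device_uvector.hpp>

#include <cstdint>

int main()
{
  rmm::cuda_stream stream;
  rmm::device_uvector<int32_t> vertices(100, stream.view());

  // device_uvector::resize takes the stream on which the new allocation and
  // the copy of the existing elements are performed.
  vertices.resize(250, stream.view());

  stream.synchronize();
  return 0;
}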
(Diffs for the remaining 7 changed files are not shown.)
