MNMG Neighborhood Sampling (#2073)
This PR tracks work on MNMG Neighborhood Sampling for G(C)NN needs.

Dependencies:
1. #1982
2. #2064
3. Integration of rapidsai/cugraph-ops#24 into `cugraph`

Authors:
  - Andrei Schaffer (https://github.com/aschaffer)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Kumar Aatish (https://github.com/kaatish)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Rick Ratzel (https://github.com/rlratzel)

URL: #2073
aschaffer authored Mar 17, 2022
1 parent 08ab284 commit 9732e5e
Showing 13 changed files with 1,640 additions and 340 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -184,6 +184,7 @@ add_library(cugraph SHARED
src/sampling/neighborhood.cu
src/sampling/random_walks.cu
src/sampling/detail/gather_utils_impl.cu
src/sampling/nbr_sampling_mg.cu
src/cores/legacy/core_number.cu
src/cores/core_number_sg.cu
src/cores/core_number_mg.cu
35 changes: 35 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
@@ -1538,4 +1538,39 @@ void core_number(raft::handle_t const& handle,
size_t k_last = std::numeric_limits<size_t>::max(),
bool do_expensive_check = false);

/**
* @brief Multi-GPU Uniform Neighborhood Sampling.
*
* @tparam graph_view_t Type of graph view.
* @tparam gpu_t Type of rank (GPU) indices.
* @tparam index_t Type used for indexing; typically edge_t.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph View object to generate NBR Sampling on.
* @param ptr_d_starting_vertices Device array of starting vertex IDs for the NBR Sampling.
* @param ptr_d_ranks Device array of rank IDs (GPU IDs) for the NBR Sampling.
* @param num_starting_vertices Size of the starting vertex set.
* @param h_fan_out Vector of branching-out (fan-out) degrees per source vertex for each level;
* used for obtaining local out-degree information.
* @param with_replacement Boolean flag specifying whether random sampling is done with
* replacement (true) or without replacement (false); default = true.
* @return Tuple of a tuple of device vectors and a vector of receive counts:
* ((vertex_t source_vertex, vertex_t destination_vertex, gpu_t rank, index_t index), rx_counts)
*/
template <typename graph_view_t,
typename gpu_t,
typename index_t = typename graph_view_t::edge_type>
std::tuple<std::tuple<rmm::device_uvector<typename graph_view_t::vertex_type>,
rmm::device_uvector<typename graph_view_t::vertex_type>,
rmm::device_uvector<gpu_t>,
rmm::device_uvector<index_t>>,
std::vector<size_t>>
uniform_nbr_sample(raft::handle_t const& handle,
graph_view_t const& graph_view,
typename graph_view_t::vertex_type const* ptr_d_starting_vertices,
gpu_t const* ptr_d_ranks,
size_t num_starting_vertices,
std::vector<int> const& h_fan_out,
bool with_replacement = true);

} // namespace cugraph
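For reference, a minimal caller sketch of the new uniform_nbr_sample API (not part of this diff). It assumes an MNMG-initialized raft::handle_t and an already-constructed multi-GPU graph view on the local rank; the wrapper function name, the graph-view template parameters, and all variable names are illustrative assumptions.

// Hypothetical caller sketch (not from this PR): two-hop uniform neighborhood sampling.
#include <cugraph/algorithms.hpp>
#include <cugraph/graph_view.hpp>

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

using vertex_t = int32_t;
using gpu_id_t = int32_t;

void sample_two_hops(raft::handle_t const& handle,
                     cugraph::graph_view_t<vertex_t, int32_t, float, false, true> const& mg_graph_view,
                     rmm::device_uvector<vertex_t> const& d_start_vertices,
                     rmm::device_uvector<gpu_id_t> const& d_start_ranks)
{
  std::vector<int> fan_out{4, 2};  // sample 4 neighbors at hop 1, 2 at hop 2

  auto [sampled, rx_counts] =
    cugraph::uniform_nbr_sample(handle,
                                mg_graph_view,
                                d_start_vertices.data(),
                                d_start_ranks.data(),
                                d_start_vertices.size(),
                                fan_out,
                                true /* with_replacement */);

  // sampled = (source vertices, destination vertices, owning ranks, edge indices),
  // each as a separate device vector; rx_counts holds the per-rank receive counts.
  auto& [d_src, d_dst, d_rank, d_edge_index] = sampled;
  // ... hand (d_src, d_dst) to downstream G(C)NN mini-batch construction ...
}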
28 changes: 15 additions & 13 deletions cpp/include/cugraph/detail/graph_functions.cuh
@@ -158,32 +158,34 @@ partition_information(raft::handle_t const& handle, GraphViewType const& graph_v
* Collect all the edges that are present in the adjacency lists on the current gpu
*
* @tparam GraphViewType Type of the passed non-owning graph object.
* @tparam EdgeIndexIterator Type of the iterator for edge indices.
* @tparam GPUIdIterator Type of the iterator for gpu id identifiers.
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Non-owning graph object.
* @param active_majors_in_row Device vector containing all the vertex id that are processed by
* @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator,
* and handles to various CUDA libraries) to run graph algorithms.
* @param[in] graph_view Non-owning graph object.
* @param[in] active_majors_in_row Device vector containing all the vertex IDs that are processed by
* gpus in the column communicator
* @param active_major_gpu_ids Device vector containing the gpu id associated by every vertex
* @param[in] active_major_gpu_ids Device vector containing the gpu id associated with every vertex
* present in active_majors_in_row
* @param edge_index_first Iterator pointing to the first destination index
* @param indices_per_source Number of indices supplied for every source in the range
* @param[in] minor_map Device vector of destination indices (modifiable in-place) corresponding to
* vertex IDs being returned
* @param[in] indices_per_source Number of indices supplied for every source in the range
* [vertex_input_first, vertex_input_last)
* @param global_degree_offset Global degree offset to local adjacency list for every source
* @param[in] global_degree_offset Global degree offset to local adjacency list for every source
* represented by current gpu
* @return A tuple of device vector containing the majors, minors and gpu_ids gathered locally
* @return A tuple of device vectors containing the majors, minors, gpu_ids and indices gathered
* locally
*/
template <typename GraphViewType, typename EdgeIndexIterator, typename gpu_t>
template <typename GraphViewType, typename gpu_t>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<gpu_t>>
rmm::device_uvector<gpu_t>,
rmm::device_uvector<typename GraphViewType::edge_type>>
gather_local_edges(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors_in_row,
const rmm::device_uvector<gpu_t>& active_major_gpu_ids,
EdgeIndexIterator edge_index_first,
rmm::device_uvector<typename GraphViewType::edge_type>&& minor_map,
typename GraphViewType::edge_type indices_per_major,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_degree_offsets);

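A hypothetical fragment (also not from this diff) showing how the new four-element result of gather_local_edges might be consumed inside cugraph::detail; the wrapper name consume_gathered_edges and the set-up of its arguments are assumptions.

// Hypothetical sketch: minor_map is moved in, compacted in place, and handed back
// as the fourth element of the result.
#include <cugraph/detail/graph_functions.cuh>

#include <raft/handle.hpp>
#include <rmm/device_uvector.hpp>

#include <utility>

template <typename GraphViewType, typename gpu_t>
void consume_gathered_edges(
  raft::handle_t const& handle,
  GraphViewType const& graph_view,
  rmm::device_uvector<typename GraphViewType::vertex_type> const& active_majors_in_row,
  rmm::device_uvector<gpu_t> const& active_major_gpu_ids,
  rmm::device_uvector<typename GraphViewType::edge_type>&& minor_map,
  typename GraphViewType::edge_type indices_per_major,
  rmm::device_uvector<typename GraphViewType::edge_type> const& global_degree_offsets)
{
  auto [majors, minors, minor_gpu_ids, indices] =
    cugraph::detail::gather_local_edges(handle,
                                        graph_view,
                                        active_majors_in_row,
                                        active_major_gpu_ids,
                                        std::move(minor_map),
                                        indices_per_major,
                                        global_degree_offsets);

  // All four vectors now share the same compacted length: one entry per locally found edge.
}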
130 changes: 71 additions & 59 deletions cpp/src/sampling/detail/gather_utils_impl.cu
@@ -203,65 +203,77 @@ template std::tuple<
partition_information(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, double, false, true> const& graph_view);

template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, float, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
int32_t const* edge_index_first,
int32_t indices_per_major,
const rmm::device_uvector<int32_t>& global_degree_offsets);

template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, double, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
int32_t const* edge_index_first,
int32_t indices_per_major,
const rmm::device_uvector<int32_t>& global_degree_offsets);

template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, float, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
int64_t const* edge_index_first,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

template std::
tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, double, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
int64_t const* edge_index_first,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

template std::
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, float, false, true> const& graph_view,
const rmm::device_uvector<int64_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
int64_t const* edge_index_first,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

template std::
tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>, rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, double, false, true> const& graph_view,
const rmm::device_uvector<int64_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
int64_t const* edge_index_first,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);
template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, float, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
rmm::device_uvector<int32_t>&& minor_map,
int32_t indices_per_major,
const rmm::device_uvector<int32_t>& global_degree_offsets);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int32_t, double, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
rmm::device_uvector<int32_t>&& minor_map,
int32_t indices_per_major,
const rmm::device_uvector<int32_t>& global_degree_offsets);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int64_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, float, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
rmm::device_uvector<int64_t>&& minor_map,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int64_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int32_t, int64_t, double, false, true> const& graph_view,
const rmm::device_uvector<int32_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
rmm::device_uvector<int64_t>&& minor_map,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

template std::tuple<rmm::device_uvector<int64_t>,
rmm::device_uvector<int64_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int64_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, float, false, true> const& graph_view,
const rmm::device_uvector<int64_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
rmm::device_uvector<int64_t>&& minor_map,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

template std::tuple<rmm::device_uvector<int64_t>,
rmm::device_uvector<int64_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<int64_t>>
gather_local_edges(raft::handle_t const& handle,
graph_view_t<int64_t, int64_t, double, false, true> const& graph_view,
const rmm::device_uvector<int64_t>& active_majors_in_row,
const rmm::device_uvector<int32_t>& active_major_gpu_ids,
rmm::device_uvector<int64_t>&& minor_map,
int64_t indices_per_major,
const rmm::device_uvector<int64_t>& global_degree_offsets);

} // namespace detail

18 changes: 12 additions & 6 deletions cpp/src/sampling/detail/gather_utils_impl.cuh
@@ -376,16 +376,17 @@ partition_information(raft::handle_t const& handle, GraphViewType const& graph_v
std::move(vc_offsets));
}

template <typename GraphViewType, typename EdgeIndexIterator, typename gpu_t>
template <typename GraphViewType, typename gpu_t>
std::tuple<rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<typename GraphViewType::vertex_type>,
rmm::device_uvector<gpu_t>>
rmm::device_uvector<gpu_t>,
rmm::device_uvector<typename GraphViewType::edge_type>>
gather_local_edges(
raft::handle_t const& handle,
GraphViewType const& graph_view,
const rmm::device_uvector<typename GraphViewType::vertex_type>& active_majors_in_row,
const rmm::device_uvector<gpu_t>& active_major_gpu_ids,
EdgeIndexIterator edge_index_first,
rmm::device_uvector<typename GraphViewType::edge_type>&& minor_map,
typename GraphViewType::edge_type indices_per_major,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_degree_offsets)
{
@@ -405,7 +406,7 @@ gather_local_edges(
handle.get_thrust_policy(),
thrust::make_counting_iterator<size_t>(0),
thrust::make_counting_iterator<size_t>(edge_count),
[edge_index_first,
[edge_index_first = minor_map.cbegin(),
active_majors = active_majors_in_row.data(),
active_major_gpu_ids = active_major_gpu_ids.data(),
id_begin = id_begin.data(),
@@ -460,14 +461,16 @@ auto location = location_in_segment + vertex_count_offsets[partition_id];
auto location = location_in_segment + vertex_count_offsets[partition_id];
auto g_degree_offset = global_degree_offsets[location];
auto g_dst_index = edge_index_first[index];

if ((g_dst_index >= g_degree_offset) && (g_dst_index < g_degree_offset + local_out_degree)) {
minors[index] = adjacency_list[g_dst_index - g_degree_offset];
} else {
minors[index] = invalid_vertex_id;
}
});

auto input_iter = thrust::make_zip_iterator(
thrust::make_tuple(majors.begin(), minors.begin(), minor_gpu_ids.begin()));
thrust::make_tuple(majors.begin(), minors.begin(), minor_gpu_ids.begin(), minor_map.begin()));

auto compacted_length = thrust::distance(
input_iter,
@@ -480,7 +483,10 @@ gather_local_edges(
majors.resize(compacted_length, handle.get_stream());
minors.resize(compacted_length, handle.get_stream());
minor_gpu_ids.resize(compacted_length, handle.get_stream());
return std::make_tuple(std::move(majors), std::move(minors), std::move(minor_gpu_ids));
minor_map.resize(compacted_length, handle.get_stream());

return std::make_tuple(
std::move(majors), std::move(minors), std::move(minor_gpu_ids), std::move(minor_map));
}

} // namespace detail
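To make the compaction step above concrete, here is a standalone Thrust sketch (not from this PR) of the zip-iterator pattern it uses: rows whose destination is an invalid sentinel are removed from all four columns in lockstep, and the vectors are then resized to the compacted length. The sentinel value and the toy data are assumptions.

// Standalone illustration of compacting four parallel columns via a zip iterator.
#include <thrust/device_vector.h>
#include <thrust/distance.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/remove.h>
#include <thrust/tuple.h>

#include <cstdint>
#include <limits>
#include <vector>

struct minor_is_invalid {
  int32_t invalid;
  __host__ __device__ bool operator()(thrust::tuple<int32_t, int32_t, int32_t, int32_t> t) const
  {
    return thrust::get<1>(t) == invalid;
  }
};

int main()
{
  int32_t constexpr invalid_vertex_id = std::numeric_limits<int32_t>::max();

  std::vector<int32_t> h_majors{0, 0, 1, 1};
  std::vector<int32_t> h_minors{5, invalid_vertex_id, 7, invalid_vertex_id};
  std::vector<int32_t> h_gpu_ids{0, 0, 1, 1};
  std::vector<int32_t> h_minor_map{2, 3, 0, 1};

  thrust::device_vector<int32_t> majors(h_majors.begin(), h_majors.end());
  thrust::device_vector<int32_t> minors(h_minors.begin(), h_minors.end());
  thrust::device_vector<int32_t> gpu_ids(h_gpu_ids.begin(), h_gpu_ids.end());
  thrust::device_vector<int32_t> minor_map(h_minor_map.begin(), h_minor_map.end());

  auto input_iter = thrust::make_zip_iterator(
    thrust::make_tuple(majors.begin(), minors.begin(), gpu_ids.begin(), minor_map.begin()));

  // Remove every tuple whose minor is invalid; the sibling columns shrink together.
  auto compacted_length = thrust::distance(
    input_iter,
    thrust::remove_if(
      input_iter, input_iter + majors.size(), minor_is_invalid{invalid_vertex_id}));

  majors.resize(compacted_length);
  minors.resize(compacted_length);
  gpu_ids.resize(compacted_length);
  minor_map.resize(compacted_length);  // compacted_length == 2 for this toy input

  return 0;
}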
