Add new all-pairs similarity algorithm (#4158)

Added a new entry point for similarity functionality that combines the functionality of k_hop_nbrs and similarity. This entry point allows us to compute similarity for all pairs of vertices in the graph in a single call. We also add the optional parameter topk which, if specified, will only return the vertices that have the highest scores. If topk is specified on an all pairs call, we compute the scores for pairs in batches and extract the topk as we go along to keep the memory footprint low. This PR also updates a FIXME in the C++ similarity test. The C++ similarity test had been written before we had a k_hop_nbrs call, so there was some inefficient test code to compute that. Now that we have a k_hop_nbrs call, the test code was refactored to use that call. Supersedes PR #4134 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: #4158
rapidsai · Mar 7, 2024 · 5b5061e · 5b5061e
1 parent ee988db
commit 5b5061e
Show file tree

Hide file tree

Showing 26 changed files with 2,244 additions and 89 deletions.
diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
@@ -2135,6 +2135,172 @@ rmm::device_uvector<weight_t> overlap_coefficients(
   std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
   bool do_expensive_check = false);
 
+/**
+ * @brief     Compute Jaccard all pairs similarity coefficient
+ *
+ * Similarity is computed for all pairs of vertices.  Note that in a sparse
+ * graph, many of the vertex pairs will have a score of zero.  We actually
+ * compute similarity only for vertices that are two hop neighbors within
+ * the graph, since vertices that are not two hop neighbors will have
+ * a score of 0.
+ *
+ * If @p vertices is specified we will compute similarity on two hop
+ * neighbors the @p vertices.  If @p vertices is not specified it will
+ * compute similarity on all two hop neighbors in the graph.
+ *
+ * If @p topk is specified only the top @p topk scoring vertex pairs
+ * will be returned, if not specified then scores for all computed vertex pairs
+ * will be returned.
+ *
+ * Note the list of two hop neighbors in the entire graph might be a large
+ * number of vertex pairs.  If the graph is dense enough it could be as large
+ * as the the number of vertices squared, which might run out of memory.
+ *
+ * @throws                 cugraph::logic_error when an error occurs.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
+ * a weight of 1 for all edges.
+ * @param vertices optional device span defining the seed vertices. In a multi-gpu context the
+ * vertices should be local to this GPU.
+ * @param topk optional specification of the how many of the top scoring vertex pairs should be
+ * returned
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length.  Corresponding
+ * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
+ * one of v1's two hop neighors, and the score identifying the similarity score between v1 and v2.
+ * If @p topk was specified then the vectors will be no longer than @p topk elements.  In a
+ * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
+ * will be returned on the local GPU for vertex v1.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::
+  tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
+  jaccard_all_pairs_coefficients(
+    raft::handle_t const& handle,
+    graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+    std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+    std::optional<raft::device_span<vertex_t const>> vertices,
+    std::optional<size_t> topk,
+    bool do_expensive_check = false);
+
+/**
+ * @brief     Compute Sorensen similarity coefficient
+ *
+ * Similarity is computed for all pairs of vertices.  Note that in a sparse
+ * graph, many of the vertex pairs will have a score of zero.  We actually
+ * compute similarity only for vertices that are two hop neighbors within
+ * the graph, since vertices that are not two hop neighbors will have
+ * a score of 0.
+ *
+ * If @p vertices is specified we will compute similarity on two hop
+ * neighbors the @p vertices.  If @p vertices is not specified it will
+ * compute similarity on all two hop neighbors in the graph.
+ *
+ * If @p topk is specified only the top @p topk scoring vertex pairs
+ * will be returned, if not specified then scores for all computed vertex pairs
+ * will be returned.
+ *
+ * Note the list of two hop neighbors in the entire graph might be a large
+ * number of vertex pairs.  If the graph is dense enough it could be as large
+ * as the the number of vertices squared, which might run out of memory.
+ *
+ * @throws                 cugraph::logic_error when an error occurs.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
+ * a weight of 1 for all edges.
+ * @param vertices optional device span defining the seed vertices.
+ * @param topk optional specification of the how many of the top scoring vertex pairs should be
+ * returned
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length.  Corresponding
+ * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
+ * one of v1's two hop neighors, and the score identifying the similarity score between v1 and v2.
+ * If @p topk was specified then the vectors will be no longer than @p topk elements.  In a
+ * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
+ * will be returned on the local GPU for vertex v1.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::
+  tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
+  sorensen_all_pairs_coefficients(
+    raft::handle_t const& handle,
+    graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+    std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+    std::optional<raft::device_span<vertex_t const>> vertices,
+    std::optional<size_t> topk,
+    bool do_expensive_check = false);
+
+/**
+ * @brief     Compute overlap similarity coefficient
+ *
+ * Similarity is computed for all pairs of vertices.  Note that in a sparse
+ * graph, many of the vertex pairs will have a score of zero.  We actually
+ * compute similarity only for vertices that are two hop neighbors within
+ * the graph, since vertices that are not two hop neighbors will have
+ * a score of 0.
+ *
+ * If @p vertices is specified we will compute similarity on two hop
+ * neighbors the @p vertices.  If @p vertices is not specified it will
+ * compute similarity on all two hop neighbors in the graph.
+ *
+ * If @p topk is specified only the top @p topk scoring vertex pairs
+ * will be returned, if not specified then scores for all computed vertex pairs
+ * will be returned.
+ *
+ * Note the list of two hop neighbors in the entire graph might be a large
+ * number of vertex pairs.  If the graph is dense enough it could be as large
+ * as the the number of vertices squared, which might run out of memory.
+ *
+ * @throws                 cugraph::logic_error when an error occurs.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
+ * a weight of 1 for all edges.
+ * @param vertices optional device span defining the seed vertices.
+ * @param topk optional specification of the how many of the top scoring vertex pairs should be
+ * returned
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length.  Corresponding
+ * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
+ * one of v1's two hop neighors, and the score identifying the similarity score between v1 and v2.
+ * If @p topk was specified then the vectors will be no longer than @p topk elements.  In a
+ * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
+ * will be returned on the local GPU for vertex v1.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::
+  tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
+  overlap_all_pairs_coefficients(
+    raft::handle_t const& handle,
+    graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+    std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+    std::optional<raft::device_span<vertex_t const>> vertices,
+    std::optional<size_t> topk,
+    bool do_expensive_check = false);
+
 /*
  * @brief Enumerate K-hop neighbors
  *

diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh
@@ -37,51 +37,52 @@ namespace cugraph {
 
 namespace detail {
 
-template <typename vertex_t, typename edge_t>
-std::tuple<std::vector<vertex_t>, std::vector<edge_t>> compute_offset_aligned_edge_chunks(
+template <typename vertex_t, typename offset_t>
+std::tuple<std::vector<vertex_t>, std::vector<offset_t>> compute_offset_aligned_element_chunks(
   raft::handle_t const& handle,
-  edge_t const* offsets,
-  vertex_t num_vertices,
-  edge_t num_edges,
-  size_t approx_edge_chunk_size)
+  raft::device_span<offset_t const> offsets,
+  offset_t num_elements,
+  vertex_t approx_element_chunk_size)
 {
   auto search_offset_first = thrust::make_transform_iterator(
     thrust::make_counting_iterator(size_t{1}),
     cuda::proclaim_return_type<size_t>(
-      [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }));
-  auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size;
+      [approx_element_chunk_size] __device__(auto i) { return i * approx_element_chunk_size; }));
+  auto num_chunks = (num_elements + approx_element_chunk_size - 1) / approx_element_chunk_size;
 
   if (num_chunks > 1) {
-    rmm::device_uvector<vertex_t> d_vertex_offsets(num_chunks - 1, handle.get_stream());
+    rmm::device_uvector<vertex_t> d_chunk_offsets(num_chunks - 1, handle.get_stream());
     thrust::lower_bound(handle.get_thrust_policy(),
-                        offsets,
-                        offsets + num_vertices + 1,
+                        offsets.begin(),
+                        offsets.end(),
                         search_offset_first,
-                        search_offset_first + d_vertex_offsets.size(),
-                        d_vertex_offsets.begin());
-    rmm::device_uvector<edge_t> d_edge_offsets(d_vertex_offsets.size(), handle.get_stream());
+                        search_offset_first + d_chunk_offsets.size(),
+                        d_chunk_offsets.begin());
+    rmm::device_uvector<offset_t> d_element_offsets(d_chunk_offsets.size(), handle.get_stream());
     thrust::gather(handle.get_thrust_policy(),
-                   d_vertex_offsets.begin(),
-                   d_vertex_offsets.end(),
-                   offsets,
-                   d_edge_offsets.begin());
-    std::vector<edge_t> h_edge_offsets(num_chunks + 1, edge_t{0});
-    h_edge_offsets.back() = num_edges;
-    raft::update_host(
-      h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream());
-    std::vector<vertex_t> h_vertex_offsets(num_chunks + 1, vertex_t{0});
-    h_vertex_offsets.back() = num_vertices;
-    raft::update_host(h_vertex_offsets.data() + 1,
-                      d_vertex_offsets.data(),
-                      d_vertex_offsets.size(),
+                   d_chunk_offsets.begin(),
+                   d_chunk_offsets.end(),
+                   offsets.begin(),
+                   d_element_offsets.begin());
+    std::vector<offset_t> h_element_offsets(num_chunks + 1, offset_t{0});
+    h_element_offsets.back() = num_elements;
+    raft::update_host(h_element_offsets.data() + 1,
+                      d_element_offsets.data(),
+                      d_element_offsets.size(),
+                      handle.get_stream());
+    std::vector<vertex_t> h_chunk_offsets(num_chunks + 1, vertex_t{0});
+    h_chunk_offsets.back() = offsets.size() - 1;
+    raft::update_host(h_chunk_offsets.data() + 1,
+                      d_chunk_offsets.data(),
+                      d_chunk_offsets.size(),
                       handle.get_stream());
 
     handle.sync_stream();
 
-    return std::make_tuple(h_vertex_offsets, h_edge_offsets);
+    return std::make_tuple(h_chunk_offsets, h_element_offsets);
   } else {
-    return std::make_tuple(std::vector<vertex_t>{{0, num_vertices}},
-                           std::vector<edge_t>{{0, num_edges}});
+    return std::make_tuple(std::vector<vertex_t>{{0, offsets.size() - 1}},
+                           std::vector<offset_t>{{0, num_elements}});
   }
 }
 

diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.