rapidsai · rapids-bot · Feb 4, 2021 · Dec 16, 2020 · Dec 17, 2020 · Jan 4, 2021
@@ -40,6 +40,7 @@ for i in range(len(df_page)):
 |              | Betweenness Centrality                 | Single-GPU   |                     |
 |              | Edge Betweenness Centrality            | Single-GPU   |                     |
 | Community    |                                        |              |                     |
+|              | EgoNet                                 | Single-GPU   |                     |
 |              | Leiden                                 | Single-GPU   |                     |
 |              | Louvain                                | Multi-GPU    |                     |
 |              | Ensemble Clustering for Graphs         | Single-GPU   |                     |

@@ -360,6 +360,7 @@ add_library(cugraph SHARED
     src/community/ECG.cu
     src/community/triangles_counting.cu
     src/community/extract_subgraph_by_vertex.cu
+    src/community/egonet.cu
     src/cores/core_number.cu
     src/traversal/two_hop_neighbors.cu
     src/components/connectivity.cu

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #pragma once
-
+#include <experimental/graph.hpp>
 #include <experimental/graph_view.hpp>
 #include <graph.hpp>
 #include <internals.hpp>
@@ -815,6 +815,7 @@ template <typename VT, typename ET, typename WT>
 std::unique_ptr<GraphCOO<VT, ET, WT>> extract_subgraph_vertex(GraphCOOView<VT, ET, WT> const &graph,
                                                               VT const *vertices,
                                                               VT num_vertices);
+}  // namespace subgraph
 
 /**
  * @brief     Wrapper function for Nvgraph balanced cut clustering
@@ -837,7 +838,6 @@ std::unique_ptr<GraphCOO<VT, ET, WT>> extract_subgraph_vertex(GraphCOOView<VT, E
  * @param[out] clustering            Pointer to device memory where the resulting clustering will
  * be stored
  */
-}  // namespace subgraph
 
 namespace ext_raft {
 template <typename VT, typename ET, typename WT>
@@ -1191,6 +1191,35 @@ void katz_centrality(raft::handle_t const &handle,
                      bool has_initial_guess  = false,
                      bool normalize          = false,
                      bool do_expensive_check = false);
-
+/**
+ * @brief Returns induced EgoNet subgraph(s) of neighbors centered at nodes in source_vertex within
+ * a given radius.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object; the induced egonet subgraphs are extracted from it.
+ * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs).
+ * @param n_subgraphs Number of induced EgoNet subgraphs to extract (i.e. number of elements in @p
+ * source_vertex).
+ * @param radius Include all neighbors of distance <= radius from @p source_vertex.
+ * @return std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>,
+ * rmm::device_uvector<weight_t>, rmm::device_uvector<size_t>> Quadruplet of edge source vertices,
+ * edge destination vertices, edge weights, and edge offsets for each induced EgoNet subgraph.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<weight_t>,
+           rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &handle,
+            graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const &graph_view,
+            vertex_t *source_vertex,
+            vertex_t n_subgraphs,
+            vertex_t radius);
 }  // namespace experimental
 }  // namespace cugraph
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -109,6 +109,40 @@ struct graph_container_t {
   experimental::graph_properties_t graph_props;
 };
 
+/**
+ * @brief     Owning struct. Allows returning multiple edge lists and edge offsets.
+ *            cython only
+ *
+ * @param  number_of_vertices    The total number of vertices (V below)
+ * @param  number_of_edges       The total number of edges (E below): number of elements in
+ *                               src_indices, dst_indices and edge_data
+ * @param  number_of_subgraph    The number of subgraphs: number of elements in subgraph_offsets
+ *                               minus 1
+ * @param  src_indices           This array of size E (number of edges) contains the index of the
+ *                               source for each edge. Indices must be in the range [0, V-1].
+ * @param  dst_indices           This array of size E (number of edges) contains the index of the
+ *                               destination for each edge. Indices must be in the range [0, V-1].
+ * @param  edge_data             This array of size E (number of edges) contains the weight for
+ *                               each edge. This array can be null in which case the graph is
+ *                               considered unweighted.
+ * @param  subgraph_offsets      This array of size number_of_subgraph + 1 contains the edge
+ *                               offsets for each subgraph
+ *
+ * Every array is held through a std::unique_ptr<rmm::device_buffer>, which is what makes the
+ * struct owning; as a consequence an instance can be moved but not copied. The three counters
+ * have no default member initializer: set them explicitly when creating an instance.
+ */
+struct cy_multi_edgelists_t {
+  size_t number_of_vertices;
+  size_t number_of_edges;
+  size_t number_of_subgraph;
+  std::unique_ptr<rmm::device_buffer> src_indices;
+  std::unique_ptr<rmm::device_buffer> dst_indices;
+  std::unique_ptr<rmm::device_buffer> edge_data;
+  std::unique_ptr<rmm::device_buffer> subgraph_offsets;
+};
+// cy_multi_edgelists_t() : number_of_vertices(0), number_of_edges(0);
+
 // FIXME: finish description for vertex_partition_offsets
 //
 // Factory function for populating an empty graph container with a new graph
@@ -246,6 +280,14 @@ void call_sssp(raft::handle_t const& handle,
                vertex_t* predecessors,
                const vertex_t source_vertex);
 
+// Wrapper for calling egonet through a graph container; returns an owning cy_multi_edgelists_t
+template <typename vertex_t, typename weight_t>
+std::unique_ptr<cy_multi_edgelists_t> call_egonet(raft::handle_t const& handle,
+                                                  graph_container_t const& graph_container,
+                                                  vertex_t* source_vertex,
+                                                  vertex_t n_subgraphs,
+                                                  vertex_t radius);
+
 // Helper for setting up subcommunicators, typically called as part of the
 // user-initiated comms initialization in Python.
 //

@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Alex Fender [email protected]
+#include <algorithms.hpp>
+#include <cstddef>
+#include <memory>
+#include <tuple>
+#include <utility>
+
+#include <rmm/thrust_rmm_allocator.h>
+#include <thrust/transform.h>
+#include <ctime>
+
+#include <graph.hpp>
+
+#include <utilities/error.hpp>
+#include "experimental/graph.hpp"
+#include "utilities/graph_utils.cuh"
+
+#include <experimental/graph_functions.hpp>
+#include <experimental/graph_view.hpp>
+
+namespace {
+/**
+ * @brief Extracts the induced EgoNet subgraph around each source vertex (single-GPU only).
+ *
+ * The egonet of a node x is the subgraph that includes x, the neighborhood of x and all edges
+ * between them. Rather than doing custom one/two hops features, this is a generic k-hops
+ * solution: a BFS with a depth cutoff of @p radius collects the vertices of each egonet, then a
+ * single induced-subgraph extraction over all the collected vertex lists builds the edge lists.
+ * Arguments and result are the ones cugraph::experimental::extract_ego receives and returns.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<weight_t>,
+           rmm::device_uvector<size_t>>
+extract(
+  raft::handle_t const &handle,
+  cugraph::experimental::graph_view_t<vertex_t, edge_t, weight_t, false, false> const &csr_view,
+  vertex_t *source_vertex,
+  vertex_t n_subgraphs,
+  vertex_t radius)
+{
+  auto v      = csr_view.get_number_of_vertices();
+  auto e      = csr_view.get_number_of_edges();
+  auto stream = handle.get_stream();
+  // real-valued average degree: dividing the two integers directly would truncate the quotient
+  double avg_degree = v > 0 ? static_cast<double>(e) / static_cast<double>(v) : 0.0;
+  rmm::device_vector<size_t> neighbors_offsets(n_subgraphs + 1);
+  rmm::device_vector<vertex_t> neighbors;
+
+  // source_vertex is device memory; BFS takes its source on the host, hence this staging copy.
+  // FIXME consider adding a device API to BFS (ie. accept source on the device)
+  // NOTE(review): the copy is issued on `stream` - confirm it has completed before the loop reads it.
+  std::vector<vertex_t> h_source_vertex(n_subgraphs);
+  raft::update_host(h_source_vertex.data(), source_vertex, n_subgraphs, stream);
+
+  // reserve some reasonable memory, but could grow larger than that
+  neighbors.reserve(static_cast<size_t>(v + avg_degree * n_subgraphs * radius));
+  neighbors_offsets[0] = 0;
+  // host-side running total: reading neighbors_offsets[i] back costs a device round trip
+  size_t n_neighbors = 0;
+  // each source should be done concurrently in the future
+  for (vertex_t i = 0; i < n_subgraphs; i++) {
+    // BFS with cutoff
+    rmm::device_vector<vertex_t> reached(v);
+    rmm::device_vector<vertex_t> predecessors(v);  // not used
+    bool direction_optimizing = false;
+    cugraph::experimental::bfs<vertex_t, edge_t, weight_t, false>(handle,
+                                                                  csr_view,
+                                                                  reached.data().get(),
+                                                                  predecessors.data().get(),
+                                                                  h_source_vertex[i],
+                                                                  direction_optimizing,
+                                                                  radius);
+
+    // identify reached vertex ids from distance array
+    thrust::transform(rmm::exec_policy(stream)->on(stream),
+                      thrust::make_counting_iterator(vertex_t{0}),
+                      thrust::make_counting_iterator(v),
+                      reached.begin(),
+                      reached.begin(),
+                      [sentinel = std::numeric_limits<vertex_t>::max()] __device__(
+                        auto id, auto val) { return val < sentinel ? id : sentinel; });
+
+    // removes unreached data
+    auto reached_end = thrust::remove(rmm::exec_policy(stream)->on(stream),
+                                      reached.begin(),
+                                      reached.end(),
+                                      std::numeric_limits<vertex_t>::max());
+
+    // update extraction input
+    size_t n_reached = thrust::distance(reached.begin(), reached_end);
+    n_neighbors += n_reached;
+    neighbors_offsets[i + 1] = n_neighbors;
+    if (n_neighbors > neighbors.capacity()) neighbors.reserve(n_neighbors * 2);
+    neighbors.insert(neighbors.end(), reached.begin(), reached_end);
+  }
+
+  // extract
+  return cugraph::experimental::extract_induced_subgraphs(
+    handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs);
+}
+}  // namespace
+namespace cugraph {
+namespace experimental {
+/**
+ * @brief Single-GPU entry point: validates n_subgraphs and radius, then delegates to extract().
+ * Fails through CUGRAPH_FAIL / CUGRAPH_EXPECTS when multi_gpu is set or a check does not hold.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<weight_t>,
+           rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &handle,
+            graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const &graph_view,
+            vertex_t *source_vertex,
+            vertex_t n_subgraphs,
+            vertex_t radius)
+{
+  // CUGRAPH_FAIL does not return, so no placeholder result is needed after it.
+  if (multi_gpu) { CUGRAPH_FAIL("Unimplemented."); }
+  CUGRAPH_EXPECTS(n_subgraphs > 0, "Need at least one source to extract the egonet from");
+  // one source per vertex is legitimate, hence <= rather than <
+  CUGRAPH_EXPECTS(n_subgraphs <= graph_view.get_number_of_vertices(),
+                  "Can't have more sources to extract from than vertices in the graph");
+  CUGRAPH_EXPECTS(radius > 0, "Radius should be at least 1");
+  CUGRAPH_EXPECTS(radius < graph_view.get_number_of_vertices(), "radius is too large");
+  // source_vertex range is checked in bfs.
+
+  return extract<vertex_t, edge_t, weight_t>(
+    handle, graph_view, source_vertex, n_subgraphs, radius);
+}
+
+// Single-GPU (multi_gpu = false) explicit instantiations with float (FP32) edge weights
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<float>,
+                    rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &,
+            graph_view_t<int32_t, int32_t, float, false, false> const &,
+            int32_t *,
+            int32_t,
+            int32_t);
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<float>,
+                    rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &,
+            graph_view_t<int32_t, int64_t, float, false, false> const &,
+            int32_t *,
+            int32_t,
+            int32_t);
+template std::tuple<rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<float>,
+                    rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &,
+            graph_view_t<int64_t, int64_t, float, false, false> const &,
+            int64_t *,
+            int64_t,
+            int64_t);
+
+// Single-GPU (multi_gpu = false) explicit instantiations with double (FP64) edge weights
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<double>,
+                    rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &,
+            graph_view_t<int32_t, int32_t, double, false, false> const &,
+            int32_t *,
+            int32_t,
+            int32_t);
+template std::tuple<rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<int32_t>,
+                    rmm::device_uvector<double>,
+                    rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &,
+            graph_view_t<int32_t, int64_t, double, false, false> const &,
+            int32_t *,
+            int32_t,
+            int32_t);
+template std::tuple<rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<int64_t>,
+                    rmm::device_uvector<double>,
+                    rmm::device_uvector<size_t>>
+extract_ego(raft::handle_t const &,
+            graph_view_t<int64_t, int64_t, double, false, false> const &,
+            int64_t *,
+            int64_t,
+            int64_t);
+}  // namespace experimental
+}  // namespace cugraph