Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EgoNet #1365

Merged
merged 34 commits into from
Feb 4, 2021
Merged

EgoNet #1365

Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
31f01c9
saving wip
afender Dec 16, 2020
9bc7fe7
test WIP
afender Dec 17, 2020
c6b990a
Merge remote-tracking branch 'upstream/branch-0.18' into ego
afender Jan 4, 2021
7a46eed
cpp builds
afender Jan 8, 2021
01cbd41
api upgrade
afender Jan 8, 2021
3ff0e0a
cpp cython stuff
afender Jan 13, 2021
058485d
cython friendly layer at cpp level
afender Jan 15, 2021
fa44e61
created python files
afender Jan 15, 2021
3a7e69e
pxd
afender Jan 15, 2021
cbd7359
typo
afender Jan 15, 2021
29d384c
progress on bindings
afender Jan 21, 2021
1c0c572
docs
afender Jan 21, 2021
7844fa4
some cleanup
afender Jan 21, 2021
4ef3df0
Merge remote-tracking branch 'upstream/branch-0.18' into ego
afender Jan 26, 2021
03d8127
integrated extraction, upgraded test, debugging
afender Jan 27, 2021
f040790
fixed segfault
afender Jan 27, 2021
b17a4a5
disabled output
afender Jan 27, 2021
9a14440
cpp output basic check
afender Jan 28, 2021
44a9deb
cleanup
afender Jan 28, 2021
ec56089
update egonet wrapper and fixes
Iroy30 Jan 29, 2021
c49a40c
copyright years
afender Feb 1, 2021
2995ada
python style
afender Feb 1, 2021
5898167
add output types
Iroy30 Feb 3, 2021
065bf0e
flake8
Iroy30 Feb 3, 2021
2459309
pull branch 0.18
Iroy30 Feb 3, 2021
b3460ab
flake8, remove prints, copyright
Iroy30 Feb 3, 2021
ae4b7cb
fix mst pxd
Iroy30 Feb 3, 2021
3a3aa89
add batched ego test
Iroy30 Feb 3, 2021
ca5b3b3
copyright
Iroy30 Feb 3, 2021
8f7921f
update doc
Iroy30 Feb 3, 2021
98a71c5
Merge branch 'branch-0.18' into ego
afender Feb 4, 2021
0887b64
Update algorithms.hpp
afender Feb 4, 2021
da23265
Update egonet.cu
afender Feb 4, 2021
e76a5ab
clang style
afender Feb 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ for i in range(len(df_page)):
| | Betweenness Centrality | Single-GPU | |
| | Edge Betweenness Centrality | Single-GPU | |
| Community | | | |
| | EgoNet | Single-GPU | |
| | Leiden | Single-GPU | |
| | Louvain | Multi-GPU | |
| | Ensemble Clustering for Graphs | Single-GPU | |
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ add_library(cugraph SHARED
src/community/ECG.cu
src/community/triangles_counting.cu
src/community/extract_subgraph_by_vertex.cu
src/community/egonet.cu
src/cores/core_number.cu
src/traversal/two_hop_neighbors.cu
src/components/connectivity.cu
Expand Down
37 changes: 33 additions & 4 deletions cpp/include/algorithms.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,7 +14,7 @@
* limitations under the License.
*/
#pragma once

#include <experimental/graph.hpp>
afender marked this conversation as resolved.
Show resolved Hide resolved
#include <experimental/graph_view.hpp>
#include <graph.hpp>
#include <internals.hpp>
Expand Down Expand Up @@ -815,6 +815,7 @@ template <typename VT, typename ET, typename WT>
std::unique_ptr<GraphCOO<VT, ET, WT>> extract_subgraph_vertex(GraphCOOView<VT, ET, WT> const &graph,
VT const *vertices,
VT num_vertices);
} // namespace subgraph

/**
* @brief Wrapper function for Nvgraph balanced cut clustering
Expand All @@ -837,7 +838,6 @@ std::unique_ptr<GraphCOO<VT, ET, WT>> extract_subgraph_vertex(GraphCOOView<VT, E
* @param[out] clustering Pointer to device memory where the resulting clustering will
* be stored
*/
} // namespace subgraph

namespace ext_raft {
template <typename VT, typename ET, typename WT>
Expand Down Expand Up @@ -1191,6 +1191,35 @@ void katz_centrality(raft::handle_t const &handle,
bool has_initial_guess = false,
bool normalize = false,
bool do_expensive_check = false);

/**
* @brief Returns the induced EgoNet subgraph(s) of the neighbors centered at the vertices listed in
* @p source_vertex, within a given radius.
*
* One subgraph is produced per source vertex. The results for all sources are returned
* concatenated; the returned offsets array delimits the edges that belong to each subgraph.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object; the induced egonet subgraphs are extracted from
* @p graph_view.
* @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs).
* @param n_subgraphs Number of induced EgoNet subgraphs to extract (i.e. number of elements in @p
* source_vertex).
* @param radius Include all neighbors of distance <= radius from @p source_vertex.
* @return std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>,
* rmm::device_uvector<weight_t>, rmm::device_uvector<size_t>> Quadruplet of edge source vertices,
* edge destination vertices, edge weights, and edge offsets for each induced EgoNet subgraph.
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
rmm::device_uvector<weight_t>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &handle,
graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const &graph_view,
vertex_t *source_vertex,
vertex_t n_subgraphs,
vertex_t radius);
} // namespace experimental
} // namespace cugraph
44 changes: 43 additions & 1 deletion cpp/include/utilities/cython.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -109,6 +109,40 @@ struct graph_container_t {
experimental::graph_properties_t graph_props;
};

/**
* @brief Owning struct. Allows returning multiple edge lists and edge offsets.
* cython only
*
* All edge lists are stored back to back in src_indices / dst_indices / edge_data;
* subgraph_offsets delimits the range of edges that belongs to each subgraph.
*
* @param number_of_vertices The total number of vertices
* @param number_of_edges The total number of edges (number of elements in src_indices,
* dst_indices and edge_data)
* @param number_of_subgraph The number of subgraphs, i.e. the number of elements in
* subgraph_offsets minus 1
* @param src_indices This array of size E (number of edges) contains the index of the
* source for each edge. Indices must be in the range [0, V-1].
* @param dst_indices This array of size E (number of edges) contains the index of the
* destination for each edge. Indices must be in the range [0, V-1].
* @param edge_data This array of size E (number of edges) contains the weight for each
* edge. This array can be null in which case the graph is considered
* unweighted.
* @param subgraph_offsets This array of size number_of_subgraph + 1 contains the edge offsets
* for each subgraph
*/
struct cy_multi_edgelists_t {
size_t number_of_vertices;
size_t number_of_edges;
size_t number_of_subgraph;
// The buffers below are owned by this struct (held through std::unique_ptr).
std::unique_ptr<rmm::device_buffer> src_indices;
std::unique_ptr<rmm::device_buffer> dst_indices;
std::unique_ptr<rmm::device_buffer> edge_data;
std::unique_ptr<rmm::device_buffer> subgraph_offsets;
};
// cy_multi_edgelists_t() : number_of_vertices(0), number_of_edges(0);

// FIXME: finish description for vertex_partition_offsets
//
// Factory function for populating an empty graph container with a new graph
Expand Down Expand Up @@ -246,6 +280,14 @@ void call_sssp(raft::handle_t const& handle,
vertex_t* predecessors,
const vertex_t source_vertex);

// Wrapper for calling egonet through a graph container.
//
// Template parameters:
//   vertex_t - type of the vertex identifiers (also used for n_subgraphs and radius).
//   weight_t - type of the edge weights.
//
// Parameters:
//   handle          - RAFT handle used to run the algorithm.
//   graph_container - container holding the graph the egonets are extracted from.
//   source_vertex   - pointer to the egonet center vertices.
//                     NOTE(review): presumably n_subgraphs elements in device memory, as for
//                     experimental::extract_ego - confirm against the implementation.
//   n_subgraphs     - number of egonets to extract.
//   radius          - NOTE(review): presumably the maximum distance from a center vertex, as in
//                     experimental::extract_ego - confirm.
//
// Returns an owning cy_multi_edgelists_t (edge lists plus per-subgraph edge offsets).
template <typename vertex_t, typename weight_t>
std::unique_ptr<cy_multi_edgelists_t> call_egonet(raft::handle_t const& handle,
graph_container_t const& graph_container,
vertex_t* source_vertex,
vertex_t n_subgraphs,
vertex_t radius);

// Helper for setting up subcommunicators, typically called as part of the
// user-initiated comms initialization in Python.
//
Expand Down
209 changes: 209 additions & 0 deletions cpp/src/community/egonet.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Alex Fender [email protected]
#include <algorithms.hpp>
#include <cstddef>
#include <memory>
#include <tuple>
#include <utility>

#include <rmm/thrust_rmm_allocator.h>
#include <thrust/transform.h>
#include <ctime>

#include <graph.hpp>

#include <utilities/error.hpp>
#include "experimental/graph.hpp"
#include "utilities/graph_utils.cuh"

#include <experimental/graph_functions.hpp>
#include <experimental/graph_view.hpp>

namespace {
/*
Description
Let the egonet graph of a node x be the subgraph that includes the neighborhood of x and all edges
between them.

Naive algorithm
- Add center node x to the graph.
- Go through all the neighbors y of this center node x, add edge (x, y) to the graph.
- For each neighbor y of center node x, go through all the neighbors z of center node x; if there
is an edge between y and z in the original graph, add edge (y, z) to our new graph.

Rather than implementing custom one-hop/two-hop features, we propose a generic k-hop solution
leveraging BFS with a cutoff and subgraph extraction.
*/

/**
 * @brief Single-GPU egonet extraction: one cutoff BFS per source, then a batched induced-subgraph
 * extraction.
 *
 * For each source vertex a BFS limited to @p radius hops is run. The ids of the vertices it
 * reached are appended to a flat `neighbors` array, and the per-source ranges are recorded in an
 * offsets array. Both arrays are then handed to `extract_induced_subgraphs`, which builds the
 * edge lists.
 *
 * @param handle RAFT handle; its stream is used for the copies and the thrust calls.
 * @param csr_view Single-GPU graph view the egonets are extracted from.
 * @param source_vertex Device pointer to the @p n_subgraphs egonet center vertices.
 * @param n_subgraphs Number of center vertices / subgraphs.
 * @param radius BFS depth cutoff.
 * @return Tuple of (edge sources, edge destinations, edge weights, per-subgraph edge offsets) as
 * returned by `extract_induced_subgraphs`.
 */
template <typename vertex_t, typename edge_t, typename weight_t>
std::tuple<rmm::device_uvector<vertex_t>,
           rmm::device_uvector<vertex_t>,
           rmm::device_uvector<weight_t>,
           rmm::device_uvector<size_t>>
extract(
  raft::handle_t const &handle,
  cugraph::experimental::graph_view_t<vertex_t, edge_t, weight_t, false, false> const &csr_view,
  vertex_t *source_vertex,
  vertex_t n_subgraphs,
  vertex_t radius)
{
  auto v      = csr_view.get_number_of_vertices();
  auto e      = csr_view.get_number_of_edges();
  auto stream = handle.get_stream();

  // Real-valued average degree: `e / v` on integral types would truncate before the conversion
  // (and divide by zero on an empty graph).
  double const avg_degree = v > 0 ? static_cast<double>(e) / static_cast<double>(v) : 0.0;

  // It is the right thing to accept device memory for source_vertex
  // FIXME consider adding a device API to BFS (ie. accept source on the device)
  std::vector<vertex_t> h_source_vertex(n_subgraphs);
  raft::update_host(h_source_vertex.data(), source_vertex, n_subgraphs, stream);
  // The copy was issued on `stream`; make sure it has landed before the host reads the sources.
  CUGRAPH_EXPECTS(cudaStreamSynchronize(stream) == cudaSuccess,
                  "Failed to synchronize the stream after copying the egonet sources to host");

  // Offsets are accumulated on the host and uploaded once at the end; indexing a device vector
  // from the host costs one device transfer per element access.
  std::vector<size_t> h_neighbors_offsets(static_cast<size_t>(n_subgraphs) + 1, 0);

  // reserve some reasonable memory, but could grow larger than that
  rmm::device_vector<vertex_t> neighbors;
  neighbors.reserve(static_cast<size_t>(v + avg_degree * n_subgraphs * radius));

  // Scratch buffers shared by all sources. The loop below already relies on BFS writing every
  // entry of the distance array (unreached vertices must hold the sentinel), so the buffers can
  // be reused instead of being reallocated and zero-filled for each source.
  rmm::device_vector<vertex_t> reached(v);
  rmm::device_vector<vertex_t> predecessors(v);  // required by the BFS call, not used here
  auto constexpr unreached = std::numeric_limits<vertex_t>::max();

  // each source should be done concurrently in the future
  for (vertex_t i = 0; i < n_subgraphs; i++) {
    // BFS with cutoff
    bool direction_optimizing = false;
    cugraph::experimental::bfs<vertex_t, edge_t, weight_t, false>(handle,
                                                                  csr_view,
                                                                  reached.data().get(),
                                                                  predecessors.data().get(),
                                                                  h_source_vertex[i],
                                                                  direction_optimizing,
                                                                  radius);

    // identify reached vertex ids from distance array: a reached entry is replaced by its own
    // vertex id, an unreached one keeps the sentinel
    thrust::transform(rmm::exec_policy(stream)->on(stream),
                      thrust::make_counting_iterator(vertex_t{0}),
                      thrust::make_counting_iterator(v),
                      reached.begin(),
                      reached.begin(),
                      [sentinel = std::numeric_limits<vertex_t>::max()] __device__(
                        auto id, auto val) { return val < sentinel ? id : sentinel; });

    // removes unreached data (compacts the reached ids to the front of the buffer)
    auto reached_end = thrust::remove(
      rmm::exec_policy(stream)->on(stream), reached.begin(), reached.end(), unreached);

    // update extraction input
    size_t n_reached           = thrust::distance(reached.begin(), reached_end);
    h_neighbors_offsets[i + 1] = h_neighbors_offsets[i] + n_reached;
    if (h_neighbors_offsets[i + 1] > neighbors.capacity())
      neighbors.reserve(h_neighbors_offsets[i + 1] * 2);
    neighbors.insert(neighbors.end(), reached.begin(), reached_end);
  }

  // single host-to-device upload of the offsets
  rmm::device_vector<size_t> neighbors_offsets(h_neighbors_offsets.begin(),
                                               h_neighbors_offsets.end());

  // extract
  return cugraph::experimental::extract_induced_subgraphs(
    handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs);
}
} // namespace
namespace cugraph {
namespace experimental {
/**
 * Public entry point for egonet extraction.
 *
 * Rejects the multi-GPU case, validates the arguments, and forwards to the single-GPU
 * implementation in the anonymous namespace of this file.
 *
 * Argument checks (each failure is reported through CUGRAPH_EXPECTS):
 *  - 0 < n_subgraphs < number of vertices
 *  - 0 < radius < number of vertices
 * The range of the values pointed to by source_vertex is checked in bfs.
 */
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>,
           rmm::device_uvector<vertex_t>,
           rmm::device_uvector<weight_t>,
           rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &handle,
            graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu> const &graph_view,
            vertex_t *source_vertex,
            vertex_t n_subgraphs,
            vertex_t radius)
{
  if (multi_gpu) {
    // No multi-GPU implementation yet: report it, with an empty result as the fallback value.
    CUGRAPH_FAIL("Unimplemented.");
    auto empty_stream = handle.get_stream();
    return std::make_tuple(rmm::device_uvector<vertex_t>(0, empty_stream),
                           rmm::device_uvector<vertex_t>(0, empty_stream),
                           rmm::device_uvector<weight_t>(0, empty_stream),
                           rmm::device_uvector<size_t>(0, empty_stream));
  }

  auto const num_vertices = graph_view.get_number_of_vertices();

  // Source count.
  // NOTE(review): the upper bound is strict, so n_subgraphs == number of vertices is rejected
  // even though the message only mentions "more" - confirm this is intended.
  CUGRAPH_EXPECTS(n_subgraphs > 0, "Need at least one source to extract the egonet from");
  CUGRAPH_EXPECTS(n_subgraphs < num_vertices,
                  "Can't have more sources to extract from than vertices in the graph");

  // Radius.
  CUGRAPH_EXPECTS(radius > 0, "Radius should be at least 1");
  CUGRAPH_EXPECTS(radius < num_vertices, "radius is too large");

  // source_vertex range is checked in bfs.
  return extract<vertex_t, edge_t, weight_t>(
    handle, graph_view, source_vertex, n_subgraphs, radius);
}

// Explicit instantiations of extract_ego. Only single-GPU variants (multi_gpu = false) are
// instantiated. The graph_view_t template arguments are
// <vertex_t, edge_t, weight_t, false, multi_gpu>.

// SG FP32
// vertex_t = int32_t, edge_t = int32_t, weight_t = float
template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<float>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &,
graph_view_t<int32_t, int32_t, float, false, false> const &,
int32_t *,
int32_t,
int32_t);
// vertex_t = int32_t, edge_t = int64_t, weight_t = float
template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<float>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &,
graph_view_t<int32_t, int64_t, float, false, false> const &,
int32_t *,
int32_t,
int32_t);
// vertex_t = int64_t, edge_t = int64_t, weight_t = float
template std::tuple<rmm::device_uvector<int64_t>,
rmm::device_uvector<int64_t>,
rmm::device_uvector<float>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &,
graph_view_t<int64_t, int64_t, float, false, false> const &,
int64_t *,
int64_t,
int64_t);

// SG FP64
// vertex_t = int32_t, edge_t = int32_t, weight_t = double
template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<double>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &,
graph_view_t<int32_t, int32_t, double, false, false> const &,
int32_t *,
int32_t,
int32_t);
// vertex_t = int32_t, edge_t = int64_t, weight_t = double
template std::tuple<rmm::device_uvector<int32_t>,
rmm::device_uvector<int32_t>,
rmm::device_uvector<double>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &,
graph_view_t<int32_t, int64_t, double, false, false> const &,
int32_t *,
int32_t,
int32_t);
// vertex_t = int64_t, edge_t = int64_t, weight_t = double
template std::tuple<rmm::device_uvector<int64_t>,
rmm::device_uvector<int64_t>,
rmm::device_uvector<double>,
rmm::device_uvector<size_t>>
extract_ego(raft::handle_t const &,
graph_view_t<int64_t, int64_t, double, false, false> const &,
int64_t *,
int64_t,
int64_t);
} // namespace experimental
} // namespace cugraph
Loading