From 93ab18dab5103b1a199c789d178a787aa10ac040 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 24 Mar 2020 10:03:27 -0400 Subject: [PATCH 01/40] remove gdf_column from katz --- cpp/CMakeLists.txt | 1 - cpp/include/algorithms.h | 74 ------------ cpp/include/algorithms.hpp | 74 ++++++++++++ cpp/include/functions.h | 16 --- cpp/include/graph.hpp | 42 +++++-- cpp/src/centrality/katz_centrality.cu | 41 ++++--- cpp/src/structure/graph.cu | 93 ++++++++++++++- cpp/src/utilities/degree.cu | 87 -------------- cpp/tests/centrality/katz_centrality_test.cu | 102 +++++++++-------- .../betweenness_centrality_wrapper.pyx | 2 +- python/cugraph/centrality/katz_centrality.pxd | 14 +-- .../centrality/katz_centrality_wrapper.pyx | 59 +++++----- python/cugraph/structure/graph.py | 13 +-- python/cugraph/structure/graph_new.pxd | 6 +- .../cugraph/structure/graph_new_wrapper.pyx | 106 ++++++++++++------ python/cugraph/structure/graph_wrapper.pyx | 2 + 16 files changed, 390 insertions(+), 342 deletions(-) delete mode 100644 cpp/src/utilities/degree.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b1c4a9f0802..daa81db2fd8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -317,7 +317,6 @@ add_library(cugraph SHARED src/db/db_object.cu src/db/db_parser_integration_test.cu src/db/db_operators.cu - src/utilities/degree.cu src/utilities/cusparse_helper.cu src/structure/cugraph.cu src/structure/graph.cu diff --git a/cpp/include/algorithms.h b/cpp/include/algorithms.h index b3a63d2287f..346600d1089 100644 --- a/cpp/include/algorithms.h +++ b/cpp/include/algorithms.h @@ -218,79 +218,5 @@ void snmg_pagerank (gdf_column **src_col_ptrs, const size_t n_gpus, const float damping_factor, const int n_iter); -/** - * @Synopsis Compute the Katz centrality for the nodes of the graph G - * - * @Param[in] *graph cuGRAPH graph descriptor with a valid edgeList or adjList - * - * @Param[out] *katz_centrality If set to a valid column, this is populated by the katz centrality of every 
vertex in the graph - * - * @Param[in] alpha Attenuation factor with a default value of 0.1. Alpha is set to - 1/(lambda_max) if it is greater where lambda_max is the maximum degree - of the graph. - * - * @Param[in] max_iter The maximum number of iterations before an answer is returned. This can - be used to limit the execution time and do an early exit before the - solver reaches the convergence tolerance. - If this value is lower or equal to 0 cuGraph will use the default - value, which is 100. - * - * @Param[in] tol Set the tolerance the approximation, this parameter should be a small - magnitude value. - The lower the tolerance the better the approximation. If this value is - 0.0f, cuGraph will use the default value which is 1.0E-5. - Setting too small a tolerance can lead to non-convergence due to - numerical roundoff. Usually values between 0.01 and 0.00001 are - acceptable. - * - * @Param[in] has_guess Flag to determine whether \p katz_centrality contains an initial guess for katz centrality values - * - * @Param[in] normalized If True normalize the resulting katz centrality values - * - * @throws cugraph::logic_error when an error occurs. - */ -/* ----------------------------------------------------------------------------*/ -void katz_centrality(Graph* graph, - gdf_column *katz_centrality, - double alpha, - int max_iter, - double tol, - bool has_guess, - bool normalized); - -/** - * @Synopsis Compute the Core Number for the nodes of the graph G - * - * @Param[in] *graph cuGRAPH graph descriptor with a valid edgeList or adjList - * - * @Param[out] *core_number If set to a valid column, this is populated by the core number of every vertex in the graph - * - * @throws cugraph::logic_error when an error occurs. 
- */ -/* ----------------------------------------------------------------------------*/ -void core_number(Graph* graph, - gdf_column *core_number); - -/** - * @Synopsis Compute K Core of the graph G - * - * @Param[in] *in_graph cuGRAPH graph descriptor with a valid edgeList or adjList - * - * @Param[in] k Order of the core. This value must not be negative. - * - * @Param[in] *vertex_id User specified vertex identifiers for which core number values are supplied - * - * @Param[in] *core_number User supplied core number values corresponding to vertex_id - * - * @Param[out] *out_graph K Core subgraph - * - * @throws cugraph::logic_error when an error occurs. - */ -/* ----------------------------------------------------------------------------*/ -void k_core(Graph* in_graph, - int k, - gdf_column *vertex_id, - gdf_column *core_number, - Graph* out_graph); } //namespace cugraph diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 106533576eb..1d022d30ee1 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -220,4 +220,78 @@ void k_truss_subgraph(experimental::GraphCOO const &graph, int k, experimental::GraphCOO &output_graph); +/** + * @brief Compute the Katz centrality for the nodes of the graph G + * + * @throws cugraph::logic_error with a custom message when an error occurs. + * + * @tparam VT Type of vertex identifiers. Supported value : int (signed, 32-bit) + * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) + * @tparam WT Type of edge weights. Supported values : float or double. + * @tparam result_t Type of computed result. Supported values : float + * + * @param[in] graph cuGRAPH graph descriptor, should contain the connectivity information as a CSR + * @param[out] result Device array of centrality scores + * @param[in] alpha Attenuation factor with a default value of 0.1. Alpha is set to + * 1/(lambda_max) if it is greater where lambda_max is the maximum degree + * of the graph. 
+ * @param[in] max_iter The maximum number of iterations before an answer is returned. This can + * be used to limit the execution time and do an early exit before the + * solver reaches the convergence tolerance. + * If this value is lower or equal to 0 cuGraph will use the default + * value, which is 100. + * @param[in] tol Set the tolerance the approximation, this parameter should be a small + * magnitude value. + * The lower the tolerance the better the approximation. If this value is + * 0.0f, cuGraph will use the default value which is 1.0E-5. + * Setting too small a tolerance can lead to non-convergence due to + * numerical roundoff. Usually values between 0.01 and 0.00001 are + * acceptable. + * @param[in] has_guess Flag to determine whether \p katz_centrality contains an initial guess for katz centrality values + * @param[in] normalized If True normalize the resulting katz centrality values + */ +template +void katz_centrality(experimental::GraphCSR const &graph, + result_t *result, + double alpha, + int max_iter, + double tol, + bool has_guess, + bool normalized); + +/** + * @brief Compute the Core Number for the nodes of the graph G + * + * @param[in] graph cuGRAPH graph descriptor with a valid edgeList or adjList + * @param[out] core_number Populated by the core number of every vertex in the graph + * + * @throws cugraph::logic_error when an error occurs. + */ +/* ----------------------------------------------------------------------------*/ +template +void core_number(experimental::GraphCSR const &graph, VT *core_number); + +/** + * @Synopsis Compute K Core of the graph G + * + * @Param[in] *in_graph cuGRAPH graph descriptor with a valid edgeList or adjList + * + * @Param[in] k Order of the core. This value must not be negative. 
+ * + * @Param[in] *vertex_id User specified vertex identifiers for which core number values are supplied + * + * @Param[in] *core_number User supplied core number values corresponding to vertex_id + * + * @Param[out] *out_graph K Core subgraph + * + * @throws cugraph::logic_error when an error occurs. + */ +/* ----------------------------------------------------------------------------*/ +template +void k_core(experimental::GraphCSR const &graph, + int k, + VT *vertex_id, + VT *core_number, + experimental::GraphCOO &out_graph); + } //namespace cugraph diff --git a/cpp/include/functions.h b/cpp/include/functions.h index ddffbf694c4..6d64655f45f 100644 --- a/cpp/include/functions.h +++ b/cpp/include/functions.h @@ -219,22 +219,6 @@ void get_two_hop_neighbors(Graph* graph, gdf_column* first, gdf_column* second); /* ----------------------------------------------------------------------------*/ void snmg_csrmv (size_t * part_offsets, gdf_column * off, gdf_column * ind, gdf_column * val, gdf_column ** x_col); -/** - * @Synopsis Computes degree(in, out, in+out) of all the nodes of a Graph - * - * @Param[in]* graph in : graph descriptor with graph->transposedAdjList or graph->adjList present - * @Param[in] x in : integer value indicating type of degree calculation - * 0 : in+out degree - * 1 : in-degree - * 2 : out-degree - * - * @Param[out] *degree out : gdf_column of size V (V is number of vertices) initialized to zeros. - * Contains the computed degree of every vertex. - * - * @throws cugraph::logic_error when an error occurs. 
- */ -/* ----------------------------------------------------------------------------*/ -void degree(Graph* graph, gdf_column *degree, int x); int get_device(const void *ptr); /** diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index f5356a26e74..bdc04d50e25 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -47,6 +47,13 @@ class GraphBase { VT number_of_vertices; ET number_of_edges; + /** + * @brief Fill the identifiers array with the vertex identifiers. + * + * @param[out] identifier Pointer to device memory to store the vertex identifiers + */ + void get_vertex_identifiers(VT *identifiers) const; + GraphBase(WT const *edge_data_, VT number_of_vertices_, ET number_of_edges_): edge_data(edge_data_), prop(), @@ -68,6 +75,20 @@ class GraphCOO: public GraphBase { VT const *src_indices{nullptr}; ///< rowInd VT const *dst_indices{nullptr}; ///< colInd + /** + * @brief Computes degree(in, out, in+out) of all the nodes of a Graph + * + * @throws cugraph::logic_error when an error occurs. + * + * @param[out] degree Device array of size V (V is number of vertices) initialized to zeros. + * Will contain the computed degree of every vertex. + * @param[in] x Integer value indicating type of degree calculation + * 0 : in+out degree + * 1 : in-degree + * 2 : out-degree + */ + void degree(ET *degree, int x) const; + /** * @brief Default constructor */ @@ -108,13 +129,6 @@ class GraphCompressedSparseBase: public GraphBase { ET const *offsets{nullptr}; ///< CSR offsets VT const *indices{nullptr}; ///< CSR indices - /** - * @brief Fill the identifiers array with the vertex identifiers. 
- * - * @param[out] identifier Pointer to device memory to store the vertex identifiers - */ - void get_vertex_identifiers(VT *identifiers) const; - /** * @brief Fill the identifiers in the array with the source vertex identifiers * @@ -122,6 +136,20 @@ class GraphCompressedSparseBase: public GraphBase { */ void get_source_indices(VT *src_indices) const; + /** + * @brief Computes degree(in, out, in+out) of all the nodes of a Graph + * + * @throws cugraph::logic_error when an error occurs. + * + * @param[out] degree Device array of size V (V is number of vertices) initialized to zeros. + * Will contain the computed degree of every vertex. + * @param[in] x Integer value indicating type of degree calculation + * 0 : in+out degree + * 1 : in-degree + * 2 : out-degree + */ + void degree(ET *degree, int x) const; + /** * @brief Wrap existing arrays representing adjacency lists in a Graph. * GraphCSR does not own the memory used to represent this graph. This diff --git a/cpp/src/centrality/katz_centrality.cu b/cpp/src/centrality/katz_centrality.cu index 27709d7f1dd..2bed72e8864 100644 --- a/cpp/src/centrality/katz_centrality.cu +++ b/cpp/src/centrality/katz_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,34 +21,31 @@ * @file katz_centrality.cu * --------------------------------------------------------------------------*/ -#include +#include #include "utilities/error_utils.h" #include #include namespace cugraph { -void katz_centrality(Graph *graph, - gdf_column *katz_centrality, - double alpha, - int max_iter, - double tol, - bool has_guess, - bool normalized) { - CHECK_GRAPH(graph); - CUGRAPH_EXPECTS(graph->adjList->offsets->dtype == GDF_INT32, "Unsupported data type: offsets need to be int32"); - CUGRAPH_EXPECTS(graph->adjList->indices->dtype == GDF_INT32, "Unsupported data type: indices need to be int32"); - CUGRAPH_EXPECTS(katz_centrality->dtype == GDF_FLOAT64, "Unsupported data type: centrality needs to be float64"); - CUGRAPH_EXPECTS(katz_centrality->size == graph->numberOfVertices, "Column size mismatch"); + +template +void katz_centrality(experimental::GraphCSR const &graph, + result_t *result, + double alpha, + int max_iter, + double tol, + bool has_guess, + bool normalized) { const bool isStatic = true; - using HornetGraph = hornet::gpu::HornetStatic; - using HornetInit = hornet::HornetInit; + using HornetGraph = hornet::gpu::HornetStatic; + using HornetInit = hornet::HornetInit; using Katz = hornets_nest::KatzCentralityStatic; - HornetInit init(graph->numberOfVertices, graph->adjList->indices->size, - reinterpret_cast(graph->adjList->offsets->data), - reinterpret_cast(graph->adjList->indices->data)); + + HornetInit init(graph.number_of_vertices, graph.number_of_edges, + graph.offsets, graph.indices); HornetGraph hnt(init, hornet::DeviceType::DEVICE); - Katz katz(hnt, alpha, max_iter, tol, normalized, isStatic, reinterpret_cast(katz_centrality->data)); + Katz katz(hnt, alpha, max_iter, tol, normalized, isStatic, result); if (katz.getAlpha() < alpha) { CUGRAPH_FAIL("Error : alpha is not small enough for convergence"); } @@ -56,6 +53,8 @@ void katz_centrality(Graph *graph, if (!katz.hasConverged()) { CUGRAPH_FAIL("Error : Convergence not reached"); } 
- } + +template void katz_centrality(experimental::GraphCSR const &, double *, double, int, double, bool, bool); + } diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index f0ed7d07a6b..0a8eb62150a 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -12,14 +12,47 @@ #include #include "utilities/graph_utils.cuh" #include "utilities/error_utils.h" +#include "utilities/cuda_utils.cuh" + +namespace { + +template +void degree_from_offsets(vertex_t number_of_vertices, + edge_t const *offsets, + edge_t *degree, + cudaStream_t stream) { + + // Computes out-degree for x = 0 and x = 2 + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(number_of_vertices), + [offsets, degree] __device__ (vertex_t v) { + degree[v] = offsets[v+1]-offsets[v]; + }); +} + +template +void degree_from_vertex_ids(edge_t number_of_edges, + vertex_t const *indices, + edge_t *degree, + cudaStream_t stream) { + + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(number_of_edges), + [indices, degree] __device__ (edge_t e) { + cugraph::atomicAdd(degree + indices[e], 1); + }); +} + +} //namespace anonymous namespace cugraph { namespace experimental { template -void GraphCompressedSparseBase::get_vertex_identifiers(VT *identifiers) const { - CUGRAPH_EXPECTS( offsets != nullptr , "No graph specified"); - cugraph::detail::sequence(GraphBase::number_of_vertices, identifiers); +void GraphBase::get_vertex_identifiers(VT *identifiers) const { + cugraph::detail::sequence(number_of_vertices, identifiers); } template @@ -28,7 +61,61 @@ void GraphCompressedSparseBase::get_source_indices(VT *src_indices) co cugraph::detail::offsets_to_indices(offsets, GraphBase::number_of_vertices, src_indices); } +template +void GraphCOO::degree(ET *degree, int x) const { + // + // NOTE: We assume offsets/indices are a CSR. 
If a CSC is passed + // in then x should be modified to reflect the expected direction. + // (e.g. if you have a CSC and you want in-degree (x=1) then pass + // the offsets/indices and request an out-degree (x=2)) + // + + // Calculates the degree of all vertices of the graph + // x = 0: in+out degree + // x = 1: in-degree + // x = 2: out-degree + + cudaStream_t stream{nullptr}; + + if (x != 1) { + degree_from_vertex_ids(GraphBase::number_of_edges, src_indices, degree, stream); + } + + if (x != 2) { + degree_from_vertex_ids(GraphBase::number_of_edges, dst_indices, degree, stream); + } +} + +template +void GraphCompressedSparseBase::degree(ET *degree, int x) const { + // + // NOTE: We assume offsets/indices are a CSR. If a CSC is passed + // in then x should be modified to reflect the expected direction. + // (e.g. if you have a CSC and you want in-degree (x=1) then pass + // the offsets/indices and request an out-degree (x=2)) + // + + // Calculates the degree of all vertices of the graph + // x = 0: in+out degree + // x = 1: in-degree + // x = 2: out-degree + + cudaStream_t stream{nullptr}; + + if (x != 1) { + degree_from_offsets(GraphBase::number_of_vertices, offsets, degree, stream); + } + + if (x != 2) { + degree_from_vertex_ids(GraphBase::number_of_edges, indices, degree, stream); + } +} + // explicit instantiation +template class GraphBase; +template class GraphBase; +template class GraphCOO; +template class GraphCOO; template class GraphCompressedSparseBase; template class GraphCompressedSparseBase; diff --git a/cpp/src/utilities/degree.cu b/cpp/src/utilities/degree.cu deleted file mode 100644 index b1d7452b062..00000000000 --- a/cpp/src/utilities/degree.cu +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include "utilities/error_utils.h" -#include "utilities/graph_utils.cuh" - -void degree_impl(int n, int e, gdf_column* col_ptr, gdf_column* degree, bool offsets) { - if(offsets == true) { - dim3 nthreads, nblocks; - nthreads.x = min(n, CUDA_MAX_KERNEL_THREADS); - nthreads.y = 1; - nthreads.z = 1; - nblocks.x = min((n + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS); - nblocks.y = 1; - nblocks.z = 1; - - switch (col_ptr->dtype) { - case GDF_INT32: cugraph::detail::degree_offsets <<>>(n, e, static_cast(col_ptr->data), static_cast(degree->data));break; - default: CUGRAPH_FAIL("Unsupported data type"); - } - } - else { - dim3 nthreads, nblocks; - nthreads.x = min(e, CUDA_MAX_KERNEL_THREADS); - nthreads.y = 1; - nthreads.z = 1; - nblocks.x = min((e + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS); - nblocks.y = 1; - nblocks.z = 1; - - switch (col_ptr->dtype) { - case GDF_INT32: cugraph::detail::degree_coo <<>>(n, e, static_cast(col_ptr->data), static_cast(degree->data));break; - default: CUGRAPH_FAIL("Unsupported data type"); - } - } - -} - -namespace cugraph { - -void degree(Graph *graph, gdf_column *degree, int x) { - // Calculates the degree of all vertices of the graph - // x = 0: in+out degree - // x = 1: in-degree - // x = 2: out-degree - CUGRAPH_EXPECTS(graph->adjList != nullptr || graph->transposedAdjList != nullptr, "Invalid API parameter"); - int n; - int e; - if(graph->adjList != nullptr) { - n = graph->adjList->offsets->size -1; - e = graph->adjList->indices->size; - } - else { - n = graph->transposedAdjList->offsets->size - 
1; - e = graph->transposedAdjList->indices->size; - } - - if(x!=1) { - // Computes out-degree for x=0 and x=2 - if(graph->adjList) - degree_impl(n, e, graph->adjList->offsets, degree, true); - else - degree_impl(n, e, graph->transposedAdjList->indices, degree, false); - } - - if(x!=2) { - // Computes in-degree for x=0 and x=1 - if(graph->adjList) - degree_impl(n, e, graph->adjList->indices, degree, false); - else - degree_impl(n, e, graph->transposedAdjList->offsets, degree, true); - } -} -} \ No newline at end of file diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index 026781da338..e076a13e991 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -3,10 +3,12 @@ #include "gmock/gmock-generated-matchers.h" #include "high_res_clock.h" #include "cuda_profiler_api.h" -#include +#include #include "test_utils.h" #include #include +#include +#include std::vector getGoldenTopKIds(std::ifstream& fs_result, int k = 10) { @@ -21,30 +23,33 @@ getGoldenTopKIds(std::ifstream& fs_result, int k = 10) { } std::vector -getTopKIds(gdf_column_ptr katz, int k = 10) { - int count = katz.get()->size; +getTopKIds(double * p_katz, int count, int k = 10) { cudaStream_t stream = nullptr; rmm::device_vector id(count); thrust::sequence(rmm::exec_policy(stream)->on(stream), id.begin(), id.end()); - auto colptr = thrust::device_pointer_cast(static_cast(katz.get()->data)); thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), - colptr, colptr + count, id.begin(), thrust::greater()); + p_katz, + p_katz + count, + id.begin(), + thrust::greater()); std::vector topK(k); thrust::copy(id.begin(), id.begin() + k, topK.begin()); return topK; } -int -getMaxDegree(cugraph::Graph * G) { - cugraph::add_adj_list(G); - std::vector out_degree(G->numberOfVertices); - gdf_column_ptr col_out_degree = create_gdf_column(out_degree); - cugraph::degree(G, col_out_degree.get(), 2); - auto degreePtr = 
thrust::device_pointer_cast(static_cast(col_out_degree.get()->data)); - cudaStream_t stream = nullptr; - int max_out_degree = thrust::reduce(rmm::exec_policy(stream)->on(stream), - degreePtr, degreePtr + col_out_degree.get()->size, static_cast(-1), thrust::maximum()); - return max_out_degree; +template +int getMaxDegree(cugraph::experimental::GraphCSR const &g) { + cudaStream_t stream{nullptr}; + + rmm::device_vector degree_vector(g.number_of_vertices); + ET *p_degree = degree_vector.data().get(); + g.degree(p_degree, 2); + ET max_out_degree = thrust::reduce(rmm::exec_policy(stream)->on(stream), + p_degree, + p_degree + g.number_of_vertices, + static_cast(-1), + thrust::maximum()); + return max_out_degree; } typedef struct Katz_Usecase_t { @@ -72,7 +77,7 @@ typedef struct Katz_Usecase_t { } Katz_Usecase; class Tests_Katz : public ::testing::TestWithParam { - public: +public: Tests_Katz() {} static void SetupTestCase() {} static void TearDownTestCase() {} @@ -80,48 +85,47 @@ class Tests_Katz : public ::testing::TestWithParam { virtual void TearDown() {} void run_current_test(const Katz_Usecase& param) { - Graph_ptr G{new cugraph::Graph, Graph_deleter}; - gdf_column_ptr col_src, col_dest, col_katz_centrality; + FILE* fpin = fopen(param.matrix_file.c_str(),"r"); + ASSERT_NE(fpin, nullptr) << "fopen (" << param.matrix_file << ") failure."; - FILE* fpin = fopen(param.matrix_file.c_str(),"r"); - ASSERT_NE(fpin, nullptr) << "fopen (" << param.matrix_file << ") failure."; + std::ifstream fs_result(param.result_file); + ASSERT_EQ(fs_result.is_open(), true) << "file open (" << param.result_file << ") failure."; - std::ifstream fs_result(param.result_file); - ASSERT_EQ(fs_result.is_open(), true) << "file open (" << param.result_file << ") failure."; + int m, k; + int nnz; + MM_typecode mc; + ASSERT_EQ(mm_properties(fpin, 1, &mc, &m, &k, &nnz),0) << "could not read Matrix Market file properties"<< "\n"; + ASSERT_TRUE(mm_is_matrix(mc)); + ASSERT_TRUE(mm_is_coordinate(mc)); + 
ASSERT_FALSE(mm_is_complex(mc)); + ASSERT_FALSE(mm_is_skew(mc)); - int m, k; - int nnz; - MM_typecode mc; - ASSERT_EQ(mm_properties(fpin, 1, &mc, &m, &k, &nnz),0) << "could not read Matrix Market file properties"<< "\n"; - ASSERT_TRUE(mm_is_matrix(mc)); - ASSERT_TRUE(mm_is_coordinate(mc)); - ASSERT_FALSE(mm_is_complex(mc)); - ASSERT_FALSE(mm_is_skew(mc)); + // Allocate memory on host + std::vector cooRowInd(nnz), cooColInd(nnz); + std::vector cooVal(nnz); + std::vector katz_centrality(m); - // Allocate memory on host - std::vector cooRowInd(nnz), cooColInd(nnz); - std::vector cooVal(nnz); - std::vector katz_centrality(m); + // Read + ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooRowInd[0], &cooColInd[0], &cooVal[0], NULL)) , 0)<< "could not read matrix data"<< "\n"; + ASSERT_EQ(fclose(fpin),0); - // Read - ASSERT_EQ( (mm_to_coo(fpin, 1, nnz, &cooRowInd[0], &cooColInd[0], &cooVal[0], NULL)) , 0)<< "could not read matrix data"<< "\n"; - ASSERT_EQ(fclose(fpin),0); + CSR_Result result; + ConvertCOOtoCSR(&cooColInd[0], &cooRowInd[0], nnz, result); - // gdf columns - col_src = create_gdf_column(cooRowInd); - col_dest = create_gdf_column(cooColInd); - col_katz_centrality = create_gdf_column(katz_centrality); + cugraph::experimental::GraphCSR G(result.rowOffsets, result.colIndices, nullptr, m, nnz); - cugraph::edge_list_view(G.get(), col_src.get(), col_dest.get(), nullptr); - int max_out_degree = getMaxDegree(G.get()); - double alpha = 1/(static_cast(max_out_degree) + 1); + rmm::device_vector katz_vector(m); + double* d_katz = thrust::raw_pointer_cast(katz_vector.data()); + + int max_out_degree = getMaxDegree(G); + double alpha = 1/(static_cast(max_out_degree) + 1); - cugraph::katz_centrality(G.get(), col_katz_centrality.get(), alpha, 100, 1e-6, false, true); + cugraph::katz_centrality(G, d_katz, alpha, 100, 1e-6, false, true); - std::vector top10CUGraph = getTopKIds(std::move(col_katz_centrality)); - std::vector top10Golden = getGoldenTopKIds(fs_result); + std::vector 
top10CUGraph = getTopKIds(d_katz, m); + std::vector top10Golden = getGoldenTopKIds(fs_result); - EXPECT_THAT(top10CUGraph, ::testing::ContainerEq(top10Golden)); + EXPECT_THAT(top10CUGraph, ::testing::ContainerEq(top10Golden)); } }; diff --git a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx index 664d26adefc..6582c8710a4 100644 --- a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd index ff408e8e6e3..4b6855d1ba8 100644 --- a/python/cugraph/centrality/katz_centrality.pxd +++ b/python/cugraph/centrality/katz_centrality.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,14 +16,14 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph cimport * +from cugraph.structure.graph_new cimport * +from libcpp cimport bool +cdef extern from "algorithms.hpp" namespace "cugraph": -cdef extern from "cugraph.h" namespace "cugraph": - - cdef void katz_centrality( - Graph *graph, - gdf_column *katz_centrality, + cdef void katz_centrality[VT,ET,WT,result_t]( + const GraphCSR[VT,ET,WT] &graph, + result_t *katz_centrality, double alpha, int max_iter, double tol, diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx index 17dc7b40e62..05491ff5539 100644 --- a/python/cugraph/centrality/katz_centrality_wrapper.pyx +++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,15 +16,13 @@ # cython: embedsignature = True # cython: language_level = 3 -cimport cugraph.centrality.katz_centrality as c_katz -from cugraph.structure.graph cimport * +from cugraph.centrality.katz_centrality cimport katz_centrality as c_katz_centrality +from cugraph.structure.graph_new cimport * from cugraph.structure import graph_wrapper from cugraph.utilities.column_utils cimport * from cugraph.utilities.unrenumber import unrenumber from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.stdlib cimport calloc, malloc, free -from libc.float cimport FLT_MAX_EXP import cudf import cudf._lib as libcudf @@ -36,38 +34,28 @@ def katz_centrality(input_graph, alpha=0.1, max_iter=100, tol=1.0e-5, nstart=Non """ Call katz_centrality """ - cdef uintptr_t graph = graph_wrapper.allocate_cpp_graph() - cdef Graph * g = graph + if not input_graph.adjlist: + input_graph.view_adj_list() - if input_graph.adjlist: - [offsets, indices] = graph_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = graph_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) - graph_wrapper.add_adj_list(graph, offsets, indices, weights) - else: - [src, dst] = graph_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) - if input_graph.edgelist.weights: - [weights] = graph_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) - graph_wrapper.add_edge_list(graph, src, dst, weights) - else: - graph_wrapper.add_edge_list(graph, src, dst) - add_adj_list(g) - offsets, indices, values = graph_wrapper.get_adj_list(graph) - input_graph.adjlist = input_graph.AdjList(offsets, indices, values) + [offsets, indices] = graph_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - # we should add get_number_of_vertices() to Graph (and 
this should be - # used instead of g.adjList.offsets.size - 1) - num_verts = g.adjList.offsets.size - 1 + num_verts = input_graph.number_of_vertices() + num_edges = len(indices) df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) - df['katz_centrality'] = cudf.Series(np.zeros(num_verts, dtype=np.float64)) - cdef bool has_guess = 0 - if nstart is not None: + has_guess = False + + if nstart is None: + df['katz_centrality'] = cudf.Series(np.zeros(num_verts, dtype=np.float64)) + else: + has_guess = True if len(nstart) != num_verts: raise ValueError('nstart must have initial guess for all vertices') + nstart = graph_wrapper.datatype_cast([nstart], [np.float64]) + if input_graph.renumbered is True: renumber_series = cudf.Series(input_graph.edgelist.renumber_map.index, index=input_graph.edgelist.renumber_map) @@ -79,11 +67,18 @@ def katz_centrality(input_graph, alpha=0.1, max_iter=100, tol=1.0e-5, nstart=Non df['katz_centrality'] = cudf.Series(cudf._lib.copying.scatter(nstart['values']._column, nstart['vertex']._column, df['katz_centrality']._column)) - has_guess = 1 - g.adjList.get_vertex_identifiers(&c_identifier_col) - cdef gdf_column c_katz_centrality_col = get_gdf_column_view(df['katz_centrality']) - c_katz.katz_centrality(g, &c_katz_centrality_col, alpha, max_iter, tol, has_guess, normalized) + cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_katz = df['katz_centrality'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] + + cdef GraphCSR[int,int,float] graph + graph = GraphCSR[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) + + c_katz_centrality[int,int,float,double](graph, c_katz, alpha, max_iter, tol, has_guess, normalized) + + 
graph.get_vertex_identifiers(c_identifier) if input_graph.renumbered: df = unrenumber(input_graph.edgelist.renumber_map, df, 'vertex') diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index b3442d9d36f..bcbbdb23428 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -12,6 +12,7 @@ # limitations under the License. from cugraph.structure import graph_wrapper +from cugraph.structure import graph_new_wrapper from cugraph.structure.symmetrize import symmetrize from cugraph.structure.renumber import renumber as rnb from cugraph.structure.renumber import renumber_from_cudf as multi_rnb @@ -607,7 +608,7 @@ def degrees(self, vertex_subset=None): >>> G.add_edge_list(sources, destinations, None) >>> df = G.degrees([0,9,12]) """ - vertex_col, in_degree_col, out_degree_col = graph_wrapper._degrees( + vertex_col, in_degree_col, out_degree_col = graph_new_wrapper._degrees( self) df = cudf.DataFrame() @@ -640,15 +641,10 @@ def degrees(self, vertex_subset=None): np.asarray([out_degree_col[i] for i in vertex_subset], dtype=np.int32)) - # is this necessary??? - del vertex_col - del in_degree_col - del out_degree_col - return df def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_wrapper._degree(self, x) + vertex_col, degree_col = graph_new_wrapper._degree(self, x) df = cudf.DataFrame() if vertex_subset is None: @@ -673,9 +669,6 @@ def _degree(self, vertex_subset, x=0): df['degree'] = cudf.Series(np.asarray( [degree_col[i] for i in vertex_subset], dtype=np.int32 )) - # is this necessary??? 
- del vertex_col - del degree_col return df diff --git a/python/cugraph/structure/graph_new.pxd b/python/cugraph/structure/graph_new.pxd index ed39bb3a598..0b8393bf21b 100644 --- a/python/cugraph/structure/graph_new.pxd +++ b/python/cugraph/structure/graph_new.pxd @@ -38,6 +38,11 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": GraphProperties prop VT number_of_vertices ET number_of_edges + + void get_vertex_identifiers(VT *) const + + void degree(ET *,int) + GraphBase(WT*,VT,ET) cdef cppclass GraphCOO[VT,ET,WT](GraphBase[VT,ET,WT]): @@ -50,7 +55,6 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": const VT *offsets const VT *indices - void get_vertex_identifiers(VT *) const void get_source_indices(VT *) const GraphCompressedSparseBase(const VT *, const ET *, const WT *, size_t, size_t) diff --git a/python/cugraph/structure/graph_new_wrapper.pyx b/python/cugraph/structure/graph_new_wrapper.pyx index 08da0b2a7bf..5257dc973dc 100644 --- a/python/cugraph/structure/graph_new_wrapper.pyx +++ b/python/cugraph/structure/graph_new_wrapper.pyx @@ -16,52 +16,92 @@ # cython: embedsignature = True # cython: language_level = 3 -cimport cugraph.structure.graph_new as c_graph +from cugraph.structure.graph_new cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.stdlib cimport calloc, malloc, free +import cudf import rmm import numpy as np +def datatype_cast(cols, dtypes): + cols_out = [] + for col in cols: + if col is None or col.dtype.type in dtypes: + cols_out.append(col) + else: + cols_out.append(col.astype(dtypes[0])) + return cols_out -""" -cdef cppclass GraphBase[WT]: - cdef GraphBase[WT] c_base +def _degree_coo(src, dst, x=0): + # + # Computing the degree of the input graph from COO + # + [src, dst] = datatype_cast([src, dst], [np.int32]) - def __cinit__(self, WT const *edge_data, size_t number_of_vertices, size_t number_of_edges): - self.c_base = GraphBase(edge_data, number_of_vertices, number_of_edges) + 
num_verts = 1 + max(src.max(), dst.max()) + num_edges = len(src) -cdef cppclass GraphCOO[VT,WT]: - cdef GraphCOO c_base + vertex_col = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + degree_col = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - def __cinit__(self): - self.c_base = GraphCOO() - - def __cinit__(self, VT const *src_indices, VT const *dst_indices, WT const *edge_data, size_t number_of_vertices, size_t number_of_edges): - self.c_base = GraphCOO(src_indices, dst_indices, edge_data, number_of_vertices, number_of_edges) + cdef GraphCOO[int,int,float] graph -cdef cppclass GraphCSRBase[VT,WT]: - cdef GraphCSRBase c_base + cdef uintptr_t c_vertex = vertex_col.__cuda_array_interface__['data'][0] + cdef uintptr_t c_degree = degree_col.__cuda_array_interface__['data'][0] + cdef uintptr_t c_src = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst = dst.__cuda_array_interface__['data'][0] - def __cinit__(self, VT const *src_indices, VT const *dst_indices, WT const *edge_data, size_t number_of_vertices, size_t number_of_edges): - self.c_base = GraphCSRBase(src_indices, dst_indices, edge_data, number_of_vertices, number_of_edges) + graph = GraphCOO[int,int,float](c_src, c_dst, NULL, num_verts, num_edges) + graph.degree( c_degree, x) + graph.get_vertex_identifiers(c_vertex) -cdef cppclass GraphCSR[VT,WT]: - cdef GraphCSR c_base + return vertex_col, degree_col - def __cinit__(self): - self.c_base = GraphCSR() - - def __cinit__(self, VT const *src_indices, VT const *dst_indices, WT const *edge_data, size_t number_of_vertices, size_t number_of_edges): - self.c_base = GraphCSR(src_indices, dst_indices, edge_data, number_of_vertices, number_of_edges) -cdef cppclass GraphCSC[VT,WT]: - cdef GraphCSC c_base +def _degree_csr(offsets, indices, x=0): + [offsets, indices] = datatype_cast([offsets, indices], [np.int32]) - def __cinit__(self): - self.c_base = GraphCSC() - - def __cinit__(self, VT const *src_indices, VT const *dst_indices, WT const *edge_data, 
size_t number_of_vertices, size_t number_of_edges): - self.c_base = GraphCSC(src_indices, dst_indices, edge_data, number_of_vertices, number_of_edges) -""" + num_verts = len(offsets)-1 + num_edges = len(indices) + + vertex_col = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + degree_col = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + + cdef GraphCSR[int,int,float] graph + + cdef uintptr_t c_vertex = vertex_col.__cuda_array_interface__['data'][0] + cdef uintptr_t c_degree = degree_col.__cuda_array_interface__['data'][0] + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] + + graph = GraphCSR[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) + graph.degree( c_degree, x) + graph.get_vertex_identifiers(c_vertex) + + return vertex_col, degree_col + + +def _degree(input_graph, x=0): + transpose_x = { 0: 0, 1: 2, 2:1 } + + if input_graph.adjlist is not None: + return _degree_csr(input_graph.adjlist.offsets, input_graph.adjlist.indices, x) + + if input_graph.transposedadjlist is not None: + return _degree_csr(input_graph.transposedadjlist.offsets, + input_graph.transposedadjlist.indices, + transpose_x(x)) + + if input_graph.edgelist is not None: + return _degree_coo(input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst'], + x) + + raise Exception("input_graph not COO, CSR or CSC") + +def _degrees(input_graph): + verts, indegrees = _degree(input_graph, 1) + verts, outdegrees = _degree(input_graph, 2) + + return verts, indegrees, outdegrees diff --git a/python/cugraph/structure/graph_wrapper.pyx b/python/cugraph/structure/graph_wrapper.pyx index 16e0c99b84b..baaa6fd2383 100644 --- a/python/cugraph/structure/graph_wrapper.pyx +++ b/python/cugraph/structure/graph_wrapper.pyx @@ -385,6 +385,7 @@ def number_of_edges(graph_ptr): return 0 +""" def _degree(input_graph, x=0): cdef uintptr_t graph = allocate_cpp_graph() cdef Graph * g = graph 
@@ -454,3 +455,4 @@ def _degrees(input_graph): c_graph.degree(g, &c_out_degree_col, 2) return vertex_col, in_degree_col, out_degree_col +""" From 3e6ded9908ab4279d914290078adfae693d86261 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 24 Mar 2020 10:05:38 -0400 Subject: [PATCH 02/40] missed a file --- cpp/src/utilities/cuda_utils.cuh | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 cpp/src/utilities/cuda_utils.cuh diff --git a/cpp/src/utilities/cuda_utils.cuh b/cpp/src/utilities/cuda_utils.cuh new file mode 100644 index 00000000000..fe581af914d --- /dev/null +++ b/cpp/src/utilities/cuda_utils.cuh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +namespace cugraph { +// +// This should go into RAFT... 
+// +__device__ static __forceinline__ int64_t atomicMin(int64_t* addr, int64_t val) { + unsigned long long *addr_as_ull{reinterpret_cast(addr)}; + unsigned long long *val_addr_as_ull{reinterpret_cast(&val)}; + unsigned long long old = *addr_as_ull; + unsigned long long val_as_ull = *val_addr_as_ull; + int64_t *p_old{reinterpret_cast(&old)}; + unsigned long long expected; + + do { + expected = old; + old = ::atomicCAS(addr_as_ull, + expected, + thrust::min(val_as_ull, expected)); + } while (expected != old); + return *p_old; +} + +__device__ static __forceinline__ int32_t atomicMin(int32_t* addr, int32_t val) { + return ::atomicMin(addr, val); +} + +__device__ static __forceinline__ int64_t atomicAdd(int64_t* addr, int64_t val) { + unsigned long long *addr_as_ull{reinterpret_cast(addr)}; + unsigned long long *val_addr_as_ull{reinterpret_cast(&val)}; + unsigned long long old = *addr_as_ull; + unsigned long long val_as_ull = *val_addr_as_ull; + int64_t *p_old{reinterpret_cast(&old)}; + unsigned long long expected; + + do { + expected = old; + old = ::atomicCAS(addr_as_ull, + expected, + (expected + val_as_ull)); + } while (expected != old); + return *p_old; +} + +__device__ static __forceinline__ int32_t atomicAdd(int32_t* addr, int32_t val) { + return ::atomicAdd(addr, val); +} + +} //namespace cugraph From ce791420ad311bc914a4fbbadb770e0ba6268cd5 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 24 Mar 2020 14:05:45 -0400 Subject: [PATCH 03/40] rework two_hop_neighbors to use GraphCSR --- cpp/include/algorithms.hpp | 51 +++-- cpp/include/functions.h | 16 -- cpp/src/traversal/two_hop_neighbors.cu | 174 ++++++------------ cpp/src/traversal/two_hop_neighbors.cuh | 169 +++++++++-------- python/cugraph/structure/graph.py | 2 +- python/cugraph/structure/graph_new.pxd | 8 + .../cugraph/structure/graph_new_wrapper.pyx | 42 +++++ python/cugraph/structure/graph_wrapper.pyx | 45 ----- 8 files changed, 229 insertions(+), 278 deletions(-) diff --git 
a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index aa27850c227..bd630856a3c 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -34,7 +34,7 @@ namespace cugraph { * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) * @tparam WT Type of edge weights. Supported value : float or double. * - * @param[in] graph cuGRAPH graph descriptor, should contain the connectivity information as a transposed adjacency list (CSR). Edge weights are not used for this algorithm. + * @param[in] graph cuGRAPH graph descriptor, should contain the connectivity information as a transposed adjacency list (CSC). Edge weights are not used for this algorithm. * @param[in] alpha The damping factor alpha represents the probability to follow an outgoing edge, standard value is 0.85. Thus, 1.0-alpha is the probability to “teleport” to a random vertex. Alpha should be greater than 0.0 and strictly lower than 1.0. * The initial guess must not be the vector of 0s. Any value other than 1 or 0 is treated as an invalid value. @@ -309,21 +309,20 @@ template void core_number(experimental::GraphCSR const &graph, VT *core_number); /** - * @Synopsis Compute K Core of the graph G - * - * @Param[in] *in_graph cuGRAPH graph descriptor with a valid edgeList or adjList - * - * @Param[in] k Order of the core. This value must not be negative. - * - * @Param[in] *vertex_id User specified vertex identifiers for which core number values are supplied - * - * @Param[in] *core_number User supplied core number values corresponding to vertex_id - * - * @Param[out] *out_graph K Core subgraph - * + * @brief Compute K Core of the graph G + * * @throws cugraph::logic_error when an error occurs. + * + * @tparam VT Type of vertex identifiers. Supported value : int (signed, 32-bit) + * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) + * @tparam WT Type of edge weights. Supported values : float or double. 
+ * + * @param[in] graph cuGRAPH graph descriptor with a valid edgeList or adjList + * @param[in] k Order of the core. This value must not be negative. + * @param[in] vertex_id User specified vertex identifiers for which core number values are supplied + * @param[in] core_number User supplied core number values corresponding to vertex_id + * @param[out] out_graph K Core subgraph */ -/* ----------------------------------------------------------------------------*/ template void k_core(experimental::GraphCSR const &graph, int k, @@ -331,4 +330,28 @@ void k_core(experimental::GraphCSR const &graph, VT *core_number, experimental::GraphCOO &out_graph); +/** + * @brief Find all 2-hop neighbors in the graph + * + * Find pairs of vertices in the input graph such that each pair is connected by + * a path that is two hops in length. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam VT Type of vertex identifiers. Supported value : int (signed, 32-bit) + * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) + * @tparam WT Type of edge weights. Supported values : float or double. + * + * @param[in] graph The input graph object + * @param[out] first Upon return will be a device pointer pointing to an array containing + * the first entry of each result pair. + * @param[out] second Upon return will be a device pointer pointing to an array containing + * the second entry of each result pair. 
+ * @return The number of pairs + */ +template +ET get_two_hop_neighbors(experimental::GraphCSR const &graph, + VT **first, + VT **second); + } //namespace cugraph diff --git a/cpp/include/functions.h b/cpp/include/functions.h index 6d64655f45f..a39be4c53a0 100644 --- a/cpp/include/functions.h +++ b/cpp/include/functions.h @@ -182,22 +182,6 @@ void delete_edge_list(Graph* graph); /* ----------------------------------------------------------------------------*/ void delete_transposed_adj_list(Graph* graph); -/** - * @Synopsis Find pairs of vertices in the input graph such that each pair is connected by - * a path that is two hops in length. - * - * @param[in]* graph in : graph descriptor with graph->adjList pointing to a gdf_adj_list structure - * - * @param[out] first out : An uninitialized gdf_column which will be initialized to contain the - * first entry of each result pair. - * @param[out] second out : An uninitialized gdf_column which will be initialized to contain the - * second entry of each result pair. - * - * @throws cugraph::logic_error when an error occurs. - */ -/* ----------------------------------------------------------------------------*/ -void get_two_hop_neighbors(Graph* graph, gdf_column* first, gdf_column* second); - /** * @Synopsis Single node Multi GPU CSR sparse matrix multiply, x=Ax. * Should be called in an omp parallel section with one thread per device. diff --git a/cpp/src/traversal/two_hop_neighbors.cu b/cpp/src/traversal/two_hop_neighbors.cu index 3321481e670..9e7628331ed 100644 --- a/cpp/src/traversal/two_hop_neighbors.cu +++ b/cpp/src/traversal/two_hop_neighbors.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,6 +19,8 @@ * @file two_hop_neighbors.cu * ---------------------------------------------------------------------------**/ +#include +#include #include "two_hop_neighbors.cuh" #include "utilities/error_utils.h" #include @@ -29,154 +31,96 @@ #include namespace cugraph{ -namespace detail{ -template -void get_two_hop_neighbors_impl(IndexType num_verts, - IndexType* offsets, - IndexType* indices, - IndexType** first, - IndexType** second, - IndexType& outputSize) { - // Get the number of edges from the adjacency representation - IndexType num_edges; - cudaMemcpy(&num_edges, &offsets[num_verts], sizeof(IndexType), cudaMemcpyDefault); - cudaStream_t stream {nullptr}; +template +ET get_two_hop_neighbors(experimental::GraphCSR const &graph, + VT **first, + VT **second) { - // Allocate memory for temporary stuff - IndexType *exsum_degree = nullptr; - IndexType *first_pair = nullptr; - IndexType *second_pair = nullptr; - IndexType *block_bucket_offsets = nullptr; + cudaStream_t stream {nullptr}; - ALLOC_TRY(&exsum_degree, sizeof(IndexType) * (num_edges + 1), stream); + rmm::device_vector exsum_degree(graph.number_of_edges + 1); + ET *d_exsum_degree = exsum_degree.data().get(); // Find the degree of the out vertex of each edge - degree_iterator deg_it(offsets); - deref_functor, IndexType> deref(deg_it); - thrust::fill(rmm::exec_policy(stream)->on(stream), exsum_degree, exsum_degree + 1, 0); + degree_iterator deg_it(graph.offsets); + deref_functor, ET> deref(deg_it); + exsum_degree[0] = ET{0}; thrust::transform(rmm::exec_policy(stream)->on(stream), - indices, - indices + num_edges, - exsum_degree + 1, - deref); + graph.indices, + graph.indices + graph.number_of_edges, + d_exsum_degree + 1, + deref); // Take the inclusive sum of the degrees thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream), - exsum_degree + 1, - exsum_degree + num_edges + 1, - exsum_degree + 1); + d_exsum_degree + 1, + d_exsum_degree + graph.number_of_edges + 1, + d_exsum_degree + 1); // Copy out 
the last value to get the size of scattered output - IndexType output_size; - cudaMemcpy(&output_size, &exsum_degree[num_edges], sizeof(IndexType), cudaMemcpyDefault); + ET output_size = exsum_degree[graph.number_of_edges]; // Allocate memory for the scattered output - ALLOC_TRY(&second_pair, sizeof(IndexType) * output_size, stream); - ALLOC_TRY(&first_pair, sizeof(IndexType) * output_size, stream); + rmm::device_vector first_pair(output_size); + rmm::device_vector second_pair(output_size); + + VT *d_first_pair = first_pair.data().get(); + VT *d_second_pair = second_pair.data().get(); // Figure out number of blocks and allocate memory for block bucket offsets - IndexType num_blocks = (output_size + TWO_HOP_BLOCK_SIZE - 1) / TWO_HOP_BLOCK_SIZE; - ALLOC_TRY(&block_bucket_offsets, sizeof(IndexType) * (num_blocks + 1), stream); + ET num_blocks = (output_size + TWO_HOP_BLOCK_SIZE - 1) / TWO_HOP_BLOCK_SIZE; + rmm::device_vector block_bucket_offsets(num_blocks+1); + + ET *d_block_bucket_offsets = block_bucket_offsets.data().get(); // Compute the block bucket offsets dim3 grid, block; block.x = 512; - grid.x = min((IndexType) MAXBLOCKS, (num_blocks / 512) + 1); - compute_bucket_offsets_kernel<<>>(exsum_degree, - block_bucket_offsets, - num_edges, + grid.x = min((ET) MAXBLOCKS, (num_blocks / 512) + 1); + compute_bucket_offsets_kernel<<>>(d_exsum_degree, + d_block_bucket_offsets, + graph.number_of_edges, output_size); - cudaMemcpy(&block_bucket_offsets[num_blocks], &num_edges, sizeof(IndexType), cudaMemcpyDefault); + + block_bucket_offsets[num_blocks] = graph.number_of_edges; // Scatter the expanded edge lists into temp space - grid.x = min((IndexType) MAXBLOCKS, num_blocks); - scatter_expand_kernel<<>>(exsum_degree, - indices, - offsets, - block_bucket_offsets, - num_verts, + grid.x = min((ET) MAXBLOCKS, num_blocks); + scatter_expand_kernel<<>>(d_exsum_degree, + graph.indices, + graph.offsets, + d_block_bucket_offsets, + graph.number_of_vertices, output_size, num_blocks, - 
first_pair, - second_pair); + d_first_pair, + d_second_pair); + // TODO: This would be faster in a hash table (no sorting), unless there's + // some reason that the result has to be sorted // Remove duplicates and self pairings - auto tuple_start = thrust::make_zip_iterator(thrust::make_tuple(first_pair, second_pair)); + auto tuple_start = thrust::make_zip_iterator(thrust::make_tuple(d_first_pair, d_second_pair)); auto tuple_end = tuple_start + output_size; thrust::sort(rmm::exec_policy(stream)->on(stream), tuple_start, tuple_end); tuple_end = thrust::copy_if(rmm::exec_policy(stream)->on(stream), tuple_start, tuple_end, tuple_start, - self_loop_flagger()); + self_loop_flagger()); tuple_end = thrust::unique(rmm::exec_policy(stream)->on(stream), tuple_start, tuple_end); // Get things ready to return - outputSize = tuple_end - tuple_start; - ALLOC_TRY(first, sizeof(IndexType) * outputSize, nullptr); - ALLOC_TRY(second, sizeof(IndexType) * outputSize, nullptr); - cudaMemcpy(*first, first_pair, sizeof(IndexType) * outputSize, cudaMemcpyDefault); - cudaMemcpy(*second, second_pair, sizeof(IndexType) * outputSize, cudaMemcpyDefault); - - // Free up temporary stuff - ALLOC_FREE_TRY(exsum_degree, nullptr); - ALLOC_FREE_TRY(first_pair, nullptr); - ALLOC_FREE_TRY(second_pair, nullptr); - ALLOC_FREE_TRY(block_bucket_offsets, nullptr); - - -} + ET outputSize = tuple_end - tuple_start; + + ALLOC_TRY(first, sizeof(VT) * outputSize, nullptr); + ALLOC_TRY(second, sizeof(VT) * outputSize, nullptr); + cudaMemcpy(*first, d_first_pair, sizeof(VT) * outputSize, cudaMemcpyDefault); + cudaMemcpy(*second, d_second_pair, sizeof(VT) * outputSize, cudaMemcpyDefault); -} //namespace - -void get_two_hop_neighbors(Graph* graph, gdf_column* first, gdf_column* second) { - CHECK_GRAPH(graph) - CUGRAPH_EXPECTS(first != nullptr, "Invalid API parameter: first column is NULL"); - CUGRAPH_EXPECTS(second != nullptr, "Invalid API parameter: second column is NULL"); - - size_t num_verts = 
graph->adjList->offsets->size - 1; - switch (graph->adjList->offsets->dtype) { - case GDF_INT32: { - int32_t* first_ptr; - int32_t* second_ptr; - int32_t outputSize; - detail::get_two_hop_neighbors_impl((int32_t) num_verts, - (int32_t*) graph->adjList->offsets->data, - (int32_t*) graph->adjList->indices->data, - &first_ptr, - &second_ptr, - outputSize); - first->data = first_ptr; - first->dtype = GDF_INT32; - first->size = outputSize; - second->data = second_ptr; - second->dtype = GDF_INT32; - second->size = outputSize; - break; - } - case GDF_INT64: { - int64_t* first_ptr; - int64_t* second_ptr; - int64_t outputSize; - detail::get_two_hop_neighbors_impl((int64_t) num_verts, - (int64_t*) graph->adjList->offsets->data, - (int64_t*) graph->adjList->indices->data, - &first_ptr, - &second_ptr, - outputSize); - first->data = first_ptr; - first->dtype = GDF_INT64; - first->size = outputSize; - second->data = second_ptr; - second->dtype = GDF_INT64; - second->size = outputSize; - break; - } - default: - CUGRAPH_FAIL("Unsupported data type"); - } - - + return outputSize; } -} //namespace + +template int get_two_hop_neighbors(experimental::GraphCSR const &, int **, int **); + +} //namespace cugraph diff --git a/cpp/src/traversal/two_hop_neighbors.cuh b/cpp/src/traversal/two_hop_neighbors.cuh index 11bc9178246..31b7a7fa885 100644 --- a/cpp/src/traversal/two_hop_neighbors.cuh +++ b/cpp/src/traversal/two_hop_neighbors.cuh @@ -25,111 +25,106 @@ #define MAXBLOCKS 65535 #define TWO_HOP_BLOCK_SIZE 512 -template +template struct degree_iterator { - IndexType* offsets; - degree_iterator(IndexType* _offsets) : - offsets(_offsets) { - } - - __host__ __device__ - IndexType operator[](IndexType place) { - return offsets[place + 1] - offsets[place]; - } + edge_t const * offsets; + degree_iterator(edge_t const* _offsets): offsets(_offsets) { + } + + __host__ __device__ edge_t operator[](edge_t place) { + return offsets[place + 1] - offsets[place]; + } }; -template +template struct 
deref_functor { - It iterator; - deref_functor(It it) : - iterator(it) { - } - - __host__ __device__ - IndexType operator()(IndexType in) { - return iterator[in]; - } + It iterator; + deref_functor(It it): iterator(it) { + } + + __host__ __device__ edge_t operator()(edge_t in) { + return iterator[in]; + } }; template struct self_loop_flagger { - __host__ __device__ - bool operator()(const thrust::tuple pair) { - if (thrust::get<0>(pair) == thrust::get<1>(pair)) - return false; - return true; - } + __host__ __device__ + bool operator()(const thrust::tuple pair) { + if (thrust::get<0>(pair) == thrust::get<1>(pair)) + return false; + return true; + } }; template __device__ IndexType binsearch_maxle(const IndexType *vec, - const IndexType val, - IndexType low, - IndexType high) { - while (true) { - if (low == high) - return low; //we know it exists - if ((low + 1) == high) - return (vec[high] <= val) ? high : low; - - IndexType mid = low + (high - low) / 2; - - if (vec[mid] > val) - high = mid - 1; - else - low = mid; - } + const IndexType val, + IndexType low, + IndexType high) { + while (true) { + if (low == high) + return low; //we know it exists + if ((low + 1) == high) + return (vec[high] <= val) ? 
high : low; + + IndexType mid = low + (high - low) / 2; + + if (vec[mid] > val) + high = mid - 1; + else + low = mid; + } } template __global__ void compute_bucket_offsets_kernel(const IndexType *frontier_degrees_exclusive_sum, - IndexType *bucket_offsets, - const IndexType frontier_size, - IndexType total_degree) { - IndexType end = ((total_degree - 1 + TWO_HOP_BLOCK_SIZE) / TWO_HOP_BLOCK_SIZE); - - for (IndexType bid = blockIdx.x * blockDim.x + threadIdx.x; - bid <= end; - bid += gridDim.x * blockDim.x) { - - IndexType eid = min(bid * TWO_HOP_BLOCK_SIZE, total_degree - 1); - - bucket_offsets[bid] = binsearch_maxle(frontier_degrees_exclusive_sum, - eid, - (IndexType) 0, - frontier_size - 1); - - } + IndexType *bucket_offsets, + const IndexType frontier_size, + IndexType total_degree) { + IndexType end = ((total_degree - 1 + TWO_HOP_BLOCK_SIZE) / TWO_HOP_BLOCK_SIZE); + + for (IndexType bid = blockIdx.x * blockDim.x + threadIdx.x; + bid <= end; + bid += gridDim.x * blockDim.x) { + + IndexType eid = min(bid * TWO_HOP_BLOCK_SIZE, total_degree - 1); + + bucket_offsets[bid] = binsearch_maxle(frontier_degrees_exclusive_sum, + eid, + (IndexType) 0, + frontier_size - 1); + } } template __global__ void scatter_expand_kernel(const IndexType *exsum_degree, - const IndexType *indices, - const IndexType *offsets, - const IndexType *bucket_offsets, - IndexType num_verts, - IndexType max_item, - IndexType max_block, - IndexType *output_first, - IndexType *output_second) { - __shared__ IndexType blockRange[2]; - for (IndexType bid = blockIdx.x; bid < max_block; bid += gridDim.x) { - // Copy the start and end of the buckets range into shared memory - if (threadIdx.x == 0) { - blockRange[0] = bucket_offsets[bid]; - blockRange[1] = bucket_offsets[bid + 1]; - } - __syncthreads(); - - // Get the global thread id (for this virtual block) - IndexType tid = bid * blockDim.x + threadIdx.x; - if (tid < max_item) { - IndexType sourceIdx = binsearch_maxle(exsum_degree, tid, blockRange[0], 
blockRange[1]); - IndexType sourceId = indices[sourceIdx]; - IndexType itemRank = tid - exsum_degree[sourceIdx]; - output_second[tid] = indices[offsets[sourceId] + itemRank]; - IndexType baseSourceId = binsearch_maxle(offsets, sourceIdx, (IndexType)0, num_verts); - output_first[tid] = baseSourceId; - } - } + const IndexType *indices, + const IndexType *offsets, + const IndexType *bucket_offsets, + IndexType num_verts, + IndexType max_item, + IndexType max_block, + IndexType *output_first, + IndexType *output_second) { + __shared__ IndexType blockRange[2]; + for (IndexType bid = blockIdx.x; bid < max_block; bid += gridDim.x) { + // Copy the start and end of the buckets range into shared memory + if (threadIdx.x == 0) { + blockRange[0] = bucket_offsets[bid]; + blockRange[1] = bucket_offsets[bid + 1]; + } + __syncthreads(); + + // Get the global thread id (for this virtual block) + IndexType tid = bid * blockDim.x + threadIdx.x; + if (tid < max_item) { + IndexType sourceIdx = binsearch_maxle(exsum_degree, tid, blockRange[0], blockRange[1]); + IndexType sourceId = indices[sourceIdx]; + IndexType itemRank = tid - exsum_degree[sourceIdx]; + output_second[tid] = indices[offsets[sourceId] + itemRank]; + IndexType baseSourceId = binsearch_maxle(offsets, sourceIdx, (IndexType)0, num_verts); + output_first[tid] = baseSourceId; + } + } } diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index bcbbdb23428..661f8914292 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -407,7 +407,7 @@ def get_two_hop_neighbors(self): df['second'] : cudf.Series the second vertex id of a pair. 
""" - df = graph_wrapper.get_two_hop_neighbors(self) + df = graph_new_wrapper.get_two_hop_neighbors(self) if self.renumbered is True: if isinstance(self.edgelist.renumber_map, cudf.DataFrame): n_cols = len(self.edgelist.renumber_map.columns) - 1 diff --git a/python/cugraph/structure/graph_new.pxd b/python/cugraph/structure/graph_new.pxd index 0b8393bf21b..a43d5376799 100644 --- a/python/cugraph/structure/graph_new.pxd +++ b/python/cugraph/structure/graph_new.pxd @@ -66,3 +66,11 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": cdef cppclass GraphCSC[VT,ET,WT](GraphCompressedSparseBase[VT,ET,WT]): GraphCSC() GraphCSC(const VT *, const ET *, const WT *, size_t, size_t) + + +cdef extern from "algorithms.hpp" namespace "cugraph": + + cdef ET get_two_hop_neighbors[VT,ET,WT]( + const GraphCSR[VT, ET, WT] &graph, + VT **first, + VT **second) except + diff --git a/python/cugraph/structure/graph_new_wrapper.pyx b/python/cugraph/structure/graph_new_wrapper.pyx index 5257dc973dc..1c73f8ba868 100644 --- a/python/cugraph/structure/graph_new_wrapper.pyx +++ b/python/cugraph/structure/graph_new_wrapper.pyx @@ -17,6 +17,7 @@ # cython: language_level = 3 from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_new cimport get_two_hop_neighbors as c_get_two_hop_neighbors from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -105,3 +106,44 @@ def _degrees(input_graph): verts, outdegrees = _degree(input_graph, 2) return verts, indegrees, outdegrees + + +def get_two_hop_neighbors(input_graph): + cdef GraphCSR[int,int,float] graph + + offsets = None + indices = None + transposed = False + + if input_graph.adjlist: + [offsets, indices] = datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + elif input_graph.transposedadjlist: + [offsets, indices] = datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + transposed = True + else: + input_graph.view_adj_list() + [offsets, indices] = 
datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + + + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] + cdef uintptr_t c_first = NULL + cdef uintptr_t c_second = NULL + + num_verts = input_graph.number_of_vertices() + num_edges = len(indices) + + graph = GraphCSR[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) + + count = c_get_two_hop_neighbors(graph, &c_first, &c_second) + + df = cudf.DataFrame() + df['first'] = rmm.device_array_from_ptr(c_first, + nelem=count, + dtype=np.int32) + df['second'] = rmm.device_array_from_ptr(c_second, + nelem=count, + dtype=np.int32) + + return df + diff --git a/python/cugraph/structure/graph_wrapper.pyx b/python/cugraph/structure/graph_wrapper.pyx index baaa6fd2383..319f4aec36c 100644 --- a/python/cugraph/structure/graph_wrapper.pyx +++ b/python/cugraph/structure/graph_wrapper.pyx @@ -313,51 +313,6 @@ def get_transposed_adj_list(graph_ptr): return offset_col, index_col, value_col -def get_two_hop_neighbors(input_graph): - cdef uintptr_t graph = allocate_cpp_graph() - cdef Graph * g = graph - - if input_graph.adjlist: - [offsets, indices] = datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) - add_adj_list(graph, offsets, indices, weights) - else: - [src, dst] = datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) - if input_graph.edgelist.weights: - [weights] = datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) - add_edge_list(graph, src, dst, weights) - else: - add_edge_list(graph, src, dst) - c_graph.add_adj_list(g) - offsets, indices, values = get_adj_list(graph) - input_graph.adjlist = input_graph.AdjList(offsets, indices, values) - - cdef gdf_column c_first_col - cdef 
gdf_column c_second_col - c_graph.get_two_hop_neighbors(g, &c_first_col, &c_second_col) - - df = cudf.DataFrame() - if c_first_col.dtype == GDF_INT32: - first_out = rmm.device_array_from_ptr(c_first_col.data, - nelem=c_first_col.size, - dtype=np.int32) - second_out = rmm.device_array_from_ptr(c_second_col.data, - nelem=c_second_col.size, - dtype=np.int32) - df['first'] = first_out - df['second'] = second_out - if c_first_col.dtype == GDF_INT64: - first_out = rmm.device_array_from_ptr(c_first_col.data, - nelem=c_first_col.size, - dtype=np.int64) - second_out = rmm.device_array_from_ptr(c_second_col.data, - nelem=c_second_col.size, - dtype=np.int64) - df['first'] = first_out - df['second'] = second_out - - return df - def number_of_vertices(input_graph): cdef uintptr_t graph = allocate_cpp_graph() cdef Graph * g = graph From 2526841ade9ba62576c30c2c4ed2edb00e1ee3ab Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 24 Mar 2020 14:47:31 -0400 Subject: [PATCH 04/40] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea99383684e..91cfb822666 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ## Improvements - PR #765 Remove gdf_column from connected components +- PR #780 Remove gdf_column from cuhornet features ## Bug Fixes From d1b245721f6b9d0371bf0c4f547e599692a993a0 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 25 Mar 2020 16:47:24 -0400 Subject: [PATCH 05/40] remove gdf_column from k_core and core_number --- cpp/include/algorithms.hpp | 18 +- cpp/include/graph.hpp | 20 +- cpp/src/cores/core_number.cu | 245 +++++++------------ cpp/src/ktruss/ktruss.cu | 10 +- python/cugraph/cores/core_number.pxd | 13 +- python/cugraph/cores/core_number.py | 5 +- python/cugraph/cores/core_number_wrapper.pyx | 49 ++-- python/cugraph/cores/k_core.pxd | 16 +- python/cugraph/cores/k_core.py | 34 ++- python/cugraph/cores/k_core_wrapper.pyx | 132 +++++----- python/cugraph/structure/graph_new.pxd | 
8 +- 11 files changed, 258 insertions(+), 292 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index bd630856a3c..867d1af5219 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -317,17 +317,19 @@ void core_number(experimental::GraphCSR const &graph, VT *core_numbe * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) * @tparam WT Type of edge weights. Supported values : float or double. * - * @param[in] graph cuGRAPH graph descriptor with a valid edgeList or adjList - * @param[in] k Order of the core. This value must not be negative. - * @param[in] vertex_id User specified vertex identifiers for which core number values are supplied - * @param[in] core_number User supplied core number values corresponding to vertex_id - * @param[out] out_graph K Core subgraph + * @param[in] graph cuGRAPH graph descriptor with a valid edgeList or adjList + * @param[in] k Order of the core. This value must not be negative. + * @param[in] vertex_id User specified vertex identifiers for which core number values are supplied + * @param[in] core_number User supplied core number values corresponding to vertex_id + * @param[in] num_vertex_ids Number of elements in vertex_id/core_number arrays + * @param[out] out_graph K Core subgraph */ template -void k_core(experimental::GraphCSR const &graph, +void k_core(experimental::GraphCOO const &graph, int k, - VT *vertex_id, - VT *core_number, + VT const *vertex_id, + VT const *core_number, + VT num_vertex_ids, experimental::GraphCOO &out_graph); /** diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index bdc04d50e25..5c9e990b091 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -40,7 +40,7 @@ struct GraphProperties { template class GraphBase { public: - WT const *edge_data; ///< edge weight + WT *edge_data; ///< edge weight GraphProperties prop; @@ -54,7 +54,7 @@ class GraphBase { */ void get_vertex_identifiers(VT *identifiers) const; - 
GraphBase(WT const *edge_data_, VT number_of_vertices_, ET number_of_edges_): + GraphBase(WT *edge_data_, VT number_of_vertices_, ET number_of_edges_): edge_data(edge_data_), prop(), number_of_vertices(number_of_vertices_), @@ -72,8 +72,8 @@ class GraphBase { template class GraphCOO: public GraphBase { public: - VT const *src_indices{nullptr}; ///< rowInd - VT const *dst_indices{nullptr}; ///< colInd + VT *src_indices{nullptr}; ///< rowInd + VT *dst_indices{nullptr}; ///< colInd /** * @brief Computes degree(in, out, in+out) of all the nodes of a Graph @@ -109,7 +109,7 @@ class GraphCOO: public GraphBase { * @param number_of_vertices The number of vertices in the graph * @param number_of_edges The number of edges in the graph */ - GraphCOO(VT const *src_indices_, VT const *dst_indices_, WT const *edge_data_, + GraphCOO(VT *src_indices_, VT *dst_indices_, WT *edge_data_, VT number_of_vertices_, ET number_of_edges_): GraphBase(edge_data_, number_of_vertices_, number_of_edges_), src_indices(src_indices_), dst_indices(dst_indices_) @@ -126,8 +126,8 @@ class GraphCOO: public GraphBase { template class GraphCompressedSparseBase: public GraphBase { public: - ET const *offsets{nullptr}; ///< CSR offsets - VT const *indices{nullptr}; ///< CSR indices + ET *offsets{nullptr}; ///< CSR offsets + VT *indices{nullptr}; ///< CSR indices /** * @brief Fill the identifiers in the array with the source vertex identifiers @@ -164,7 +164,7 @@ class GraphCompressedSparseBase: public GraphBase { * @param number_of_vertices The number of vertices in the graph * @param number_of_edges The number of edges in the graph */ - GraphCompressedSparseBase(ET const *offsets_, VT const *indices_, WT const *edge_data_, + GraphCompressedSparseBase(ET *offsets_, VT *indices_, WT *edge_data_, VT number_of_vertices_, ET number_of_edges_): GraphBase(edge_data_, number_of_vertices_, number_of_edges_), offsets{offsets_}, @@ -201,7 +201,7 @@ class GraphCSR: public GraphCompressedSparseBase { * @param 
number_of_vertices The number of vertices in the graph * @param number_of_edges The number of edges in the graph */ - GraphCSR(ET const *offsets_, VT const *indices_, WT const *edge_data_, + GraphCSR(ET *offsets_, VT *indices_, WT *edge_data_, VT number_of_vertices_, ET number_of_edges_): GraphCompressedSparseBase(offsets_, indices_, edge_data_, number_of_vertices_, number_of_edges_) {} @@ -236,7 +236,7 @@ class GraphCSC: public GraphCompressedSparseBase { * @param number_of_vertices The number of vertices in the graph * @param number_of_edges The number of edges in the graph */ - GraphCSC(ET const *offsets_, VT const *indices_, WT const *edge_data_, + GraphCSC(ET *offsets_, VT *indices_, WT *edge_data_, VT number_of_vertices_, ET number_of_edges_): GraphCompressedSparseBase(offsets_, indices_, edge_data_, number_of_vertices_, number_of_edges_) {} diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index 1281ffe55ff..478eba6a234 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -14,43 +14,38 @@ * limitations under the License. 
*/ -/** - * ---------------------------------------------------------------------------* - * @brief Core Number implementation - * - * @file core_number.cu - * --------------------------------------------------------------------------*/ - -#include +#include #include "utilities/error_utils.h" #include #include #include -#include +//#include namespace cugraph { namespace detail { -void core_number_impl(Graph *graph, - int *core_number) { +template +void core_number(experimental::GraphCSR const &graph, + int *core_number) { + using HornetGraph = hornet::gpu::HornetStatic; - using HornetInit = hornet::HornetInit; + using HornetInit = hornet::HornetInit; using CoreNumber = hornets_nest::CoreNumberStatic; - HornetInit init(graph->numberOfVertices, graph->adjList->indices->size, - static_cast(graph->adjList->offsets->data), - static_cast(graph->adjList->indices->data)); + HornetInit init(graph.number_of_vertices, + graph.number_of_edges, + graph.offsets, + graph.indices); HornetGraph hnt(init, hornet::DeviceType::DEVICE); CoreNumber cn(hnt, core_number); cn.run(); - } struct FilterEdges { int k; int* core_number; - FilterEdges(int _k, thrust::device_ptr core_num) : - k(_k), core_number(core_num.get()) {} + FilterEdges(int _k, int *d_core_num) : + k(_k), core_number(d_core_num) {} template __host__ __device__ @@ -61,172 +56,118 @@ struct FilterEdges { } }; -template -void extract_edges( - Graph *i_graph, - Graph *o_graph, - thrust::device_ptr c_ptr, - int k, - int filteredEdgeCount) { - cudaStream_t stream{nullptr}; - - //Allocate output columns - o_graph->edgeList = new gdf_edge_list; - o_graph->edgeList->src_indices = new gdf_column; - o_graph->edgeList->dest_indices = new gdf_column; - o_graph->edgeList->ownership = 2; +template +void extract_edges(experimental::GraphCOO const &i_graph, + experimental::GraphCOO &o_graph, + VT *d_core, + int k, + ET filteredEdgeCount) { - bool hasData = (i_graph->edgeList->edge_data != nullptr); - - //Allocate underlying memory for 
output columns - int *o_src, *o_dst, *o_wgt; - ALLOC_TRY((void**)&o_src, sizeof(int) * filteredEdgeCount, stream); - ALLOC_TRY((void**)&o_dst, sizeof(int) * filteredEdgeCount, stream); - - int *i_src = static_cast(i_graph->edgeList->src_indices->data); - int *i_dst = static_cast(i_graph->edgeList->dest_indices->data); - WT *i_wgt = nullptr; + cudaStream_t stream{nullptr}; - gdf_column_view(o_graph->edgeList->src_indices, o_src, - nullptr, filteredEdgeCount, GDF_INT32); - gdf_column_view(o_graph->edgeList->dest_indices, o_dst, - nullptr, filteredEdgeCount, GDF_INT32); + ALLOC_TRY(&o_graph.src_indices, sizeof(VT) * filteredEdgeCount, stream); + ALLOC_TRY(&o_graph.dst_indices, sizeof(VT) * filteredEdgeCount, stream); + o_graph.edge_data = nullptr; - //Set pointers and allocate memory/columns in case input graph has edge_data - if (hasData) { - o_graph->edgeList->edge_data = new gdf_column; - ALLOC_TRY((void**)&o_wgt, sizeof(WT) * filteredEdgeCount, stream); - i_wgt = static_cast(i_graph->edgeList->edge_data->data); - gdf_column_view(o_graph->edgeList->edge_data, o_wgt, - nullptr, filteredEdgeCount, i_graph->edgeList->edge_data->dtype); - } + bool hasData = (i_graph.edge_data != nullptr); - gdf_size_type nE = i_graph->edgeList->src_indices->size; //If an edge satisfies k-core conditions i.e. 
core_num[src] and core_num[dst] //are both greater than or equal to k, copy it to the output graph if (hasData) { - auto inEdge = thrust::make_zip_iterator(thrust::make_tuple( - thrust::device_pointer_cast(i_src), - thrust::device_pointer_cast(i_dst), - thrust::device_pointer_cast(i_wgt))); - auto outEdge = thrust::make_zip_iterator(thrust::make_tuple( - thrust::device_pointer_cast(o_src), - thrust::device_pointer_cast(o_dst), - thrust::device_pointer_cast(o_wgt))); + ALLOC_TRY(&o_graph.edge_data, sizeof(WT) * filteredEdgeCount, stream); + + auto inEdge = thrust::make_zip_iterator(thrust::make_tuple(i_graph.src_indices, + i_graph.dst_indices, + i_graph.edge_data)); + auto outEdge = thrust::make_zip_iterator(thrust::make_tuple(o_graph.src_indices, + o_graph.dst_indices, + o_graph.edge_data)); auto ptr = thrust::copy_if(rmm::exec_policy(stream)->on(stream), - inEdge, inEdge + nE, - outEdge, - FilterEdges(k, c_ptr)); - if ((ptr - outEdge) != filteredEdgeCount) { CUGRAPH_FAIL("Edge extraction failed"); } + inEdge, inEdge + i_graph.number_of_edges, + outEdge, + FilterEdges(k, d_core)); + if (thrust::distance(outEdge, ptr) != filteredEdgeCount) { CUGRAPH_FAIL("Edge extraction failed"); } } else { - auto inEdge = thrust::make_zip_iterator(thrust::make_tuple( - thrust::device_pointer_cast(i_src), - thrust::device_pointer_cast(i_dst))); - auto outEdge = thrust::make_zip_iterator(thrust::make_tuple( - thrust::device_pointer_cast(o_src), - thrust::device_pointer_cast(o_dst))); + auto inEdge = thrust::make_zip_iterator(thrust::make_tuple(i_graph.src_indices, + i_graph.dst_indices)); + auto outEdge = thrust::make_zip_iterator(thrust::make_tuple(o_graph.src_indices, + o_graph.dst_indices)); auto ptr = thrust::copy_if(rmm::exec_policy(stream)->on(stream), - inEdge, inEdge + nE, - outEdge, - FilterEdges(k, c_ptr)); - if ((ptr - outEdge) != filteredEdgeCount) { CUGRAPH_FAIL("Edge extraction failed"); } + inEdge, inEdge + i_graph.number_of_edges, + outEdge, + FilterEdges(k, 
d_core)); + if (thrust::distance(outEdge, ptr) != filteredEdgeCount) { CUGRAPH_FAIL("Edge extraction failed"); } } - } -} //namespace - //Extract a subgraph from in_graph (with or without weights) //to out_graph based on whether edges in in_graph satisfy kcore //conditions. //i.e. All edges (s,d,w) in in_graph are copied over to out_graph //if core_num[s] and core_num[d] are greater than or equal to k. -void extract_subgraph(Graph *in_graph, - Graph *out_graph, - int * vid, - int * core_num, - int k, - gdf_size_type len, - gdf_size_type nV) { +template +void extract_subgraph(experimental::GraphCOO const &in_graph, + experimental::GraphCOO &out_graph, + int const *vid, + int const *core_num, + int k, + int len, + int num_verts) { + cudaStream_t stream{nullptr}; - rmm::device_vector c; - thrust::device_ptr c_ptr = thrust::device_pointer_cast(core_num); - //We cannot assume that the user provided core numbers per vertex will be in - //order. Therefore, they need to be reordered by the vertex ids in a temporary - //array. - c.resize(nV, 0); - thrust::device_ptr v_ptr = thrust::device_pointer_cast(vid); + rmm::device_vector sorted_core_num(num_verts); + thrust::scatter(rmm::exec_policy(stream)->on(stream), - c_ptr, c_ptr + len, - v_ptr, c.begin()); - c_ptr = thrust::device_pointer_cast(c.data().get()); + core_num, core_num + len, + vid, sorted_core_num.begin()); - cugraph::add_edge_list(in_graph); - thrust::device_ptr src = - thrust::device_pointer_cast(static_cast(in_graph->edgeList->src_indices->data)); - thrust::device_ptr dst = - thrust::device_pointer_cast(static_cast(in_graph->edgeList->dest_indices->data)); + VT *d_sorted_core_num = sorted_core_num.data().get(); //Count number of edges in the input graph that satisfy kcore conditions //i.e. 
core_num[src] and core_num[dst] are both greater than or equal to k - gdf_size_type nE = in_graph->edgeList->src_indices->size; - auto edge = thrust::make_zip_iterator(thrust::make_tuple(src, dst)); - int filteredEdgeCount = thrust::count_if(rmm::exec_policy(stream)->on(stream), - edge, edge + nE, detail::FilterEdges(k, c_ptr)); - - //Extract the relevant edges that have satisfied k-core conditions and put them in the output graph - if (in_graph->edgeList->edge_data != nullptr) { - switch (in_graph->edgeList->edge_data->dtype) { - case GDF_FLOAT32: return detail::extract_edges (in_graph, out_graph, c_ptr, k, filteredEdgeCount); - case GDF_FLOAT64: return detail::extract_edges(in_graph, out_graph, c_ptr, k, filteredEdgeCount); - default: CUGRAPH_FAIL("Unsupported data type: edge data needs to be float32 or float64"); - } - } - else { - return detail::extract_edges (in_graph, out_graph, c_ptr, k, filteredEdgeCount); - } + auto edge = thrust::make_zip_iterator(thrust::make_tuple(in_graph.src_indices, + in_graph.dst_indices)); + + out_graph.number_of_vertices = in_graph.number_of_vertices; + + out_graph.number_of_edges = thrust::count_if(rmm::exec_policy(stream)->on(stream), + edge, edge + in_graph.number_of_edges, + detail::FilterEdges(k, d_sorted_core_num)); + + return extract_edges(in_graph, out_graph, d_sorted_core_num, k, out_graph.number_of_edges); } -void core_number(Graph *graph, - gdf_column *core_number) { +} //namespace detail - CHECK_GRAPH(graph) - CUGRAPH_EXPECTS(graph->adjList->offsets->dtype == GDF_INT32, "Unsupported data type: graph needs to be int32"); - CUGRAPH_EXPECTS(graph->adjList->indices->dtype == GDF_INT32, "Unsupported data type: graph needs to be int32"); - CUGRAPH_EXPECTS(core_number->dtype == GDF_INT32, "Unsupported data type: core number needs to be int32"); - CUGRAPH_EXPECTS(core_number->size == graph->numberOfVertices, "Column size mismatch"); - return detail::core_number_impl(graph, static_cast(core_number->data)); +template +void 
core_number(experimental::GraphCSR const &graph, VT *core_number) { + return detail::core_number(graph, core_number); } -void k_core(Graph *in_graph, - int k, - gdf_column *vertex_id, - gdf_column *core_number, - Graph *out_graph) { - - CUGRAPH_EXPECTS(out_graph != nullptr, "Invalid API parameter: out_graph is NULL"); - CUGRAPH_EXPECTS(in_graph != nullptr, "Invalid API parameter: in_graph is NULL"); - gdf_size_type nV = in_graph->numberOfVertices; - - CUGRAPH_EXPECTS(in_graph->adjList->offsets->dtype == GDF_INT32, "Unsupported data type: graph needs to be int32"); - CUGRAPH_EXPECTS(in_graph->adjList->indices->dtype == GDF_INT32, "Unsupported data type: graph needs to be int32"); - CUGRAPH_EXPECTS((vertex_id != nullptr) && (core_number != nullptr), "Invalid API parameter"); - CUGRAPH_EXPECTS(vertex_id->dtype == GDF_INT32, "Unsupported data type"); - CUGRAPH_EXPECTS(core_number->dtype == GDF_INT32, "Unsupported data type"); - CUGRAPH_EXPECTS(core_number->size == vertex_id->size, "Invalid API parameter"); - CUGRAPH_EXPECTS(core_number->size == nV, "Invalid API parameter"); - CUGRAPH_EXPECTS(k >= 0, "Invalid API parameter"); - - int * vertex_identifier_ptr = static_cast(vertex_id->data); - int * core_number_ptr = static_cast(core_number->data); - gdf_size_type vLen = vertex_id->size; - - extract_subgraph(in_graph, out_graph, - vertex_identifier_ptr, core_number_ptr, - k, vLen, nV); +template +void k_core(experimental::GraphCOO const &in_graph, + int k, + VT const *vertex_id, + VT const *core_number, + VT num_vertex_ids, + experimental::GraphCOO &out_graph) { + + CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid API parameter: vertex_id is NULL"); + CUGRAPH_EXPECTS(core_number != nullptr, "Invalid API parameter: core_number is NULL"); + CUGRAPH_EXPECTS(k >= 0, "Invalid API parameter: k must be >= 0"); + + detail::extract_subgraph(in_graph, out_graph, + vertex_id, core_number, + k, num_vertex_ids, in_graph.number_of_vertices); } +template void 
core_number(experimental::GraphCSR const &, int32_t *core_number); +template void k_core(experimental::GraphCOO const &, int, int32_t const *, + int32_t const *, int32_t, experimental::GraphCOO &); +template void k_core(experimental::GraphCOO const &, int, int32_t const *, + int32_t const *, int32_t, experimental::GraphCOO &); + } //namespace cugraph diff --git a/cpp/src/ktruss/ktruss.cu b/cpp/src/ktruss/ktruss.cu index a0cf4449514..3d0bdf1c72a 100644 --- a/cpp/src/ktruss/ktruss.cu +++ b/cpp/src/ktruss/ktruss.cu @@ -82,9 +82,8 @@ void ktruss_subgraph_impl(experimental::GraphCOO const &graph, kt.copyGraph(out_src, out_dst); - experimental::GraphCOO subgraph( - const_cast(out_src), const_cast(out_dst), - nullptr, graph.number_of_vertices, subgraph_edge_count); + experimental::GraphCOO subgraph(out_src, out_dst, nullptr, + graph.number_of_vertices, subgraph_edge_count); output_graph = subgraph; output_graph.prop.directed = true; @@ -140,9 +139,8 @@ void weighted_ktruss_subgraph_impl(experimental::GraphCOO const &gra kt.copyGraph(out_src, out_dst, out_wgt); - experimental::GraphCOO subgraph( - const_cast(out_src), const_cast(out_dst), - const_cast(out_wgt), graph.number_of_vertices, subgraph_edge_count); + experimental::GraphCOO subgraph(out_src, out_dst, out_wgt, + graph.number_of_vertices, subgraph_edge_count); output_graph = subgraph; output_graph.prop.directed = true; diff --git a/python/cugraph/cores/core_number.pxd b/python/cugraph/cores/core_number.pxd index 02aa1be098d..e443aa2a4c1 100644 --- a/python/cugraph/cores/core_number.pxd +++ b/python/cugraph/cores/core_number.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,12 +16,11 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph cimport * +from cugraph.structure.graph_new cimport * +cdef extern from "algorithms.hpp" namespace "cugraph": -cdef extern from "cugraph.h" namespace "cugraph": - - cdef void core_number( - Graph *graph, - gdf_column *core_number) except + + cdef void core_number[VT,ET,WT]( + const GraphCSR[VT,ET,WT] &graph, + VT *core_number) except + diff --git a/python/cugraph/cores/core_number.py b/python/cugraph/cores/core_number.py index f6ae00296d5..b996d022f55 100644 --- a/python/cugraph/cores/core_number.py +++ b/python/cugraph/cores/core_number.py @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.cores import core_number_wrapper - +from cugraph.utilities.unrenumber import unrenumber def core_number(G): """ @@ -54,4 +54,7 @@ def core_number(G): df = core_number_wrapper.core_number(G) + if G.renumbered: + df = unrenumber(G.edgelist.renumber_map, df, 'vertex') + return df diff --git a/python/cugraph/cores/core_number_wrapper.pyx b/python/cugraph/cores/core_number_wrapper.pyx index 88519d302d8..a8bc8ba600c 100644 --- a/python/cugraph/cores/core_number_wrapper.pyx +++ b/python/cugraph/cores/core_number_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,14 +17,10 @@ # cython: language_level = 3 cimport cugraph.cores.core_number as c_core -from cugraph.structure.graph cimport * -from cugraph.structure import graph_wrapper +from cugraph.structure.graph_new cimport * +from cugraph.structure import graph_new_wrapper from cugraph.utilities.column_utils cimport * -from cugraph.utilities.unrenumber import unrenumber -from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.stdlib cimport calloc, malloc, free -from libc.float cimport FLT_MAX_EXP import cudf import cudf._lib as libcudf @@ -36,39 +32,26 @@ def core_number(input_graph): """ Call core_number """ - cdef uintptr_t graph = graph_wrapper.allocate_cpp_graph() - cdef Graph * g = graph + if not input_graph.adjlist: + input_graph.view_adj_list() - if input_graph.adjlist: - [offsets, indices] = graph_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = graph_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) - graph_wrapper.add_adj_list(graph, offsets, indices, weights) - else: - [src, dst] = graph_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) - if input_graph.edgelist.weights: - [weights] = graph_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) - graph_wrapper.add_edge_list(graph, src, dst, weights) - else: - graph_wrapper.add_edge_list(graph, src, dst) - add_adj_list(g) - offsets, indices, values = graph_wrapper.get_adj_list(graph) - input_graph.adjlist = input_graph.AdjList(offsets, indices, values) + [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - # we should add get_number_of_vertices() to Graph (and this should be - # used instead of g.adjList.offsets.size - 1) - num_verts = g.adjList.offsets.size - 1 + num_verts = 
input_graph.number_of_vertices() + num_edges = len(indices) df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) df['core_number'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef gdf_column c_core_number_col = get_gdf_column_view(df['core_number']) - g.adjList.get_vertex_identifiers(&c_identifier_col) - - c_core.core_number(g, &c_core_number_col) + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] + cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; + cdef uintptr_t c_core_number = df['core_number'].__cuda_array_interface__['data'][0]; - if input_graph.renumbered: - df = unrenumber(input_graph.edgelist.renumber_map, df, 'vertex') + cdef GraphCSR[int,int,float] graph = GraphCSR[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) + + graph.get_vertex_identifiers(c_identifier) + c_core.core_number(graph, c_core_number) return df diff --git a/python/cugraph/cores/k_core.pxd b/python/cugraph/cores/k_core.pxd index 1bb1e40b3ff..ac15bd92079 100644 --- a/python/cugraph/cores/k_core.pxd +++ b/python/cugraph/cores/k_core.pxd @@ -16,15 +16,15 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph cimport * +from cugraph.structure.graph_new cimport * +cdef extern from "algorithms.hpp" namespace "cugraph": -cdef extern from "cugraph.h" namespace "cugraph": - - cdef void k_core( - Graph *in_graph, + cdef void k_core[VT,ET,WT]( + const GraphCOO[VT,ET,WT] &in_graph, int k, - gdf_column *vertex_id, - gdf_column *core_number, - Graph *out_graph) except + + const VT *vertex_id, + const VT *core_number, + VT num_vertex_ids, + GraphCOO[VT,ET,WT] &out_graph) except + diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 3353a7569fd..ff9cc41e299 100644 --- 
a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -12,7 +12,8 @@ # limitations under the License. from cugraph.cores import k_core_wrapper, core_number_wrapper -from cugraph.structure.graph import DiGraph +#from cugraph.structure.graph import DiGraph +from cugraph.utilities.unrenumber import unrenumber def k_core(G, @@ -61,17 +62,36 @@ def k_core(G, >>> KCoreGraph = cugraph.k_core(G) """ - KCoreGraph = DiGraph() - if core_number is None: + mytype = type(G) + KCoreGraph = mytype() + + if core_number is not None: + if G.renumbered is True: + renumber_df = cudf.DataFrame() + renumber_df['map'] = G.edgelist.renumber_map + renumber_df['id'] = G.edgelist.renumber_map.index.astype(np.int32) + core_number = core_number.merge(renumber_df, left_on='vertex', right_on='map', how='left').drop('map') + else: core_number = core_number_wrapper.core_number(G) core_number = core_number.rename(columns={"core_number": "values"}) if k is None: k = core_number['values'].max() - k_core_wrapper.k_core(G, - KCoreGraph, - k, - core_number) + k_core_df = k_core_wrapper.k_core(G, k, core_number) + + if G.renumbered: + k_core_df = unrenumber(G.edgelist.renumber_map, k_core_df, 'src') + k_core_df = unrenumber(G.edgelist.renumber_map, k_core_df, 'dst') + + if G.edgelist.weights: + KCoreGraph.from_cudf_edgelist(k_core_df, + source='src', + destination='dst', + edge_attr='weight') + else: + KCoreGraph.from_cudf_edgelist(k_core_df, + source='src', + destination='dst') return KCoreGraph diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx index 31f3ba753fb..1d2426eb8d2 100644 --- a/python/cugraph/cores/k_core_wrapper.pyx +++ b/python/cugraph/cores/k_core_wrapper.pyx @@ -16,9 +16,9 @@ # cython: embedsignature = True # cython: language_level = 3 -cimport cugraph.cores.k_core as c_k_core -from cugraph.structure.graph cimport * -from cugraph.structure import graph_wrapper +from cugraph.cores.k_core cimport k_core as c_k_core +from 
cugraph.structure.graph_new cimport * +from cugraph.structure import graph_new_wrapper from cugraph.utilities.column_utils cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -31,61 +31,81 @@ import rmm import numpy as np -def k_core(input_graph, k_core_graph, k, core_number): +#### FIXME: Should return data frame instead of passing in k_core_graph... +#### Ripple down through implementation (algorithms.hpp, core_number.cu) + +def k_core(input_graph, k, core_number): """ Call k_core """ - cdef uintptr_t graph = graph_wrapper.allocate_cpp_graph() - cdef Graph * g = graph + if not input_graph.edgelist: + input_graph.view_edge_list() - if input_graph.adjlist: - [offsets, indices] = graph_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = graph_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) - graph_wrapper.add_adj_list(graph, offsets, indices, weights) - else: - [src, dst] = graph_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) - if input_graph.edgelist.weights: - [weights] = graph_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) - graph_wrapper.add_edge_list(graph, src, dst, weights) - else: - graph_wrapper.add_edge_list(graph, src, dst) - add_adj_list(g) - offsets, indices, values = graph_wrapper.get_adj_list(graph) - input_graph.adjlist = input_graph.AdjList(offsets, indices, values) - - cdef uintptr_t rGraph = graph_wrapper.allocate_cpp_graph() - cdef Graph* rg = rGraph - - cdef gdf_column c_vertex - cdef gdf_column c_values - [core_number['vertex'], core_number['values']] = graph_wrapper.datatype_cast([core_number['vertex'], core_number['values']], [np.int32]) - if input_graph.renumbered is True: - renumber_df = cudf.DataFrame() - renumber_df['map'] = input_graph.edgelist.renumber_map - renumber_df['id'] = 
input_graph.edgelist.renumber_map.index.astype(np.int32) - cn = core_number.merge(renumber_df, left_on='vertex', right_on='map', how='left').drop('map') - c_vertex = get_gdf_column_view(cn['id']) - c_values = get_gdf_column_view(cn['values']) + [src, dst] = graph_new_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + weights = None + weights_type = np.float32 + + num_verts = input_graph.number_of_vertices() + num_edges = len(src) + + [core_number['vertex'], core_number['values']] = graph_new_wrapper.datatype_cast([core_number['vertex'], core_number['values']], [np.int32]) + + cdef uintptr_t c_src = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_vertex = core_number['vertex'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_values = core_number['values'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_weights = NULL + + if input_graph.edgelist.weights: + [weights] = graph_new_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) + weight_type = weights.dtype + c_weights = weights.__cuda_array_interface__['data'][0] + + cdef GraphCOO[int,int,float] in_graph_float + cdef GraphCOO[int,int,float] out_graph_float + cdef GraphCOO[int,int,double] in_graph_double + cdef GraphCOO[int,int,double] out_graph_double + + df = cudf.DataFrame() + + if weights_type == np.float32: + in_graph_float = GraphCOO[int,int,float](c_src, c_dst, c_weights, num_verts, num_edges) + c_k_core[int,int,float](in_graph_float, k, c_vertex, c_values, len(core_number), out_graph_float) + + tmp = rmm.device_array_from_ptr(out_graph_float.src_indices, + nelem=out_graph_float.number_of_edges, + dtype=np.int32) + df['src'] = cudf.Series(tmp) + + tmp = rmm.device_array_from_ptr(out_graph_float.dst_indices, + nelem=out_graph_float.number_of_edges, + dtype=np.int32) + df['dst'] = cudf.Series(tmp) + + if weights 
is not None: + tmp = rmm.device_array_from_ptr(out_graph_float.edge_data, + nelem=out_graph_float.number_of_edges, + dtype=np.int32) + df['weight'] = tmp else: - c_vertex = get_gdf_column_view(core_number['vertex']) - c_values = get_gdf_column_view(core_number['values']) - c_k_core.k_core(g, k, &c_vertex, &c_values, rg) - - if rg.edgeList is not NULL: - df = cudf.DataFrame() - df['src'], df['dst'], vals = graph_wrapper.get_edge_list(rGraph) - if vals is not None: - df['val'] = vals - k_core_graph.from_cudf_edgelist(df, source='src', destination='dst', edge_attr='val', renumber=False) - else: - k_core_graph.from_cudf_edgelist(df, source='src', destination='dst', renumber=False) - if input_graph.edgelist is not None: - k_core_graph.renumbered = input_graph.renumbered - k_core_graph.edgelist.renumber_map = input_graph.edgelist.renumber_map - - if rg.adjList is not NULL: - off, ind, vals = graph_wrapper.get_adj_list(rGraph) - k_core_graph.from_cudf_adjlist(off, ind, vals) - if rg.transposedAdjList is not NULL: - off, ind, vals = graph_wrapper.get_transposed_adj_list(rGraph) - k_core_graph.transposedadjlist = k_core_graph.transposedAdjList(off, ind, vals) + in_graph_double = GraphCOO[int,int,double](c_src, c_dst, c_weights, num_verts, num_edges) + c_k_core[int,int,double](in_graph_double, k, &c_vertex, &c_values, len(core_number), out_graph_double) + + tmp = rmm.device_array_from_ptr(out_graph_double.src_indices, + nelem=out_graph_double.number_of_edges, + dtype=np.int32) + df['src'] = cudf.Series(tmp) + + tmp = rmm.device_array_from_ptr(out_graph_double.dst_indices, + nelem=out_graph_double.number_of_edges, + dtype=np.int32) + df['dst'] = cudf.Series(tmp) + + if weights is not None: + tmp = rmm.device_array_from_ptr(out_graph_double.edge_data, + nelem=out_graph_double.number_of_edges, + dtype=np.int32) + df['weight'] = tmp + + + return df diff --git a/python/cugraph/structure/graph_new.pxd b/python/cugraph/structure/graph_new.pxd index a43d5376799..762e82adaf5 100644 
--- a/python/cugraph/structure/graph_new.pxd +++ b/python/cugraph/structure/graph_new.pxd @@ -46,14 +46,14 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": GraphBase(WT*,VT,ET) cdef cppclass GraphCOO[VT,ET,WT](GraphBase[VT,ET,WT]): - const VT *src_indices - const VT *dst_indices + VT *src_indices + VT *dst_indices GraphCOO() GraphCOO(const VT *, const ET *, const WT *, size_t, size_t) cdef cppclass GraphCompressedSparseBase[VT,ET,WT](GraphBase[VT,ET,WT]): - const VT *offsets - const VT *indices + VT *offsets + VT *indices void get_source_indices(VT *) const From d8460d44e6a70eb91f1659085091cf037babff93 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 25 Mar 2020 16:55:35 -0400 Subject: [PATCH 06/40] fix flake8 format issues --- python/cugraph/cores/core_number.py | 1 + python/cugraph/cores/k_core.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cores/core_number.py b/python/cugraph/cores/core_number.py index b996d022f55..0adb652c237 100644 --- a/python/cugraph/cores/core_number.py +++ b/python/cugraph/cores/core_number.py @@ -14,6 +14,7 @@ from cugraph.cores import core_number_wrapper from cugraph.utilities.unrenumber import unrenumber + def core_number(G): """ Compute the core numbers for the nodes of the graph G. A k-core of a graph diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index ff9cc41e299..1e7bd97e177 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -12,9 +12,11 @@ # limitations under the License. 
from cugraph.cores import k_core_wrapper, core_number_wrapper -#from cugraph.structure.graph import DiGraph from cugraph.utilities.unrenumber import unrenumber +import cudf +import numpy as np + def k_core(G, k=None, @@ -70,7 +72,10 @@ def k_core(G, renumber_df = cudf.DataFrame() renumber_df['map'] = G.edgelist.renumber_map renumber_df['id'] = G.edgelist.renumber_map.index.astype(np.int32) - core_number = core_number.merge(renumber_df, left_on='vertex', right_on='map', how='left').drop('map') + core_number = core_number.merge(renumber_df, + left_on='vertex', + right_on='map', + how='left').drop('map') else: core_number = core_number_wrapper.core_number(G) core_number = core_number.rename(columns={"core_number": "values"}) From 6093541d54b55638b69e7d4182a3dcc51e78a21d Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 27 Mar 2020 10:31:01 -0400 Subject: [PATCH 07/40] fix syntax error --- python/cugraph/structure/graph_new_wrapper.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/structure/graph_new_wrapper.pyx b/python/cugraph/structure/graph_new_wrapper.pyx index 1c73f8ba868..bb737977273 100644 --- a/python/cugraph/structure/graph_new_wrapper.pyx +++ b/python/cugraph/structure/graph_new_wrapper.pyx @@ -92,7 +92,7 @@ def _degree(input_graph, x=0): if input_graph.transposedadjlist is not None: return _degree_csr(input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices, - transpose_x(x)) + transpose_x[x]) if input_graph.edgelist is not None: return _degree_coo(input_graph.edgelist.edgelist_df['src'], From 1aef3425614460c8855a2fd80ae22d00341e1238 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 31 Mar 2020 14:32:42 -0400 Subject: [PATCH 08/40] Address PR comments 1. Make C++ parameter for direction into a class enum 2. Update template parameters for other two-hop neighbor kernels 3. 
Fixed a few template parameter errors from review --- cpp/include/algorithms.hpp | 15 ++-- cpp/include/graph.hpp | 16 +++-- cpp/src/structure/graph.cu | 24 ++----- cpp/src/traversal/two_hop_neighbors.cu | 4 +- cpp/src/traversal/two_hop_neighbors.cuh | 69 ++++++++++--------- cpp/tests/centrality/katz_centrality_test.cu | 2 +- python/cugraph/structure/graph_new.pxd | 11 ++- .../cugraph/structure/graph_new_wrapper.pyx | 38 ++++++++-- 8 files changed, 105 insertions(+), 74 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 867d1af5219..02f5db3a887 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -221,13 +221,14 @@ enum class cugraph_cc_t { * * @throws cugraph::logic_error when an error occurs. * - * @tparam VT Type of vertex identifiers. Supported value : int (signed, 32-bit) - * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) - * @tparam WT Type of edge weights. Supported values : float or double. - * - * @param[in] graph cuGRAPH graph descriptor, should contain the connectivity information as a CSR - * @param[out] labels Device array of component labels (labels[i] indicates the label associated with - * vertex id i. + * @tparam VT Type of vertex identifiers. Supported value : int (signed, 32-bit) + * @tparam ET Type of edge identifiers. Supported value : int (signed, 32-bit) + * @tparam WT Type of edge weights. Supported values : float or double. + * + * @param[in] graph cuGRAPH graph descriptor, should contain the connectivity information as a CSR + * @param[in] connectivity_type STRONG or WEAK + * @param[out] labels Device array of component labels (labels[i] indicates the label associated with + * vertex id i. 
*/ template void connected_components(experimental::GraphCSR const &graph, diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index 5c9e990b091..8b7a163239e 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -30,6 +30,13 @@ struct GraphProperties { GraphProperties() = default; }; +enum class DegreeDirection { + IN_PLUS_OUT = 0, ///> Compute sum of in and out degree + IN, ///> Compute in degree + OUT, ///> Compute out degree + DEGREE_DIRECTION_COUNT +}; + /** * @brief Base class graphs, all but vertices and edges * @@ -82,12 +89,9 @@ class GraphCOO: public GraphBase { * * @param[out] degree Device array of size V (V is number of vertices) initialized to zeros. * Will contain the computed degree of every vertex. - * @param[in] x Integer value indicating type of degree calculation - * 0 : in+out degree - * 1 : in-degree - * 2 : out-degree + * @param[in] direction IN_PLUS_OUT, IN or OUT */ - void degree(ET *degree, int x) const; + void degree(ET *degree, DegreeDirection direction) const; /** * @brief Default constructor @@ -148,7 +152,7 @@ class GraphCompressedSparseBase: public GraphBase { * 1 : in-degree * 2 : out-degree */ - void degree(ET *degree, int x) const; + void degree(ET *degree, DegreeDirection direction) const; /** * @brief Wrap existing arrays representing adjacency lists in a Graph. diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 0a8eb62150a..98f24b4aee0 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -62,51 +62,39 @@ void GraphCompressedSparseBase::get_source_indices(VT *src_indices) co } template -void GraphCOO::degree(ET *degree, int x) const { +void GraphCOO::degree(ET *degree, DegreeDirection direction) const { // // NOTE: We assume offsets/indices are a CSR. If a CSC is passed // in then x should be modified to reflect the expected direction. // (e.g. 
if you have a CSC and you want in-degree (x=1) then pass // the offsets/indices and request an out-degree (x=2)) // - - // Calculates the degree of all vertices of the graph - // x = 0: in+out degree - // x = 1: in-degree - // x = 2: out-degree - cudaStream_t stream{nullptr}; - if (x != 1) { + if (direction != DegreeDirection::IN) { degree_from_vertex_ids(GraphBase::number_of_edges, src_indices, degree, stream); } - if (x != 2) { + if (direction != DegreeDirection::OUT) { degree_from_vertex_ids(GraphBase::number_of_edges, dst_indices, degree, stream); } } template -void GraphCompressedSparseBase::degree(ET *degree, int x) const { +void GraphCompressedSparseBase::degree(ET *degree, DegreeDirection direction) const { // // NOTE: We assume offsets/indices are a CSR. If a CSC is passed // in then x should be modified to reflect the expected direction. // (e.g. if you have a CSC and you want in-degree (x=1) then pass // the offsets/indices and request an out-degree (x=2)) // - - // Calculates the degree of all vertices of the graph - // x = 0: in+out degree - // x = 1: in-degree - // x = 2: out-degree - cudaStream_t stream{nullptr}; - if (x != 1) { + if (direction != DegreeDirection::IN) { degree_from_offsets(GraphBase::number_of_vertices, offsets, degree, stream); } - if (x != 2) { + if (direction != DegreeDirection::OUT) { degree_from_vertex_ids(GraphBase::number_of_edges, indices, degree, stream); } } diff --git a/cpp/src/traversal/two_hop_neighbors.cu b/cpp/src/traversal/two_hop_neighbors.cu index 9e7628331ed..cb9109c90f3 100644 --- a/cpp/src/traversal/two_hop_neighbors.cu +++ b/cpp/src/traversal/two_hop_neighbors.cu @@ -107,7 +107,7 @@ ET get_two_hop_neighbors(experimental::GraphCSR const &graph, tuple_start, tuple_end, tuple_start, - self_loop_flagger()); + self_loop_flagger()); tuple_end = thrust::unique(rmm::exec_policy(stream)->on(stream), tuple_start, tuple_end); // Get things ready to return @@ -123,4 +123,6 @@ ET get_two_hop_neighbors(experimental::GraphCSR 
const &graph, template int get_two_hop_neighbors(experimental::GraphCSR const &, int **, int **); +template int64_t get_two_hop_neighbors(experimental::GraphCSR const &, int32_t **, int32_t **); + } //namespace cugraph diff --git a/cpp/src/traversal/two_hop_neighbors.cuh b/cpp/src/traversal/two_hop_neighbors.cuh index 31b7a7fa885..7009d0a71fc 100644 --- a/cpp/src/traversal/two_hop_neighbors.cuh +++ b/cpp/src/traversal/two_hop_neighbors.cuh @@ -47,28 +47,28 @@ struct deref_functor { } }; -template +template struct self_loop_flagger { __host__ __device__ - bool operator()(const thrust::tuple pair) { + bool operator()(const thrust::tuple pair) { if (thrust::get<0>(pair) == thrust::get<1>(pair)) return false; return true; } }; -template -__device__ IndexType binsearch_maxle(const IndexType *vec, - const IndexType val, - IndexType low, - IndexType high) { +template +__device__ edge_t binsearch_maxle(const edge_t *vec, + const edge_t val, + edge_t low, + edge_t high) { while (true) { if (low == high) return low; //we know it exists if ((low + 1) == high) return (vec[high] <= val) ? 
high : low; - IndexType mid = low + (high - low) / 2; + edge_t mid = low + (high - low) / 2; if (vec[mid] > val) high = mid - 1; @@ -77,38 +77,39 @@ __device__ IndexType binsearch_maxle(const IndexType *vec, } } -template -__global__ void compute_bucket_offsets_kernel(const IndexType *frontier_degrees_exclusive_sum, - IndexType *bucket_offsets, - const IndexType frontier_size, - IndexType total_degree) { - IndexType end = ((total_degree - 1 + TWO_HOP_BLOCK_SIZE) / TWO_HOP_BLOCK_SIZE); +template +__global__ void compute_bucket_offsets_kernel(const edge_t *frontier_degrees_exclusive_sum, + edge_t *bucket_offsets, + const edge_t frontier_size, + edge_t total_degree) { + edge_t end = ((total_degree - 1 + TWO_HOP_BLOCK_SIZE) / TWO_HOP_BLOCK_SIZE); - for (IndexType bid = blockIdx.x * blockDim.x + threadIdx.x; + for (edge_t bid = blockIdx.x * blockDim.x + threadIdx.x; bid <= end; bid += gridDim.x * blockDim.x) { - IndexType eid = min(bid * TWO_HOP_BLOCK_SIZE, total_degree - 1); + edge_t eid = min(bid * TWO_HOP_BLOCK_SIZE, total_degree - 1); bucket_offsets[bid] = binsearch_maxle(frontier_degrees_exclusive_sum, eid, - (IndexType) 0, + edge_t{0}, frontier_size - 1); } } -template -__global__ void scatter_expand_kernel(const IndexType *exsum_degree, - const IndexType *indices, - const IndexType *offsets, - const IndexType *bucket_offsets, - IndexType num_verts, - IndexType max_item, - IndexType max_block, - IndexType *output_first, - IndexType *output_second) { - __shared__ IndexType blockRange[2]; - for (IndexType bid = blockIdx.x; bid < max_block; bid += gridDim.x) { +template +__global__ void scatter_expand_kernel(const edge_t *exsum_degree, + const vertex_t *indices, + const edge_t *offsets, + const edge_t *bucket_offsets, + vertex_t num_verts, + edge_t max_item, + edge_t max_block, + vertex_t *output_first, + vertex_t *output_second) { + + __shared__ edge_t blockRange[2]; + for (edge_t bid = blockIdx.x; bid < max_block; bid += gridDim.x) { // Copy the start and end of 
the buckets range into shared memory if (threadIdx.x == 0) { blockRange[0] = bucket_offsets[bid]; @@ -117,13 +118,13 @@ __global__ void scatter_expand_kernel(const IndexType *exsum_degree, __syncthreads(); // Get the global thread id (for this virtual block) - IndexType tid = bid * blockDim.x + threadIdx.x; + edge_t tid = bid * blockDim.x + threadIdx.x; if (tid < max_item) { - IndexType sourceIdx = binsearch_maxle(exsum_degree, tid, blockRange[0], blockRange[1]); - IndexType sourceId = indices[sourceIdx]; - IndexType itemRank = tid - exsum_degree[sourceIdx]; + edge_t sourceIdx = binsearch_maxle(exsum_degree, tid, blockRange[0], blockRange[1]); + vertex_t sourceId = indices[sourceIdx]; + edge_t itemRank = tid - exsum_degree[sourceIdx]; output_second[tid] = indices[offsets[sourceId] + itemRank]; - IndexType baseSourceId = binsearch_maxle(offsets, sourceIdx, (IndexType)0, num_verts); + edge_t baseSourceId = binsearch_maxle(offsets, sourceIdx, edge_t{0}, edge_t{num_verts}); output_first[tid] = baseSourceId; } } diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index e076a13e991..5f2e33e7adc 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -43,7 +43,7 @@ int getMaxDegree(cugraph::experimental::GraphCSR const &g) { rmm::device_vector degree_vector(g.number_of_vertices); ET *p_degree = degree_vector.data().get(); - g.degree(p_degree, 2); + g.degree(p_degree, cugraph::experimental::DegreeDirection::OUT); ET max_out_degree = thrust::reduce(rmm::exec_policy(stream)->on(stream), p_degree, p_degree + g.number_of_vertices, diff --git a/python/cugraph/structure/graph_new.pxd b/python/cugraph/structure/graph_new.pxd index 762e82adaf5..73e5510f737 100644 --- a/python/cugraph/structure/graph_new.pxd +++ b/python/cugraph/structure/graph_new.pxd @@ -25,6 +25,11 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": PROP_FALSE 
"cugraph::experimental::PROP_FALSE" PROP_TRUE "cugraph::experimental::PROP_TRUE" + ctypedef enum DegreeDirection: + DIRECTION_IN_PLUS_OUT "cugraph::experimental::DegreeDirection::IN_PLUS_OUT" + DIRECTION_IN "cugraph::experimental::DegreeDirection::IN" + DIRECTION_OUT "cugraph::experimental::DegreeDirection::OUT" + struct GraphProperties: bool directed bool weighted @@ -41,13 +46,14 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": void get_vertex_identifiers(VT *) const - void degree(ET *,int) - GraphBase(WT*,VT,ET) cdef cppclass GraphCOO[VT,ET,WT](GraphBase[VT,ET,WT]): VT *src_indices VT *dst_indices + + void degree(ET *,DegreeDirection) const + GraphCOO() GraphCOO(const VT *, const ET *, const WT *, size_t, size_t) @@ -56,6 +62,7 @@ cdef extern from "graph.hpp" namespace "cugraph::experimental": VT *indices void get_source_indices(VT *) const + void degree(ET *,DegreeDirection) const GraphCompressedSparseBase(const VT *, const ET *, const WT *, size_t, size_t) diff --git a/python/cugraph/structure/graph_new_wrapper.pyx b/python/cugraph/structure/graph_new_wrapper.pyx index bb737977273..39799b71c51 100644 --- a/python/cugraph/structure/graph_new_wrapper.pyx +++ b/python/cugraph/structure/graph_new_wrapper.pyx @@ -38,6 +38,17 @@ def _degree_coo(src, dst, x=0): # # Computing the degree of the input graph from COO # + cdef DegreeDirection dir + + if x == 0: + dir = DIRECTION_IN_PLUS_OUT + elif x == 1: + dir = DIRECTION_IN + elif x == 2: + dir = DIRECTION_OUT + else: + raise Exception("x should be 0, 1 or 2") + [src, dst] = datatype_cast([src, dst], [np.int32]) num_verts = 1 + max(src.max(), dst.max()) @@ -54,13 +65,25 @@ def _degree_coo(src, dst, x=0): cdef uintptr_t c_dst = dst.__cuda_array_interface__['data'][0] graph = GraphCOO[int,int,float](c_src, c_dst, NULL, num_verts, num_edges) - graph.degree( c_degree, x) + + graph.degree( c_degree, dir) graph.get_vertex_identifiers(c_vertex) return vertex_col, degree_col def _degree_csr(offsets, indices, 
x=0): + cdef DegreeDirection dir + + if x == 0: + dir = DIRECTION_IN_PLUS_OUT + elif x == 1: + dir = DIRECTION_IN + elif x == 2: + dir = DIRECTION_OUT + else: + raise Exception("x should be 0, 1 or 2") + [offsets, indices] = datatype_cast([offsets, indices], [np.int32]) num_verts = len(offsets)-1 @@ -77,17 +100,22 @@ def _degree_csr(offsets, indices, x=0): cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] graph = GraphCSR[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) - graph.degree( c_degree, x) + + graph.degree( c_degree, dir) graph.get_vertex_identifiers(c_vertex) return vertex_col, degree_col def _degree(input_graph, x=0): - transpose_x = { 0: 0, 1: 2, 2:1 } + transpose_x = { 0: 0, + 2: 1, + 1: 2 } if input_graph.adjlist is not None: - return _degree_csr(input_graph.adjlist.offsets, input_graph.adjlist.indices, x) + return _degree_csr(input_graph.adjlist.offsets, + input_graph.adjlist.indices, + x) if input_graph.transposedadjlist is not None: return _degree_csr(input_graph.transposedadjlist.offsets, @@ -102,7 +130,7 @@ def _degree(input_graph, x=0): raise Exception("input_graph not COO, CSR or CSC") def _degrees(input_graph): - verts, indegrees = _degree(input_graph, 1) + verts, indegrees = _degree(input_graph,1) verts, outdegrees = _degree(input_graph, 2) return verts, indegrees, outdegrees From abfc62a84eaa3119ac92e76f71493948fa2a29de Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Tue, 31 Mar 2020 17:23:06 -0400 Subject: [PATCH 09/40] update some documentation --- docs/source/api.rst | 7 ++++++ python/cugraph/structure/convert_matrix.py | 3 +++ python/cugraph/structure/graph.py | 25 ++++++++++++++++++++++ python/cugraph/structure/renumber.py | 2 +- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/docs/source/api.rst b/docs/source/api.rst index b84d8344d41..7e7c6ce7d14 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -46,6 +46,13 @@ Katz Centrality :members: :undoc-members: +Betweenness 
Centrality +---------------------- + +.. automodule:: cugraph.centrality.betweenness_centrality + :members: + :undoc-members: + Community ========= diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index 835a9f37969..4bc10c8d0ab 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -24,6 +24,7 @@ def from_cudf_edgelist(df, source='source', destination='destination', Return a new graph created from the edge list representaion. This function is added for NetworkX compatibility (this function is a RAPIDS version of NetworkX's from_pandas_edge_list()). + Parameters ---------- df : cudf.DataFrame @@ -38,12 +39,14 @@ def from_cudf_edgelist(df, source='source', destination='destination', weight : string or integer, optional This pointer can be ``None``. If not, this is used to index the weight column. + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G = cugraph.from_cudf_edgelist(M, source='0', target='1', weight='2') + """ if create_using is Graph: G = Graph() diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index b3442d9d36f..86051494a88 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -67,6 +67,7 @@ def __init__(self, m_graph=None, edge_attr=None, symmetrized=False, -------- >>> import cuGraph >>> G = cuGraph.Graph() + """ self.symmetrized = symmetrized self.renumbered = False @@ -146,6 +147,7 @@ def from_cudf_edgelist(self, input_df, source='source', >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2', renumber=False) + """ if self.edgelist is not None or self.adjlist is not None: @@ -288,6 +290,7 @@ def from_cudf_adjlist(self, offset_col, index_col, value_col=None): stores references to the deep-copies of the passed objects pointed by 
offset_col and index_col. Undirected edges must be stored as directed edges in both directions. + Parameters ---------- offset_col : cudf.Series @@ -319,6 +322,7 @@ def from_cudf_adjlist(self, offset_col, index_col, value_col=None): >>> indices = cudf.Series(M.indices) >>> G = cugraph.Graph() >>> G.from_cudf_adjlist(offsets, indices, None) + """ if self.edgelist is not None or self.adjlist is not None: raise Exception('Graph already has values') @@ -380,6 +384,7 @@ def view_transposed_adj_list(self): The gdf column contains the weight value for each edge. The expected type of the gdf_column element is floating point number. + """ if self.transposedadjlist is None: graph_wrapper.view_transposed_adj_list(self) @@ -398,6 +403,7 @@ def get_two_hop_neighbors(self): """ Compute vertex pairs that are two hops apart. The resulting pairs are sorted before returning. + Returns ------- df : cudf.DataFrame @@ -405,6 +411,7 @@ def get_two_hop_neighbors(self): the first vertex id of a pair. df['second'] : cudf.Series the second vertex id of a pair. + """ df = graph_wrapper.get_two_hop_neighbors(self) if self.renumbered is True: @@ -445,12 +452,14 @@ def number_of_nodes(self): """ An alias of number_of_vertices(). This function is added for NetworkX compatibility. + """ return self.number_of_vertices() def number_of_edges(self): """ Get the number of edges in the graph. + """ if self.edge_count is None: if self.edgelist is not None: @@ -476,11 +485,13 @@ def in_degree(self, vertex_subset=None): degrees for the entire set of vertices. If vertex_subset is provided, this method optionally filters out all but those listed in vertex_subset. + Parameters ---------- vertex_subset : cudf.Series or iterable container, optional A container of vertices for displaying corresponding in-degree. If not set, degrees are computed for the entire set of vertices. + Returns ------- df : cudf.DataFrame @@ -493,6 +504,7 @@ def in_degree(self, vertex_subset=None): specified). 
df['degree'] : cudf.Series The computed in-degree of the corresponding vertex. + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -502,6 +514,7 @@ def in_degree(self, vertex_subset=None): >>> G = cugraph.Graph() >>> G.add_edge_list(sources, destinations, None) >>> df = G.in_degree([0,9,12]) + """ return self._degree(vertex_subset, x=1) @@ -512,11 +525,13 @@ def out_degree(self, vertex_subset=None): degrees for the entire set of vertices. If vertex_subset is provided, this method optionally filters out all but those listed in vertex_subset. + Parameters ---------- vertex_subset : cudf.Series or iterable container, optional A container of vertices for displaying corresponding out-degree. If not set, degrees are computed for the entire set of vertices. + Returns ------- df : cudf.DataFrame @@ -529,6 +544,7 @@ def out_degree(self, vertex_subset=None): specified). df['degree'] : cudf.Series The computed out-degree of the corresponding vertex. + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -538,6 +554,7 @@ def out_degree(self, vertex_subset=None): >>> G = cugraph.Graph() >>> G.add_edge_list(sources, destinations, None) >>> df = G.out_degree([0,9,12]) + """ return self._degree(vertex_subset, x=2) @@ -547,11 +564,13 @@ def degree(self, vertex_subset=None): degrees for the entire set of vertices. If vertex_subset is provided, this method optionally filters out all but those listed in vertex_subset. + Parameters ---------- vertex_subset : cudf.Series or iterable container, optional A container of vertices for displaying corresponding degree. If not set, degrees are computed for the entire set of vertices. + Returns ------- df : cudf.DataFrame @@ -564,6 +583,7 @@ def degree(self, vertex_subset=None): specified). df['degree'] : cudf.Series The computed degree of the corresponding vertex. 
+ Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -573,6 +593,7 @@ def degree(self, vertex_subset=None): >>> G = cugraph.Graph() >>> G.add_edge_list(sources, destinations, None) >>> df = G.degree([0,9,12]) + """ return self._degree(vertex_subset) @@ -582,11 +603,13 @@ def degrees(self, vertex_subset=None): computes vertex degrees for the entire set of vertices. If vertex_subset is provided, this method optionally filters out all but those listed in vertex_subset. + Parameters ---------- vertex_subset : cudf.Series or iterable container, optional A container of vertices for displaying corresponding degree. If not set, degrees are computed for the entire set of vertices. + Returns ------- df : cudf.DataFrame @@ -597,6 +620,7 @@ def degrees(self, vertex_subset=None): The in-degree of the vertex. df['out_degree'] : cudf.Series The out-degree of the vertex. + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -606,6 +630,7 @@ def degrees(self, vertex_subset=None): >>> G = cugraph.Graph() >>> G.add_edge_list(sources, destinations, None) >>> df = G.degrees([0,9,12]) + """ vertex_col, in_degree_col, out_degree_col = graph_wrapper._degrees( self) diff --git a/python/cugraph/structure/renumber.py b/python/cugraph/structure/renumber.py index 1988c92ec03..1ef7380c44f 100644 --- a/python/cugraph/structure/renumber.py +++ b/python/cugraph/structure/renumber.py @@ -45,7 +45,7 @@ def renumber(source_col, dest_col): Destination indices must be an integer type. numbering_map : cudf.Series This cudf.Series wraps a gdf column of size V (V: number of vertices). - The gdf column contains a numbering map that mpas the new ids to the + The gdf column contains a numbering map that maps the new ids to the original ids. 
Examples From 7bfcdb745893e2b45ece8b5df25186b93ebf00f2 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 1 Apr 2020 15:14:48 -0400 Subject: [PATCH 10/40] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52210d977fd..3e44914ba43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ## Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 +- PR #795 Fix some documentation # cuGraph 0.13.0 (Date TBD) From 1971db5fb39cc6358fda0495b3877d0f640efe28 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Thu, 2 Apr 2020 01:38:46 -0500 Subject: [PATCH 11/40] change returned graph type --- python/cugraph/community/subgraph_extraction.py | 2 +- python/cugraph/cores/k_core.py | 2 +- python/cugraph/cores/ktruss_subgraph.py | 11 ++++------- python/cugraph/traversal/bfs.py | 12 ++++++------ 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 63edf72de0c..f1b0283a048 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -52,7 +52,7 @@ def subgraph(G, vertices): null_check(vertices) - result_graph = DiGraph() + result_graph = type(G)() subgraph_extraction_wrapper.subgraph( G, diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 3353a7569fd..51a55ae69ec 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -61,7 +61,7 @@ def k_core(G, >>> KCoreGraph = cugraph.k_core(G) """ - KCoreGraph = DiGraph() + KCoreGraph = type(G)() if core_number is None: core_number = core_number_wrapper.core_number(G) core_number = core_number.rename(columns={"core_number": "values"}) diff --git a/python/cugraph/cores/ktruss_subgraph.py b/python/cugraph/cores/ktruss_subgraph.py index 60397bbe6a9..f7cfd4e95da 100644 --- a/python/cugraph/cores/ktruss_subgraph.py +++ 
b/python/cugraph/cores/ktruss_subgraph.py @@ -15,7 +15,7 @@ from cugraph.structure.graph import DiGraph -def ktruss_subgraph(G, k, use_weights=True): +def ktruss_subgraph(G, k): """ Returns the subgraph of the k-truss of a graph for a specific k. @@ -59,9 +59,6 @@ def ktruss_subgraph(G, k, use_weights=True): k : int The desired k to be used for extracting the k-truss subgraph. - use_weights : Bool - whether the output should contain the edge weights if G has them - Returns ------- @@ -74,12 +71,12 @@ def ktruss_subgraph(G, k, use_weights=True): >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edge_list(M, source='0', destination='1') - >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) + >>> k_subgraph = cugraph.ktruss_subgraph(G) """ - KTrussSubgraph = DiGraph() + KTrussSubgraph = type(G)() - ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights, + ktruss_subgraph_wrapper.ktruss_subgraph(G, k, KTrussSubgraph) return KTrussSubgraph diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 194ff93189a..4ecc37d8fba 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -12,9 +12,9 @@ # limitations under the License. from cugraph.traversal import bfs_wrapper +from cugraph.structure.graph import * - -def bfs(G, start, directed=True): +def bfs(G, start): """ Find the distances and predecessors for a breadth first traversal of a graph. @@ -26,10 +26,6 @@ def bfs(G, start, directed=True): as an adjacency list. start : Integer The index of the graph vertex from which the traversal begins - directed : bool - Indicates whether the graph in question is a directed graph, or whether - each edge has a corresponding reverse edge. 
(Allows optimizations if - the graph is undirected) Returns ------- @@ -53,6 +49,10 @@ def bfs(G, start, directed=True): >>> df = cugraph.bfs(G, 0) """ + if type(G) is Graph: + directed = False + else: + directed = True df = bfs_wrapper.bfs(G, start, directed) return df From b18b91cfa0800c9697db5042d9ac809a1fedcf2f Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Thu, 2 Apr 2020 01:39:31 -0500 Subject: [PATCH 12/40] add tests --- python/cugraph/tests/test_k_core.py | 35 ++++++++-- python/cugraph/tests/test_k_truss_subgraph.py | 69 ++++++++++++------- .../cugraph/tests/test_subgraph_extraction.py | 44 ++++++++++-- 3 files changed, 111 insertions(+), 37 deletions(-) diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 3535d41c4ae..2f2ef57186b 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -34,14 +34,19 @@ print('Networkx version : {} '.format(nx.__version__)) -def calc_k_cores(graph_file): +def calc_k_cores(graph_file, directed=True): cu_M = utils.read_csv_file(graph_file) - G = cugraph.DiGraph() + NM = utils.read_csv_for_nx(graph_file) + if directed: + G = cugraph.DiGraph() + Gnx = nx.from_pandas_edgelist(NM, source='0', target='1', + create_using=nx.DiGraph()) + else: + G = cugraph.Graph() + Gnx = nx.from_pandas_edgelist(NM, source='0', target='1', + create_using=nx.Graph()) G.from_cudf_edgelist(cu_M, source='0', destination='1') ck = cugraph.k_core(G) - NM = utils.read_csv_for_nx(graph_file) - Gnx = nx.from_pandas_edgelist(NM, source='0', target='1', - create_using=nx.DiGraph()) nk = nx.k_core(Gnx) return ck, nk @@ -63,7 +68,7 @@ def compare_edges(cg, nxg): @pytest.mark.parametrize('managed, pool', list(product([False, True], [False, True]))) @pytest.mark.parametrize('graph_file', DATASETS) -def test_core_number(managed, pool, graph_file): +def test_core_number_DiGraph(managed, pool, graph_file): gc.collect() rmm.reinitialize( @@ -76,3 +81,21 @@ def test_core_number(managed, 
pool, graph_file): cu_kcore, nx_kcore = calc_k_cores(graph_file) assert compare_edges(cu_kcore, nx_kcore) + + +@pytest.mark.parametrize('managed, pool', + list(product([False, True], [False, True]))) +@pytest.mark.parametrize('graph_file', DATASETS) +def test_core_number_Graph(managed, pool, graph_file): + gc.collect() + + rmm.reinitialize( + managed_memory=managed, + pool_allocator=pool + ) + + assert(rmm.is_initialized()) + + cu_kcore, nx_kcore = calc_k_cores(graph_file, False) + + assert compare_edges(cu_kcore, nx_kcore) diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 19260735cfe..2514dcc35bb 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import scipy import gc from itertools import product @@ -20,7 +21,6 @@ from cugraph.tests import utils import rmm -import numpy as np # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -41,36 +41,40 @@ # parameter k. This fix (https://github.com/networkx/networkx/pull/3713) is # currently in networkx master and will hopefully will make it to a release # soon. 
-def ktruss_ground_truth(graph_file): - G = nx.read_edgelist(graph_file, nodetype=int, data=(('weights', float),)) - df = nx.to_pandas_edgelist(G) - return df - - -def cugraph_k_truss_subgraph(graph_file, k): +def ktruss_ground_truth(graph_file, directed): + Mnx = utils.read_csv_for_nx(graph_file) + N = max(max(Mnx['0']), max(Mnx['1'])) + 1 + Mcsr = scipy.sparse.csr_matrix((Mnx.weight, (Mnx['0'], Mnx['1'])), + shape=(N, N)) + if directed: + nxktruss_subgraph = nx.DiGraph(Mcsr) + else: + nxktruss_subgraph = nx.Graph(Mcsr) + return nxktruss_subgraph + + +def cugraph_k_truss_subgraph(graph_file, k, directed): cu_M = utils.read_csv_file(graph_file) - G = cugraph.DiGraph() - G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2') + if directed: + G = cugraph.DiGraph() + else: + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source='0', destination='1') k_subgraph = cugraph.ktruss_subgraph(G, k) return k_subgraph -def compare_k_truss(graph_file, k, ground_truth_file): - k_truss_cugraph = cugraph_k_truss_subgraph(graph_file, k) - k_truss_nx = ktruss_ground_truth(ground_truth_file) +def compare_k_truss(graph_file, k, ground_truth_file, directed=True): + k_truss_cugraph = cugraph_k_truss_subgraph(graph_file, k, directed) + k_truss_nx = ktruss_ground_truth(ground_truth_file, directed) edgelist_df = k_truss_cugraph.view_edge_list() - src = edgelist_df['src'] - dst = edgelist_df['dst'] - wgt = edgelist_df['weights'] + if not directed: + assert len(edgelist_df) == k_truss_nx.size() + src, dest = edgelist_df['src'], edgelist_df['dst'] for i in range(len(src)): - has_edge = ((k_truss_nx['source'] == src[i]) & - (k_truss_nx['target'] == dst[i]) & - np.isclose(k_truss_nx['weights'], wgt[i])).any() - has_opp_edge = ((k_truss_nx['source'] == dst[i]) & - (k_truss_nx['target'] == src[i]) & - np.isclose(k_truss_nx['weights'], wgt[i])).any() - assert(has_edge or has_opp_edge) + assert (k_truss_nx.has_edge(src[i], dest[i]) or + k_truss_nx.has_edge(dest[i], src[i])) return 
True @@ -81,9 +85,9 @@ def compare_k_truss(graph_file, k, ground_truth_file): @pytest.mark.parametrize('managed, pool', - list(product([False], [False]))) + list(product([False, True], [False, True]))) @pytest.mark.parametrize('graph_file, nx_ground_truth', DATASETS) -def test_ktruss_subgraph(managed, pool, graph_file, nx_ground_truth): +def test_ktruss_subgraph_DiGraph(managed, pool, graph_file, nx_ground_truth): gc.collect() rmm.reinitialize( @@ -93,3 +97,18 @@ def test_ktruss_subgraph(managed, pool, graph_file, nx_ground_truth): assert(rmm.is_initialized()) compare_k_truss(graph_file, 5, nx_ground_truth) + + +@pytest.mark.parametrize('managed, pool', + list(product([False, True], [False, True]))) +@pytest.mark.parametrize('graph_file, nx_ground_truth', DATASETS) +def test_ktruss_subgraph_Graph(managed, pool, graph_file, nx_ground_truth): + gc.collect() + + rmm.reinitialize( + managed_memory=managed, + pool_allocator=pool) + + assert(rmm.is_initialized()) + + compare_k_truss(graph_file, 5, nx_ground_truth, False) diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index a67feacf6fe..e662e98523a 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -43,8 +43,11 @@ def compare_edges(cg, nxg, verts): return True -def cugraph_call(M, verts): - G = cugraph.DiGraph() +def cugraph_call(M, verts, directed=True): + if directed: + G = cugraph.DiGraph() + else: + G = cugraph.Graph() cu_M = cudf.DataFrame() cu_M['src'] = cudf.Series(M['0']) cu_M['dst'] = cudf.Series(M['1']) @@ -53,9 +56,13 @@ def cugraph_call(M, verts): return cugraph.subgraph(G, cu_verts) -def nx_call(M, verts): - G = nx.from_pandas_edgelist(M, source='0', target='1', - create_using=nx.DiGraph()) +def nx_call(M, verts, directed=True): + if directed: + G = nx.from_pandas_edgelist(M, source='0', target='1', + create_using=nx.DiGraph()) + else: + G = nx.from_pandas_edgelist(M, 
source='0', target='1', + create_using=nx.Graph()) return nx.subgraph(G, verts) @@ -69,7 +76,7 @@ def nx_call(M, verts): @pytest.mark.parametrize('managed, pool', list(product([False, True], [False, True]))) @pytest.mark.parametrize('graph_file', DATASETS) -def test_subgraph_extraction(managed, pool, graph_file): +def test_subgraph_extraction_DiGraph(managed, pool, graph_file): gc.collect() rmm.reinitialize( @@ -88,3 +95,28 @@ def test_subgraph_extraction(managed, pool, graph_file): cu_sg = cugraph_call(M, verts) nx_sg = nx_call(M, verts) assert compare_edges(cu_sg, nx_sg, verts) + + +# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.parametrize('managed, pool', + list(product([False, True], [False, True]))) +@pytest.mark.parametrize('graph_file', DATASETS) +def test_subgraph_extraction_Graph(managed, pool, graph_file): + gc.collect() + + rmm.reinitialize( + managed_memory=managed, + pool_allocator=pool, + initial_pool_size=2 << 27 + ) + + assert(rmm.is_initialized()) + + M = utils.read_csv_for_nx(graph_file) + verts = np.zeros(3, dtype=np.int32) + verts[0] = 0 + verts[1] = 1 + verts[2] = 17 + cu_sg = cugraph_call(M, verts, False) + nx_sg = nx_call(M, verts, False) + assert compare_edges(cu_sg, nx_sg, verts) From b0407c0e4786686ceb22106e870dd612c7f1b25d Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Thu, 2 Apr 2020 14:24:51 -0500 Subject: [PATCH 13/40] fix errors in test --- CHANGELOG.md | 1 + .../cugraph/community/subgraph_extraction.py | 3 +- python/cugraph/cores/k_core.py | 1 - python/cugraph/cores/ktruss_subgraph.py | 6 ++-- python/cugraph/tests/test_k_truss_subgraph.py | 33 ++++++++++--------- python/cugraph/traversal/bfs.py | 3 +- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52210d977fd..9af9263009a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - PR #782 Use Cython's `new_build_ext` (if available) - PR #788 Added options and config file to enable 
codecov - PR #793 Fix legacy cudf imports/cimports +- PR #798 Edit return graph type in algorithms return graphs ## Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index f1b0283a048..3280c53a971 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -12,8 +12,7 @@ # limitations under the License. from cugraph.community import subgraph_extraction_wrapper -from cugraph.structure.graph import null_check, DiGraph - +from cugraph.structure.graph import null_check def subgraph(G, vertices): """ diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 51a55ae69ec..b66cf93515d 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -12,7 +12,6 @@ # limitations under the License. from cugraph.cores import k_core_wrapper, core_number_wrapper -from cugraph.structure.graph import DiGraph def k_core(G, diff --git a/python/cugraph/cores/ktruss_subgraph.py b/python/cugraph/cores/ktruss_subgraph.py index f7cfd4e95da..43d6361e4d7 100644 --- a/python/cugraph/cores/ktruss_subgraph.py +++ b/python/cugraph/cores/ktruss_subgraph.py @@ -15,7 +15,7 @@ from cugraph.structure.graph import DiGraph -def ktruss_subgraph(G, k): +def ktruss_subgraph(G, k, use_weights=True): """ Returns the subgraph of the k-truss of a graph for a specific k. 
@@ -71,12 +71,12 @@ def ktruss_subgraph(G, k): >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edge_list(M, source='0', destination='1') - >>> k_subgraph = cugraph.ktruss_subgraph(G) + >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) """ KTrussSubgraph = type(G)() - ktruss_subgraph_wrapper.ktruss_subgraph(G, k, + ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights, KTrussSubgraph) return KTrussSubgraph diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 2514dcc35bb..958c5c7998c 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -11,7 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import scipy import gc from itertools import product @@ -21,6 +20,7 @@ from cugraph.tests import utils import rmm +import numpy as np # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -41,16 +41,10 @@ # parameter k. This fix (https://github.com/networkx/networkx/pull/3713) is # currently in networkx master and will hopefully will make it to a release # soon. 
-def ktruss_ground_truth(graph_file, directed): - Mnx = utils.read_csv_for_nx(graph_file) - N = max(max(Mnx['0']), max(Mnx['1'])) + 1 - Mcsr = scipy.sparse.csr_matrix((Mnx.weight, (Mnx['0'], Mnx['1'])), - shape=(N, N)) - if directed: - nxktruss_subgraph = nx.DiGraph(Mcsr) - else: - nxktruss_subgraph = nx.Graph(Mcsr) - return nxktruss_subgraph +def ktruss_ground_truth(graph_file): + G = nx.read_edgelist(graph_file, nodetype=int, data=(('weights', float),)) + df = nx.to_pandas_edgelist(G) + return df def cugraph_k_truss_subgraph(graph_file, k, directed): @@ -59,22 +53,29 @@ def cugraph_k_truss_subgraph(graph_file, k, directed): G = cugraph.DiGraph() else: G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source='0', destination='1') + G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2') k_subgraph = cugraph.ktruss_subgraph(G, k) return k_subgraph def compare_k_truss(graph_file, k, ground_truth_file, directed=True): k_truss_cugraph = cugraph_k_truss_subgraph(graph_file, k, directed) - k_truss_nx = ktruss_ground_truth(ground_truth_file, directed) + k_truss_nx = ktruss_ground_truth(ground_truth_file) edgelist_df = k_truss_cugraph.view_edge_list() + src = edgelist_df['src'] + dst = edgelist_df['dst'] + wgt = edgelist_df['weights'] if not directed: assert len(edgelist_df) == k_truss_nx.size() - src, dest = edgelist_df['src'], edgelist_df['dst'] for i in range(len(src)): - assert (k_truss_nx.has_edge(src[i], dest[i]) or - k_truss_nx.has_edge(dest[i], src[i])) + has_edge = ((k_truss_nx['source'] == src[i]) & + (k_truss_nx['target'] == dst[i]) & + np.isclose(k_truss_nx['weights'], wgt[i])).any() + has_opp_edge = ((k_truss_nx['source'] == dst[i]) & + (k_truss_nx['target'] == src[i]) & + np.isclose(k_truss_nx['weights'], wgt[i])).any() + assert(has_edge or has_opp_edge) return True diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 4ecc37d8fba..84855810c20 100644 --- a/python/cugraph/traversal/bfs.py +++ 
b/python/cugraph/traversal/bfs.py @@ -12,7 +12,8 @@ # limitations under the License. from cugraph.traversal import bfs_wrapper -from cugraph.structure.graph import * +from cugraph.structure.graph import Graph + def bfs(G, start): """ From 993a2b245ae580eb1c03e41ab0ffa4e6db8e1b69 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Thu, 2 Apr 2020 18:32:01 -0500 Subject: [PATCH 14/40] fix ktruss test --- python/cugraph/cores/ktruss_subgraph_wrapper.pyx | 4 ++-- python/cugraph/tests/test_k_truss_subgraph.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cores/ktruss_subgraph_wrapper.pyx b/python/cugraph/cores/ktruss_subgraph_wrapper.pyx index 0f884ade2c2..045d8fe17c4 100644 --- a/python/cugraph/cores/ktruss_subgraph_wrapper.pyx +++ b/python/cugraph/cores/ktruss_subgraph_wrapper.pyx @@ -38,7 +38,7 @@ def ktruss_subgraph_double(input_graph, k, use_weights, subgraph_truss): input_graph.view_edge_list() num_verts = input_graph.number_of_vertices() - num_edges = input_graph.number_of_edges() + num_edges = len(input_graph.edgelist.edgelist_df) cdef uintptr_t c_src_indices = input_graph.edgelist.edgelist_df['src'].__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_indices = input_graph.edgelist.edgelist_df['dst'].__cuda_array_interface__['data'][0] @@ -86,7 +86,7 @@ def ktruss_subgraph_float(input_graph, k, use_weights, subgraph_truss): input_graph.view_edge_list() num_verts = input_graph.number_of_vertices() - num_edges = input_graph.number_of_edges() + num_edges = len(input_graph.edgelist.edgelist_df) cdef uintptr_t c_src_indices = input_graph.edgelist.edgelist_df['src'].__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_indices = input_graph.edgelist.edgelist_df['dst'].__cuda_array_interface__['data'][0] diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 958c5c7998c..8a7550746f1 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ 
b/python/cugraph/tests/test_k_truss_subgraph.py @@ -67,7 +67,7 @@ def compare_k_truss(graph_file, k, ground_truth_file, directed=True): dst = edgelist_df['dst'] wgt = edgelist_df['weights'] if not directed: - assert len(edgelist_df) == k_truss_nx.size() + assert len(edgelist_df) == len(k_truss_nx) for i in range(len(src)): has_edge = ((k_truss_nx['source'] == src[i]) & (k_truss_nx['target'] == dst[i]) & From 3da7ad8cdcd35d31861ad3c09a1f0ae3c1e90184 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 3 Apr 2020 01:20:28 -0500 Subject: [PATCH 15/40] revert bfs change --- python/cugraph/community/subgraph_extraction.py | 1 + python/cugraph/cores/ktruss_subgraph.py | 1 - python/cugraph/traversal/bfs.py | 7 +------ 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 3280c53a971..d551b6e34c8 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -14,6 +14,7 @@ from cugraph.community import subgraph_extraction_wrapper from cugraph.structure.graph import null_check + def subgraph(G, vertices): """ Compute a subgraph of the existing graph including only the specified diff --git a/python/cugraph/cores/ktruss_subgraph.py b/python/cugraph/cores/ktruss_subgraph.py index 43d6361e4d7..59a746e7f9d 100644 --- a/python/cugraph/cores/ktruss_subgraph.py +++ b/python/cugraph/cores/ktruss_subgraph.py @@ -12,7 +12,6 @@ # limitations under the License. from cugraph.cores import ktruss_subgraph_wrapper -from cugraph.structure.graph import DiGraph def ktruss_subgraph(G, k, use_weights=True): diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 84855810c20..f77a2ed7c50 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -12,10 +12,9 @@ # limitations under the License. 
from cugraph.traversal import bfs_wrapper -from cugraph.structure.graph import Graph -def bfs(G, start): +def bfs(G, start, directed=True): """ Find the distances and predecessors for a breadth first traversal of a graph. @@ -50,10 +49,6 @@ def bfs(G, start): >>> df = cugraph.bfs(G, 0) """ - if type(G) is Graph: - directed = False - else: - directed = True df = bfs_wrapper.bfs(G, start, directed) return df From b975c4598961b25b0c4923a5642df9cdc30d68d1 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 3 Apr 2020 11:54:01 -0500 Subject: [PATCH 16/40] update bfs optimization path --- CHANGELOG.md | 3 ++- cpp/src/traversal/bfs.cu | 4 ++-- python/cugraph/traversal/bfs.py | 8 +++++++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52210d977fd..38dbd8d0a79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,8 @@ ## Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 - +- PR #800 Fix bfs error in optimization path + # cuGraph 0.13.0 (Date TBD) ## New Features diff --git a/cpp/src/traversal/bfs.cu b/cpp/src/traversal/bfs.cu index 9217102da95..321ff091225 100644 --- a/cpp/src/traversal/bfs.cu +++ b/cpp/src/traversal/bfs.cu @@ -252,7 +252,7 @@ namespace detail { //undirected g : need parents to be in children's neighbors bool can_use_bottom_up = !directed && distances; - while (nf > 0) { + while (nf > 0 && nu > 0) { //Each vertices can appear only once in the frontierer array - we know it will fit new_frontier = frontier + nf; IndexType old_nf = nf; @@ -500,4 +500,4 @@ void bfs(experimental::GraphCSR const &graph, VT *distances, VT *pre template void bfs(experimental::GraphCSR const &graph, int *distances, int *predecessors, const int source_vertex, bool directed); -} // !namespace cugraph \ No newline at end of file +} // !namespace cugraph diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 194ff93189a..1d3d0bc9123 100644 --- a/python/cugraph/traversal/bfs.py +++ 
b/python/cugraph/traversal/bfs.py @@ -12,9 +12,10 @@ # limitations under the License. from cugraph.traversal import bfs_wrapper +from cugraph.structure.graph import Graph -def bfs(G, start, directed=True): +def bfs(G, start): """ Find the distances and predecessors for a breadth first traversal of a graph. @@ -53,6 +54,11 @@ def bfs(G, start, directed=True): >>> df = cugraph.bfs(G, 0) """ + if type(G) is Graph: + directed = False + else: + directed = True + df = bfs_wrapper.bfs(G, start, directed) return df From d5360178563be91856b529d0d8e8b70eaeb4c82f Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 3 Apr 2020 12:19:09 -0500 Subject: [PATCH 17/40] add doc back --- python/cugraph/traversal/bfs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index f77a2ed7c50..194ff93189a 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -26,6 +26,10 @@ def bfs(G, start, directed=True): as an adjacency list. start : Integer The index of the graph vertex from which the traversal begins + directed : bool + Indicates whether the graph in question is a directed graph, or whether + each edge has a corresponding reverse edge. (Allows optimizations if + the graph is undirected) Returns ------- From c6a61f83415b8609a22638118ad6871b64d042d3 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 3 Apr 2020 12:21:29 -0500 Subject: [PATCH 18/40] add back doc --- python/cugraph/cores/ktruss_subgraph.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cugraph/cores/ktruss_subgraph.py b/python/cugraph/cores/ktruss_subgraph.py index 59a746e7f9d..d92822af90d 100644 --- a/python/cugraph/cores/ktruss_subgraph.py +++ b/python/cugraph/cores/ktruss_subgraph.py @@ -58,6 +58,8 @@ def ktruss_subgraph(G, k, use_weights=True): k : int The desired k to be used for extracting the k-truss subgraph. 
+ use_weights : Bool + whether the output should contain the edge weights if G has them Returns ------- From 6e17a097df18ca45701913729f042fa22afc1196 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 3 Apr 2020 13:01:20 -0500 Subject: [PATCH 19/40] update doc --- python/cugraph/traversal/bfs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 1d3d0bc9123..44bd99b77cc 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -27,10 +27,6 @@ def bfs(G, start): as an adjacency list. start : Integer The index of the graph vertex from which the traversal begins - directed : bool - Indicates whether the graph in question is a directed graph, or whether - each edge has a corresponding reverse edge. (Allows optimizations if - the graph is undirected) Returns ------- From 0b7fe025d6c44250911fb68112b9940caec170f4 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 3 Apr 2020 13:24:40 -0500 Subject: [PATCH 20/40] add comments --- python/cugraph/tests/test_k_core.py | 2 ++ python/cugraph/tests/test_k_truss_subgraph.py | 2 ++ python/cugraph/tests/test_subgraph_extraction.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 2f2ef57186b..233cd9d72d5 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -35,6 +35,8 @@ def calc_k_cores(graph_file, directed=True): + # directed is used to create either a Graph or DiGraph so the returned + # cugraph can be compared to nx graph of same type. 
cu_M = utils.read_csv_file(graph_file) NM = utils.read_csv_for_nx(graph_file) if directed: diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 8a7550746f1..3893906e345 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -48,6 +48,8 @@ def ktruss_ground_truth(graph_file): def cugraph_k_truss_subgraph(graph_file, k, directed): + # directed is used to create either a Graph or DiGraph so the returned + # cugraph can be compared to nx graph of same type. cu_M = utils.read_csv_file(graph_file) if directed: G = cugraph.DiGraph() diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index e662e98523a..896eca209e5 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -44,6 +44,8 @@ def compare_edges(cg, nxg, verts): def cugraph_call(M, verts, directed=True): + # directed is used to create either a Graph or DiGraph so the returned + # cugraph can be compared to nx graph of same type. 
if directed: G = cugraph.DiGraph() else: From cb574a483947a4976d04a29a7aed69da40cdf6cd Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 3 Apr 2020 15:51:45 -0700 Subject: [PATCH 21/40] add BUILD_BYPRODUCTS entry to cugunrock project to fix -GNinja --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cf3952e19dd..b38b7b0e1a6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -293,6 +293,7 @@ ExternalProject_Add(cugunrock -DGPU_ARCHS="" -DGUNROCK_BUILD_SHARED_LIBS=OFF -DGUNROCK_BUILD_TESTS=OFF + BUILD_BYPRODUCTS ${CUGUNROCK_DIR}/lib/libgunrock.a ) add_library(gunrock STATIC IMPORTED) From f15fe73ca6434c4a78239c5e381ca143de6e2759 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 3 Apr 2020 15:53:49 -0700 Subject: [PATCH 22/40] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52210d977fd..a381273da96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - PR #782 Use Cython's `new_build_ext` (if available) - PR #788 Added options and config file to enable codecov - PR #793 Fix legacy cudf imports/cimports +- PR #803 Enable Ninja build ## Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 From f103ca6a125ca970143a19ffeb85339dcb38b336 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 3 Apr 2020 17:29:59 -0700 Subject: [PATCH 23/40] set ext_modules=extensions so the cmdclass sets nthreads --- python/setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index fcf6b2fe3b3..828976870fa 100644 --- a/python/setup.py +++ b/python/setup.py @@ -79,6 +79,11 @@ extra_compile_args=['-std=c++14']) ] +for e in EXTENSIONS: + e.cython_directives = dict( + profile=False, language_level=3, embedsignature=True + ) + setup(name='cugraph', description="cuGraph - GPU Graph Analytics", version=versioneer.get_version(), @@ -93,7 +98,7 @@ # Include the 
separately-compiled shared library author="NVIDIA Corporation", setup_requires=['cython'], - ext_modules=cythonize(EXTENSIONS), + ext_modules=EXTENSIONS, packages=find_packages(include=['cugraph', 'cugraph.*']), install_requires=INSTALL_REQUIRES, license="Apache", From 6c1ffb22eb9aa841f73082170044cbdcb7e4faaf Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 3 Apr 2020 17:31:42 -0700 Subject: [PATCH 24/40] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52210d977fd..28706b1bec8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - PR #782 Use Cython's `new_build_ext` (if available) - PR #788 Added options and config file to enable codecov - PR #793 Fix legacy cudf imports/cimports +- PR #804 Cythonize in parallel ## Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 From 44e15d45ebab0293d58356bbad8ba33fe7f4454d Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 3 Apr 2020 17:34:33 -0700 Subject: [PATCH 25/40] remove unused import --- python/setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index 828976870fa..98226867897 100644 --- a/python/setup.py +++ b/python/setup.py @@ -17,7 +17,6 @@ from setuptools import setup, find_packages from setuptools.extension import Extension -from Cython.Build import cythonize try: from Cython.Distutils.build_ext import new_build_ext as build_ext From 141e66f9e5324f7a6f22025b2e390299f60294e1 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 6 Apr 2020 14:00:34 -0400 Subject: [PATCH 26/40] Updating docs --- .../centrality/betweenness_centrality.py | 18 +++--- python/cugraph/centrality/katz_centrality.py | 11 ++-- python/cugraph/community/ecg.py | 23 ++++--- python/cugraph/community/louvain.py | 24 ++++--- .../cugraph/community/spectral_clustering.py | 63 ++++++++++--------- .../cugraph/community/subgraph_extraction.py | 12 ++-- python/cugraph/community/triangle_count.py | 12 ++-- 
python/cugraph/components/connectivity.py | 26 ++++---- 8 files changed, 100 insertions(+), 89 deletions(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 617c52ad07d..ce15dc7db8b 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -17,7 +17,7 @@ def betweenness_centrality(G, k=None, normalized=True, weight=None, endpoints=False, seed=None): """ - Compute betweenness centrality for the nodes of the graph G. cuGraph + Compute the betweenness centrality for all nodes of the graph G. cuGraph does not currently support the 'endpoints' and 'weight' parameters as seen in the corresponding networkX call. @@ -25,14 +25,16 @@ def betweenness_centrality(G, k=None, normalized=True, ---------- G : cuGraph.Graph cuGraph graph descriptor with connectivity information. The graph can - contain either directed or undirected edges where undirected edges are - represented as directed edges in both directions. + be either directed (DiGraph) or undirected (Graph) k : int, optional + Default is None. If k is not None, use k node samples to estimate betweenness. Higher values give better approximation normalized : bool, optional - Value defaults to true. If true, the betweenness values are normalized - by 2/((n-1)(n-2)) for graphs, and 1 / ((n-1)(n-2)) for directed graphs + Default is True. + If true, the betweenness values are normalized by + 2/((n-1)(n-2)) for Graphs (undirected), and + 1 / ((n-1)(n-2)) for DiGraphs (directed graphs) where n is the number of nodes in G. weight : cudf.Series Specifies the weights to be used for each vertex. 
@@ -57,15 +59,13 @@ def betweenness_centrality(G, k=None, normalized=True, -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> bc = cugraph.betweenness_centrality(G) """ # - # Some features not implemented for gunrock implementation, failing fast, + # Some features not implemented in gunrock implementation, failing fast, # but passing parameters through # # vertices is intended to be a cuDF series that contains a sampling of diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index 58be7e1dce8..a34130cca63 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,8 +35,7 @@ def katz_centrality(G, ---------- G : cuGraph.Graph cuGraph graph descriptor with connectivity information. The graph can - contain either directed or undirected edges where undirected edges are - represented as directed edges in both directions. + contain either directed (DiGraph) or undirected edges (Graph). alpha : float Attenuation factor with a default value of 0.1. 
If alpha is not less than 1/(lambda_max) where lambda_max is the maximum degree @@ -79,12 +78,10 @@ def katz_centrality(G, Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> kc = cugraph.katz_centrality(G) """ diff --git a/python/cugraph/community/ecg.py b/python/cugraph/community/ecg.py index 802d97db222..a678cd0a05d 100644 --- a/python/cugraph/community/ecg.py +++ b/python/cugraph/community/ecg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,23 +16,24 @@ def ecg(input_graph, min_weight=.05, ensemble_size=16): """ - Compute the ensemble clustering for graphs (ECG) partition of the input - graph ECG runs truncated Louvain on an ensemble of permutations of the + Compute the Ensemble Clustering for Graphs (ECG) partition of the input + graph. ECG runs truncated Louvain on an ensemble of permutations of the input graph, then uses the ensemble partitions to determine weights for - the input graph.The final result is found by running full Louvain on + the input graph. The final result is found by running full Louvain on the input graph using the determined weights. + See https://arxiv.org/abs/1809.05578 for further information. Parameters ---------- input_graph : cugraph.Graph cuGraph graph descriptor, should contain the connectivity information - and weights.The adjacency list will be computed if not already + and weights. 
The adjacency list will be computed if not already present. min_weight : floating point The minimum value to assign as an edgeweight in the ECG algorithm. - It should be a value in the range (0,1] usually left as the default + It should be a value in the range [0,1] usually left as the default value of .05 ensemble_size : integer @@ -46,17 +47,19 @@ def ecg(input_graph, min_weight=.05, ensemble_size=16): GPU data frame of size V containing two columns, the vertex id and the partition id it is assigned to. + df['vertex'] : cudf.Series + Contains the vertex identifiers + df['partition'] : cudf.Series + Contains the partition assigned to the vertices + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter = ' ', dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) - >>> values = cudf.Series(M['2']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, values) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') >>> parts = cugraph.ecg(G) """ diff --git a/python/cugraph/community/louvain.py b/python/cugraph/community/louvain.py index 882bce075da..0fb78b45f02 100644 --- a/python/cugraph/community/louvain.py +++ b/python/cugraph/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -23,8 +23,8 @@ def louvain(input_graph, max_iter=100): Parameters ---------- input_graph : cugraph.Graph - cuGraph graph descriptor, should contain the connectivity information - as an edge list. + cuGraph graph descriptor of type Graph + The adjacency list will be computed if not already present. The graph should be undirected where an undirected edge is represented by a directed edge in both direction. 
@@ -40,18 +40,24 @@ def louvain(input_graph, max_iter=100): parts : cudf.DataFrame GPU data frame of size V containing two columns the vertex id and the partition id it is assigned to. + + df['vertex'] : cudf.Series + Contains the vertex identifiers + df['partition'] : cudf.Series + Contains the partition assigned to the vertices + modularity_score : float - a floating point number containing the modularity score of the + a floating point number containing the global modularity score of the partitioning. Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> parts, modularity_score = cugraph.louvain(G) """ diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index bdb0fc99bbf..c2137d280b5 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,12 +36,16 @@ def spectralBalancedCutClustering(G, num_clusters. 
evs_tolerance: float Specifies the tolerance to use in the eigensolver + Default is 0.00001 evs_max_iter: integer Specifies the maximum number of iterations for the eigensolver + Default is 100 kmean_tolerance: float Specifies the tolerance to use in the k-means solver + Default is 0.00001 kmean_max_iter: integer Specifies the maximum number of iterations for the k-means solver + Default is 100 Returns ------- @@ -56,12 +60,12 @@ def spectralBalancedCutClustering(G, Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> df = cugraph.spectralBalancedCutClustering(G, 5) """ @@ -99,12 +103,16 @@ def spectralModularityMaximizationClustering(G, num_clusters evs_tolerance: float Specifies the tolerance to use in the eigensolver + Default is 0.00001 evs_max_iter: integer Specifies the maximum number of iterations for the eigensolver + Default is 100 kmean_tolerance: float Specifies the tolerance to use in the k-means solver + Default is 0.00001 kmean_max_iter: integer Specifies the maximum number of iterations for the k-means solver + Default is 100 Returns ------- @@ -116,13 +124,12 @@ def spectralModularityMaximizationClustering(G, Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) - >>> values = cudf.Series(M['2']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, 
values) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') >>> df = cugraph.spectralModularityMaximizationClustering(G, 5) """ @@ -158,13 +165,12 @@ def analyzeClustering_modularity(G, n_clusters, clustering): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) - >>> values = cudf.Series(M['2']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, values) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_modularity(G, 5, df['cluster']) """ @@ -197,12 +203,12 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df['cluster']) """ @@ -235,13 +241,12 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) - >>> values = cudf.Series(M['2']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 
'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, values) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df['cluster']) """ diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 63edf72de0c..17e6f1753ce 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,12 +36,12 @@ def subgraph(G, vertices): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) >>> verts = numpy.zeros(3, dtype=numpy.int32) >>> verts[0] = 0 >>> verts[1] = 1 diff --git a/python/cugraph/community/triangle_count.py b/python/cugraph/community/triangle_count.py index 2e2fe2b5cb4..407a2f1f2a5 100644 --- a/python/cugraph/community/triangle_count.py +++ b/python/cugraph/community/triangle_count.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -34,12 +34,12 @@ def triangles(G): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> count = cugraph.triangles(G) """ diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index bb479881a14..290976713c2 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ def weakly_connected_components(G): """ - Generate the weakly connected components and attach a component label to + Generate the Weakly Connected Components and attach a component label to each vertex. 
Parameters @@ -37,12 +37,12 @@ def weakly_connected_components(G): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) >>> df = cugraph.weakly_connected_components(G) """ @@ -53,7 +53,7 @@ def weakly_connected_components(G): def strongly_connected_components(G): """ - Generate the stronlgly connected components and attach a component label to + Generate the Strongly Connected Components and attach a component label to each vertex. Parameters @@ -74,12 +74,12 @@ def strongly_connected_components(G): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) + >>> M = cudf.read_csv('datasets/karate.csv', + delimiter = ' ', + dtype=['int32', 'int32', 'float32'], + header=None) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources,destinations,None) + >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) >>> df = cugraph.strongly_connected_components(G) """ From c0fc4274a89b0cad36ae20c8ea41ea18ddbe4c48 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 6 Apr 2020 14:03:10 -0400 Subject: [PATCH 27/40] Updated Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b9797fc7ac8..8705e84204c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - PR #788 Added options and config file to enable codecov - PR #793 Fix legacy cudf imports/cimports - PR #803 Enable Ninja build +- PR #807 Updating the Python docs ##
Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 From 9f165de16fa117b285b801f04daa02ec7ea5c816 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 6 Apr 2020 14:42:38 -0400 Subject: [PATCH 28/40] Updated docs --- python/cugraph/cores/core_number.py | 8 +++----- python/cugraph/cores/k_core.py | 8 +++----- python/cugraph/cores/ktruss_subgraph.py | 10 +++++----- python/cugraph/link_analysis/pagerank.py | 17 +++++++++++------ python/cugraph/link_prediction/jaccard.py | 14 +++++--------- python/cugraph/link_prediction/overlap.py | 6 ++---- 6 files changed, 29 insertions(+), 34 deletions(-) diff --git a/python/cugraph/cores/core_number.py b/python/cugraph/cores/core_number.py index f6ae00296d5..7f14540f162 100644 --- a/python/cugraph/cores/core_number.py +++ b/python/cugraph/cores/core_number.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -43,12 +43,10 @@ def core_number(G): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> cn = cugraph.core_number(G) """ diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 3353a7569fd..6927224cdc0 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -52,12 +52,10 @@ def k_core(G, Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> KCoreGraph = cugraph.k_core(G) """ diff --git a/python/cugraph/cores/ktruss_subgraph.py b/python/cugraph/cores/ktruss_subgraph.py index 60397bbe6a9..18c626658aa 100644 --- a/python/cugraph/cores/ktruss_subgraph.py +++ b/python/cugraph/cores/ktruss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,10 +17,10 @@ def ktruss_subgraph(G, k, use_weights=True): """ - Returns the subgraph of the k-truss of a graph for a specific k. + Returns the K-Truss subgraph of a graph for a specific k. - The k-truss of a graph is subgraph where each edge is part of at least - (k−2) triangles. k-trusses are used for finding tighlty knit groups of + The k-truss of a graph is a subgraph where each edge is part of at least + (k−2) triangles. K-trusses are used for finding tightly knit groups of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph and was define in [1]. Finding cliques is computationally demanding and finding the maximal k-clique is known to be NP-Hard.
@@ -73,7 +73,7 @@ def ktruss_subgraph(G, k, use_weights=True): >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edge_list(M, source='0', destination='1') + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) """ diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index f07a113fe53..11f1452304e 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -22,7 +22,7 @@ def pagerank(G, tol=1.0e-5, nstart=None): """ - Find the PageRank vertex values for a graph. cuGraph computes an + Find the PageRank score for every vertex in a graph. cuGraph computes an approximation of the Pagerank eigenvector using the power method. The number of iterations depends on the properties of the network itself; it increases when the tolerance descreases and/or alpha increases toward the @@ -47,6 +47,7 @@ def pagerank(G, Subset of vertices of graph for personalization personalization['values'] : cudf.Series Personalization values for vertices + max_iter : int The maximum number of iterations before an answer is returned. This can be used to limit the execution time and do an early exit before the @@ -75,14 +76,18 @@ def pagerank(G, GPU data frame containing two cudf.Series of size V: the vertex identifiers and the corresponding PageRank values. 
+ df['vertex'] : cudf.Series + Contains the vertex identifiers + df['pagerank'] : cudf.Series + Contains the PageRank score + + Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, tol = 1.0e-05) """ diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index 9b6a9371aba..9621cc3335a 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/link_prediction/jaccard.py @@ -51,13 +51,11 @@ def jaccard(input_graph, vertex_pair=None): you can get the interesting (non-zero) values that are part of the networkx solution by doing the following: - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) - >>> pairs = G.get_two_hop_neighbors() + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') + >>> pairs = cugraph.get_two_hop_neighbors(G) >>> df = cugraph.jaccard(G, pairs) But please remember that cugraph will fill the dataframe with the entire @@ -99,12 +97,10 @@ def jaccard(input_graph, vertex_pair=None): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> 
G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.jaccard(G) """ if type(input_graph) is not Graph: diff --git a/python/cugraph/link_prediction/overlap.py b/python/cugraph/link_prediction/overlap.py index b6c6559ecec..b23c49f256e 100644 --- a/python/cugraph/link_prediction/overlap.py +++ b/python/cugraph/link_prediction/overlap.py @@ -58,12 +58,10 @@ def overlap(input_graph, vertex_pair=None): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.overlap(G) """ From 230ff57f15249dbbad30be2ad6f038e73aec80df Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 6 Apr 2020 14:48:13 -0400 Subject: [PATCH 29/40] updating examples --- python/cugraph/link_prediction/wjaccard.py | 2 +- python/cugraph/link_prediction/woverlap.py | 2 +- python/cugraph/traversal/bfs.py | 6 ++---- python/cugraph/traversal/sssp.py | 6 ++---- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/python/cugraph/link_prediction/wjaccard.py b/python/cugraph/link_prediction/wjaccard.py index f5e16391ea1..e37d20ceb4a 100644 --- a/python/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/link_prediction/wjaccard.py @@ -69,7 +69,7 @@ def jaccard_w(input_graph, weights, vertex_pair=None): >>> weights = cudf.Series(numpy.ones( >>> max(sources.max(),destinations.max())+1, dtype=numpy.float32)) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.jaccard_w(G, weights) """ if type(input_graph) is not Graph: diff --git 
a/python/cugraph/link_prediction/woverlap.py b/python/cugraph/link_prediction/woverlap.py index e38c2e7a29c..252f682bd31 100644 --- a/python/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/link_prediction/woverlap.py @@ -68,7 +68,7 @@ def overlap_w(input_graph, weights, vertex_pair=None): >>> weights = cudf.Series(numpy.ones( >>> max(sources.max(),destinations.max())+1, dtype=numpy.float32)) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.overlap_w(G, weights) """ diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 44bd99b77cc..8e0cbc021f2 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -43,10 +43,8 @@ def bfs(G, start): -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.bfs(G, 0) """ diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index 81d26e5e31b..861fb265ef3 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019 - 2020, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -47,10 +47,8 @@ def sssp(G, source): -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) >>> G = cugraph.Graph() - >>> G.add_edge_list(sources, destinations, None) + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> distances = cugraph.sssp(G, 0) """ From ee4fbbc6a2115022eb9602926ea53e59f0c49f9e Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 6 Apr 2020 14:53:30 -0400 Subject: [PATCH 30/40] style fixes --- python/cugraph/centrality/betweenness_centrality.py | 10 +++++----- python/cugraph/community/ecg.py | 2 +- python/cugraph/community/louvain.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index ce15dc7db8b..bd19d76a8b0 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -25,16 +25,16 @@ def betweenness_centrality(G, k=None, normalized=True, ---------- G : cuGraph.Graph cuGraph graph descriptor with connectivity information. The graph can - be either directed (DiGraph) or undirected (Graph) + be either directed (DiGraph) or undirected (Graph) k : int, optional - Default is None. + Default is None. If k is not None, use k node samples to estimate betweenness. Higher values give better approximation normalized : bool, optional - Default is True. - If true, the betweenness values are normalized by + Default is True. + If true, the betweenness values are normalized by 2/((n-1)(n-2)) for Graphs (undirected), and - 1 / ((n-1)(n-2)) for DiGraphs (directed graphs) + 1 / ((n-1)(n-2)) for DiGraphs (directed graphs) where n is the number of nodes in G. weight : cudf.Series Specifies the weights to be used for each vertex. 
diff --git a/python/cugraph/community/ecg.py b/python/cugraph/community/ecg.py index a678cd0a05d..221e58bf31b 100644 --- a/python/cugraph/community/ecg.py +++ b/python/cugraph/community/ecg.py @@ -50,7 +50,7 @@ def ecg(input_graph, min_weight=.05, ensemble_size=16): df['vertex'] : cudf.Series Contains the vertex identifiers df['partition'] : cudf.Series - Contains the partition assigned to the vertices + Contains the partition assigned to the vertices Examples -------- diff --git a/python/cugraph/community/louvain.py b/python/cugraph/community/louvain.py index 0fb78b45f02..c00c6e4a3cb 100644 --- a/python/cugraph/community/louvain.py +++ b/python/cugraph/community/louvain.py @@ -23,8 +23,8 @@ def louvain(input_graph, max_iter=100): Parameters ---------- input_graph : cugraph.Graph - cuGraph graph descriptor of type Graph - + cuGraph graph descriptor of type Graph + The adjacency list will be computed if not already present. The graph should be undirected where an undirected edge is represented by a directed edge in both direction. 
@@ -44,7 +44,7 @@ def louvain(input_graph, max_iter=100): df['vertex'] : cudf.Series Contains the vertex identifiers df['partition'] : cudf.Series - Contains the partition assigned to the vertices + Contains the partition assigned to the vertices modularity_score : float a floating point number containing the global modularity score of the From 36deae60fb9bc3d6bb9e7a2bf071f805ba4af9e6 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 7 Apr 2020 16:19:48 -0400 Subject: [PATCH 31/40] updated to address review comment --- python/cugraph/community/subgraph_extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 17e6f1753ce..fc9306c2f89 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -41,7 +41,7 @@ def subgraph(G, vertices): dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> verts = numpy.zeros(3, dtype=numpy.int32) >>> verts[0] = 0 >>> verts[1] = 1 From 9dac738fa2eaf4ca738f34d29deb92fb15cb68fa Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 7 Apr 2020 16:24:58 -0400 Subject: [PATCH 32/40] removed unneeded comments --- python/cugraph/link_prediction/wjaccard.py | 8 ++------ python/cugraph/link_prediction/woverlap.py | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/python/cugraph/link_prediction/wjaccard.py b/python/cugraph/link_prediction/wjaccard.py index e37d20ceb4a..917ff7ea517 100644 --- a/python/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/link_prediction/wjaccard.py @@ -64,13 +64,9 @@ def jaccard_w(input_graph, weights, vertex_pair=None): -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - 
>>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) - >>> weights = cudf.Series(numpy.ones( - >>> max(sources.max(),destinations.max())+1, dtype=numpy.float32)) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(gdf, source='0', destination='1') - >>> df = cugraph.jaccard_w(G, weights) + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> df = cugraph.jaccard_w(G, M['2']) """ if type(input_graph) is not Graph: raise Exception("input graph must be undirected") diff --git a/python/cugraph/link_prediction/woverlap.py b/python/cugraph/link_prediction/woverlap.py index 252f682bd31..a176bc3b8d2 100644 --- a/python/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/link_prediction/woverlap.py @@ -63,13 +63,9 @@ def overlap_w(input_graph, weights, vertex_pair=None): -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sources = cudf.Series(M['0']) - >>> destinations = cudf.Series(M['1']) - >>> weights = cudf.Series(numpy.ones( - >>> max(sources.max(),destinations.max())+1, dtype=numpy.float32)) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(gdf, source='0', destination='1') - >>> df = cugraph.overlap_w(G, weights) + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> df = cugraph.overlap_w(G, M['2']) """ if (type(vertex_pair) == cudf.DataFrame): From 2c03164b7a344f9a4e364ca00e58cc9320c36481 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 7 Apr 2020 16:27:24 -0400 Subject: [PATCH 33/40] fixed typo --- python/cugraph/traversal/sssp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index 861fb265ef3..de27449b47f 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -48,7 +48,7 @@ def sssp(G, source): >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G =
cugraph.Graph() - >>> G.from_cudf_edgelist(gdf, source='0', destination='1') + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> distances = cugraph.sssp(G, 0) """ From 3243a068ce1d0b5747d09dadbbfa9828d0e64e70 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 7 Apr 2020 16:27:41 -0400 Subject: [PATCH 34/40] fixed typo --- python/cugraph/traversal/bfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 8e0cbc021f2..8b88318d0b4 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -44,7 +44,7 @@ def bfs(G, start): >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(gdf, source='0', destination='1') + >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> df = cugraph.bfs(G, 0) """ From 35aba60c87e387ab2f29300af5a9396a49065dc5 Mon Sep 17 00:00:00 2001 From: afender Date: Wed, 8 Apr 2020 14:34:44 -0500 Subject: [PATCH 35/40] adding MPI to cmake --- cpp/CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5751b902246..ef3fb3869f8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -104,6 +104,13 @@ set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags") option(BUILD_TESTS "Configure CMake to build tests" ON) +option(BUILD_MPI "Build with MPI" OFF) +if (BUILD_MPI) + find_package(MPI REQUIRED) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_C_COMPILE_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}") +endif(BUILD_MPI) ################################################################################################### # - cmake modules --------------------------------------------------------------------------------- @@ -399,6 
+406,7 @@ target_include_directories(cugraph "${CUHORNET_INCLUDE_DIR}/primitives" "${CMAKE_CURRENT_SOURCE_DIR}/src" "${CUGUNROCK_DIR}/include" + "${MPI_CXX_INCLUDE_PATH}" PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" ) @@ -407,7 +415,7 @@ target_include_directories(cugraph # - link libraries -------------------------------------------------------------------------------- target_link_libraries(cugraph PRIVATE - ${CUDF_LIBRARY} ${RMM_LIBRARY} gunrock ${NVSTRINGS_LIBRARY} cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY}) + ${CUDF_LIBRARY} ${RMM_LIBRARY} gunrock ${NVSTRINGS_LIBRARY} cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY} ${MPI_CXX_LIBRARIES}) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE ################################################################################################### From 1f02fc250a1c7789b5217464d3570496ce6a2371 Mon Sep 17 00:00:00 2001 From: afender Date: Wed, 8 Apr 2020 14:50:15 -0500 Subject: [PATCH 36/40] adding nccl --- ci/gpu/build.sh | 1 + conda/environments/cugraph_dev_cuda10.0.yml | 1 + conda/environments/cugraph_dev_cuda10.1.yml | 1 + conda/environments/cugraph_dev_cuda10.2.yml | 1 + conda/recipes/cugraph/meta.yaml | 1 + conda/recipes/libcugraph/meta.yaml | 2 + cpp/CMakeLists.txt | 12 +- cpp/cmake/FindNCCL.cmake | 116 ++++++++++++++++++++ python/setup.py | 2 +- 9 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 cpp/cmake/FindNCCL.cmake diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index c97aee330b4..1a34ca7c4c7 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -66,6 +66,7 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul distributed>=2.12.0 \ dask-cudf=${MINOR_VERSION} \ dask-cuda=${MINOR_VERSION} \ + nccl>=2.5 \ libcypher-parser \ ipython=7.3* \ jupyterlab diff --git a/conda/environments/cugraph_dev_cuda10.0.yml b/conda/environments/cugraph_dev_cuda10.0.yml index 0b57ce2294a..7eee4bb9a87 100644 --- 
a/conda/environments/cugraph_dev_cuda10.0.yml +++ b/conda/environments/cugraph_dev_cuda10.0.yml @@ -13,6 +13,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.14* - dask-cudf=0.14* +- nccl>=2.5 - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 4b0f2159d7a..5f80a222d39 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -12,6 +12,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.14* - dask-cudf=0.14* +- nccl>=2.5 - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 4a1b5b35251..929eb1b4f84 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -12,6 +12,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.14* - dask-cudf=0.14* +- nccl>=2.5 - scipy - networkx - python-louvain diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 946e4ab8c2f..b45f9d71fe8 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -32,6 +32,7 @@ requirements: - python x.x - libcugraph={{ version }} - cudf={{ minor_version }} + - nccl>=2.5 #test: # commands: diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 56868229c37..f6dfe5930fd 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -30,9 +30,11 @@ requirements: - cudatoolkit {{ cuda_version }}.* - boost-cpp>=1.66 - libcypher-parser + - nccl>=2.5 run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} + - nccl>=2.5 #test: # commands: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ef3fb3869f8..701bd6cf590 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -308,6 +308,15 @@ add_dependencies(gunrock cugunrock) set_property(TARGET gunrock PROPERTY 
IMPORTED_LOCATION ${CUGUNROCK_DIR}/lib/libgunrock.a) +# - NCCL +if(NOT NCCL_PATH) + find_package(NCCL REQUIRED) +else() + message("-- Manually set NCCL PATH to ${NCCL_PATH}") + set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) + set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) +endif(NOT NCCL_PATH) + ################################################################################################### # - library targets ------------------------------------------------------------------------------- @@ -406,6 +415,7 @@ target_include_directories(cugraph "${CUHORNET_INCLUDE_DIR}/primitives" "${CMAKE_CURRENT_SOURCE_DIR}/src" "${CUGUNROCK_DIR}/include" + "${NCCL_INCLUDE_DIRS}" "${MPI_CXX_INCLUDE_PATH}" PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include" @@ -415,7 +425,7 @@ target_include_directories(cugraph # - link libraries -------------------------------------------------------------------------------- target_link_libraries(cugraph PRIVATE - ${CUDF_LIBRARY} ${RMM_LIBRARY} gunrock ${NVSTRINGS_LIBRARY} cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY} ${MPI_CXX_LIBRARIES}) + ${CUDF_LIBRARY} ${RMM_LIBRARY} gunrock ${NVSTRINGS_LIBRARY} cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY} ${MPI_CXX_LIBRARIES} ${NCCL_LIBRARIES}) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE ################################################################################################### diff --git a/cpp/cmake/FindNCCL.cmake b/cpp/cmake/FindNCCL.cmake new file mode 100644 index 00000000000..16ca4458a7f --- /dev/null +++ b/cpp/cmake/FindNCCL.cmake @@ -0,0 +1,116 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Based on FindPNG.cmake from cmake 3.14.3 + +#[=======================================================================[.rst: +FindNCCL +-------- + +Find libnccl, the NVIDIA Collective Communication Library. A hint to find NCCL +can be provided by setting NCCL_INSTALL_DIR. + +Imported targets +^^^^^^^^^^^^^^^^ + +This module defines the following :prop_tgt:`IMPORTED` target: + +``NCCL::NCCL`` + The libnccl library, if found. + +Result variables +^^^^^^^^^^^^^^^^ + +This module will set the following variables in your project: + +``NCCL_INCLUDE_DIRS`` + where to find nccl.h , etc. +``NCCL_LIBRARIES`` + the libraries to link against to use NCCL. +``NCCL_FOUND`` + If false, do not try to use NCCL. +``NCCL_VERSION_STRING`` + the version of the NCCL library found + +#]=======================================================================] + +find_path(NCCL_NCCL_INCLUDE_DIR nccl.h HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES include) + +#TODO: Does this need to support finding the static library? + +list(APPEND NCCL_NAMES nccl libnccl) +set(_NCCL_VERSION_SUFFIXES 2) + +foreach(v IN LISTS _NCCL_VERSION_SUFFIXES) + list(APPEND NCCL_NAMES nccl${v} libnccl${v}) +endforeach() +unset(_NCCL_VERSION_SUFFIXES) +# For compatibility with versions prior to this multi-config search, honor +# any NCCL_LIBRARY that is already specified and skip the search. 
+if(NOT NCCL_LIBRARY) + find_library(NCCL_LIBRARY_RELEASE NAMES ${NCCL_NAMES} HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES lib) + include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) + select_library_configurations(NCCL) + mark_as_advanced(NCCL_LIBRARY_RELEASE) +endif() +unset(NCCL_NAMES) + +# Set by select_library_configurations(), but we want the one from +# find_package_handle_standard_args() below. +unset(NCCL_FOUND) + +if (NCCL_LIBRARY AND NCCL_NCCL_INCLUDE_DIR) + set(NCCL_INCLUDE_DIRS ${NCCL_NCCL_INCLUDE_DIR} ) + set(NCCL_LIBRARY ${NCCL_LIBRARY}) + + if(NOT TARGET NCCL::NCCL) + add_library(NCCL::NCCL UNKNOWN IMPORTED) + set_target_properties(NCCL::NCCL PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIRS}") + if(EXISTS "${NCCL_LIBRARY}") + set_target_properties(NCCL::NCCL PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${NCCL_LIBRARY}") + endif() + endif() +endif () + +if (NCCL_NCCL_INCLUDE_DIR AND EXISTS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h") + file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_major_version_str REGEX "^#define[ \t]+NCCL_MAJOR[ \t]+[0-9]+") + string(REGEX REPLACE "^#define[ \t]+NCCL_MAJOR[ \t]+([0-9]+)" "\\1" nccl_major_version_str "${nccl_major_version_str}") + + file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_minor_version_str REGEX "^#define[ \t]+NCCL_MINOR[ \t]+[0-9]+") + string(REGEX REPLACE "^#define[ \t]+NCCL_MINOR[ \t]+([0-9]+)" "\\1" nccl_minor_version_str "${nccl_minor_version_str}") + + file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_patch_version_str REGEX "^#define[ \t]+NCCL_PATCH[ \t]+[0-9]+") + string(REGEX REPLACE "^#define[ \t]+NCCL_PATCH[ \t]+([0-9]+)" "\\1" nccl_patch_version_str "${nccl_patch_version_str}") + + file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_suffix_version_str REGEX "^#define[ \t]+NCCL_SUFFIX[ \t]+\".*\"") + string(REGEX REPLACE "^#define[ \t]+NCCL_SUFFIX[ \t]+\"(.*)\"" "\\1" nccl_suffix_version_str "${nccl_suffix_version_str}") + + 
set(NCCL_VERSION_STRING "${nccl_major_version_str}.${nccl_minor_version_str}.${nccl_patch_version_str}${nccl_suffix_version_str}") + + unset(nccl_major_version_str) + unset(nccl_minor_version_str) + unset(nccl_patch_version_str) + unset(nccl_suffix_version_str) +endif () + +include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) +find_package_handle_standard_args(NCCL + REQUIRED_VARS NCCL_LIBRARY NCCL_NCCL_INCLUDE_DIR + VERSION_VAR NCCL_VERSION_STRING) + +mark_as_advanced(NCCL_NCCL_INCLUDE_DIR NCCL_LIBRARY) diff --git a/python/setup.py b/python/setup.py index 0dd98203349..b6667bc52af 100644 --- a/python/setup.py +++ b/python/setup.py @@ -65,7 +65,7 @@ cuda_include_dir], library_dirs=[get_python_lib()], runtime_library_dirs=[conda_lib_dir], - libraries=['cugraph', 'cudf'], + libraries=['cugraph', 'cudf', 'nccl'], language='c++', extra_compile_args=['-std=c++14']) ] From e4e71a9c83af7a27fbaa46b7c15f0717741098cc Mon Sep 17 00:00:00 2001 From: afender Date: Wed, 8 Apr 2020 16:57:20 -0500 Subject: [PATCH 37/40] added allgather test --- cpp/CMakeLists.txt | 1 - cpp/cmake/{ => Modules}/FindNCCL.cmake | 0 cpp/tests/CMakeLists.txt | 16 ++++++++++++++++ cpp/tests/test_utils.h | 17 +++++++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) rename cpp/cmake/{ => Modules}/FindNCCL.cmake (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 701bd6cf590..6b2b4e7ea0c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -399,7 +399,6 @@ add_dependencies(cugraph cugunrock) ################################################################################################### # - include paths --------------------------------------------------------------------------------- - target_include_directories(cugraph PRIVATE "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" diff --git a/cpp/cmake/FindNCCL.cmake b/cpp/cmake/Modules/FindNCCL.cmake similarity index 100% rename from cpp/cmake/FindNCCL.cmake rename to cpp/cmake/Modules/FindNCCL.cmake diff --git 
a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index de62ffcd2ea..8c850924730 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -44,6 +44,12 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE gtest gmock_main gmock cugraph ${CUDF_LIBRARY} ${RMM_LIBRARY} ${CMAKE_EXTRA_LIBS} cudart cuda) + if (BUILD_MPI) + include_directories(include ${MPI_CXX_INCLUDE_PATH} ${NCCL_INCLUDE_DIRS}) + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE ${MPI_C_LIBRARIES} ${NCCL_LIBRARIES} ) + target_compile_options(${CMAKE_TEST_NAME} PUBLIC ${MPI_C_COMPILE_FLAGS}) + endif(BUILD_MPI) + if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE ################################################################################################### @@ -271,6 +277,16 @@ set(FIND_MATCHES_TEST_SRC ConfigureTest(FIND_MATCHES_TEST "${FIND_MATCHES_TEST_SRC}" "") +################################################################################################### +#-NCCL tests --------------------------------------------------------------------- + +if (BUILD_MPI) + set(NCCL_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/nccl/nccl_test.cu") + + ConfigureTest(NCCL_TEST "${NCCL_TEST_SRC}" "") +endif(BUILD_MPI) + ################################################################################################### ### enable testing ################################################################################ ################################################################################################### diff --git a/cpp/tests/test_utils.h b/cpp/tests/test_utils.h index 163800d64f2..b9f5299f16d 100644 --- a/cpp/tests/test_utils.h +++ b/cpp/tests/test_utils.h @@ -62,6 +62,23 @@ extern "C" { } #endif +#define NCCLCHECK(cmd) do { \ + ncclResult_t nccl_status = cmd; \ + if (nccl_status!= ncclSuccess) { \ + printf("NCCL failure %s:%d '%s'\n", \ + __FILE__,__LINE__,ncclGetErrorString(nccl_status)); \ + } \ + 
} while(0) + +#define MPICHECK(cmd) do { \ + int e = cmd; \ + if( e != MPI_SUCCESS ) { \ + printf("Failed: MPI error %s:%d '%d'\n", \ + __FILE__,__LINE__, e); \ + exit(EXIT_FAILURE); \ + } \ +} while(0) + std::function gdf_col_deleter = [](gdf_column* col){ if (col) { col->size = 0; From a7e155e4e2c756b49141e52b90e86068c4aba8d4 Mon Sep 17 00:00:00 2001 From: afender Date: Thu, 9 Apr 2020 11:37:04 -0500 Subject: [PATCH 38/40] adding test file --- cpp/tests/nccl/nccl_test.cu | 75 +++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 cpp/tests/nccl/nccl_test.cu diff --git a/cpp/tests/nccl/nccl_test.cu b/cpp/tests/nccl/nccl_test.cu new file mode 100644 index 00000000000..edd2efb0077 --- /dev/null +++ b/cpp/tests/nccl/nccl_test.cu @@ -0,0 +1,75 @@ +#include "gtest/gtest.h" +#include +#include "test_utils.h" +#include +#include +#include +#include +#include + +TEST(allgather, success) +{ + int p = 1, r = 0, dev = 0, dev_count = 0; + MPICHECK(MPI_Comm_size(MPI_COMM_WORLD, &p)); + MPICHECK(MPI_Comm_rank(MPI_COMM_WORLD, &r)); + CUDA_RT_CALL(cudaGetDeviceCount(&dev_count)); + + // shortcut for device ID here + // may need something smarter later + dev = r%dev_count; + // cudaSetDevice must happen before ncclCommInitRank + CUDA_RT_CALL(cudaSetDevice(dev)); + + // print info + printf("# Rank %2d - Pid %6d - device %2d\n", + r, getpid(), dev); + + // NCCL init + ncclUniqueId id; + ncclComm_t comm; + if (r == 0) NCCLCHECK(ncclGetUniqueId(&id)); + MPICHECK(MPI_Bcast((void *)&id, sizeof(id), MPI_BYTE, 0, MPI_COMM_WORLD)); + NCCLCHECK(ncclCommInitRank(&comm, p, id, r)); + MPICHECK(MPI_Barrier(MPI_COMM_WORLD)); + + //allocate device buffers + int size = 3; + float *sendbuff, *recvbuff; + CUDA_RT_CALL(cudaMalloc(&sendbuff, size * sizeof(float))); + CUDA_RT_CALL(cudaMalloc(&recvbuff, size*p * sizeof(float))); + + //init values + thrust::fill(thrust::device_pointer_cast(sendbuff), + thrust::device_pointer_cast(sendbuff + size), (float)r); + 
thrust::fill(thrust::device_pointer_cast(recvbuff), + thrust::device_pointer_cast(recvbuff + size*p), -1.0f); + + // ncclAllGather + NCCLCHECK(ncclAllGather((const void*)sendbuff, (void*)recvbuff, size, ncclFloat, comm, cudaStreamDefault)); + + // expect each rankid printed size times in ascending order + if (r == 0) { + thrust::device_ptr dev_ptr(recvbuff); + std::cout.precision(15); + thrust::copy(dev_ptr, dev_ptr + size*p, std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; + } + + //free device buffers + CUDA_RT_CALL(cudaFree(sendbuff)); + CUDA_RT_CALL(cudaFree(recvbuff)); + + //finalizing NCCL + NCCLCHECK(ncclCommDestroy(comm)); +} + +int main( int argc, char** argv ) +{ + testing::InitGoogleTest(&argc,argv); + MPI_Init(&argc, &argv); + rmmInitialize(nullptr); + int rc = RUN_ALL_TESTS(); + rmmFinalize(); + MPI_Finalize(); + return rc; +} \ No newline at end of file From 69956718dbf1dd400c5077f39cb4d142c0bd8eec Mon Sep 17 00:00:00 2001 From: afender Date: Thu, 9 Apr 2020 11:38:20 -0500 Subject: [PATCH 39/40] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f07b7c5a6..521f573a299 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ## Improvements - PR #765 Remove gdf_column from connected components +- PR #820 OPG infra and all-gather smoke test ## Bug Fixes - PR #763 Update RAPIDS conda dependencies to v0.14 From 0dbc162d254a42dc3fe001746e27a18b94d710fa Mon Sep 17 00:00:00 2001 From: afender Date: Thu, 9 Apr 2020 17:32:31 -0500 Subject: [PATCH 40/40] improved nccl/mpi direct macros in test suite --- cpp/tests/test_utils.h | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/cpp/tests/test_utils.h b/cpp/tests/test_utils.h index b9f5299f16d..6ac36d4ab35 100644 --- a/cpp/tests/test_utils.h +++ b/cpp/tests/test_utils.h @@ -62,22 +62,23 @@ extern "C" { } #endif -#define NCCLCHECK(cmd) do { \ +#define NCCLCHECK(cmd) { \ ncclResult_t 
nccl_status = cmd; \ if (nccl_status!= ncclSuccess) { \ printf("NCCL failure %s:%d '%s'\n", \ __FILE__,__LINE__,ncclGetErrorString(nccl_status)); \ - } \ - } while(0) - -#define MPICHECK(cmd) do { \ - int e = cmd; \ - if( e != MPI_SUCCESS ) { \ - printf("Failed: MPI error %s:%d '%d'\n", \ - __FILE__,__LINE__, e); \ - exit(EXIT_FAILURE); \ - } \ -} while(0) + FAIL(); \ + } \ + } + +#define MPICHECK(cmd) { \ + int e = cmd; \ + if ( e != MPI_SUCCESS ) { \ + printf("Failed: MPI error %s:%d '%d'\n", \ + __FILE__,__LINE__, e); \ + FAIL(); \ + } \ +} std::function gdf_col_deleter = [](gdf_column* col){ if (col) {