From 39d96c3763fea2006ff4371de84d58a7c82f60b2 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 26 Apr 2019 13:51:19 -0400 Subject: [PATCH 1/2] add some python documentation --- cpp/include/functions.h | 4 +-- python/cugraph/graph/c_graph.pyx | 45 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/cpp/include/functions.h b/cpp/include/functions.h index 3a31948a467..2aae8600e52 100644 --- a/cpp/include/functions.h +++ b/cpp/include/functions.h @@ -18,8 +18,8 @@ /* ----------------------------------------------------------------------------*/ /** - * @Synopsis Renumber source and destination indexes to be a dense numbering - * between 0 and number of vertices minus 1. + * @Synopsis Renumber source and destination indexes to be a dense numbering, + * using contiguous values between 0 and number of vertices minus 1. * * Assumptions: * * source and dest have same size and type diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 14a2b5355d9..45451a5c705 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -114,6 +114,51 @@ class Graph: free(g) def renumber(self, source_col, dest_col): + """ + Take a (potentially sparse) set of source and destination vertex + ids and renumber the vertices to create a dense set of vertex ids + using all values contiguously from 0 to the number of unique vertices + - 1. + + Input columns can be either int64 or int32. The output will be mapped + to int32, since many of the cugraph functions are limited to int32. + If the number of unique values in source_col and dest_col exceeds + 2^31-1, this function will return an error. + + This call returns three cudf Series: the renumbered source_col, + the renumbered dest_col, and a numbering map that maps the new ids + to the original ids. + + Parameters + ---------- + source_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). 
+ The gdf column contains the source index for each edge. + Source indices must be an integer type. + dest_col : cudf.Series + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices must be an integer type. + + Examples + -------- + >>> import numpy as np + >>> import pytest + >>> from scipy.io import mmread + >>> + >>> import cudf + >>> import cugraph + >>> + >>> + >>> mm_file = '../datasets/karate.mtx' + >>> M = mmread(mm_file).asfptype() + >>> sources = cudf.Series(M.row) + >>> destinations = cudf.Series(M.col) + >>> + >>> G = cugraph.Graph() + >>> src_r, dst_r, numbering = G.renumber(sources, destinations) + """ + cdef gdf_column src_renumbered cdef gdf_column dst_renumbered cdef gdf_column numbering_map From 933f597ca7a235d08fe09b3fbb263b4f6268c47e Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Fri, 26 Apr 2019 13:53:53 -0400 Subject: [PATCH 2/2] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 882ebb93bed..0d099139f36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - PR #203 Added small datasets directly in the repo - PR #215 Simplified get_rapids_dataset_root_dir(), set a default value for the root dir - PR #233 Added csv datasets and edited test to use cudf for reading graphs +- PR #247 Added some documentation for renumbering ## Bug Fixes - PR #226 Bump cudf dependencies to 0.7