From 97b92166da8d8f35b66ed91928bd64682e6aab80 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Apr 2019 13:26:44 -0700 Subject: [PATCH 01/18] revmoed trailing spaces --- cpp/src/cugraph.cu | 118 ++++++++++++++++++++++----------------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 867f8981e58..8a22c9e7005 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -10,7 +10,7 @@ * */ -// Graph analytics features +// Graph analytics features // Author: Alex Fender afender@nvidia.com #include @@ -31,7 +31,7 @@ using Vector = thrust::device_vector>; void gdf_col_delete(gdf_column* col) { if (col) { - col->size = 0; + col->size = 0; if(col->data) { ALLOC_FREE_TRY(col->data, nullptr); } @@ -62,13 +62,13 @@ void cpy_column_view(const gdf_column *in, gdf_column *out) { } } -gdf_error gdf_adj_list_view(gdf_graph *graph, const gdf_column *offsets, +gdf_error gdf_adj_list_view(gdf_graph *graph, const gdf_column *offsets, const gdf_column *indices, const gdf_column *edge_data) { - GDF_REQUIRE( offsets->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); + GDF_REQUIRE( offsets->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); GDF_REQUIRE( indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); GDF_REQUIRE( (offsets->dtype == indices->dtype), GDF_UNSUPPORTED_DTYPE ); GDF_REQUIRE( ((offsets->dtype == GDF_INT32) || (offsets->dtype == GDF_INT64)), GDF_UNSUPPORTED_DTYPE ); - GDF_REQUIRE( (offsets->size > 0), GDF_DATASET_EMPTY ); + GDF_REQUIRE( (offsets->size > 0), GDF_DATASET_EMPTY ); GDF_REQUIRE( (graph->adjList == nullptr) , GDF_INVALID_API_CALL); graph->adjList = new gdf_adj_list; @@ -101,19 +101,19 @@ gdf_error gdf_adj_list::get_source_indices (gdf_column *src_indices) { GDF_REQUIRE( offsets->data != nullptr , GDF_INVALID_API_CALL); GDF_REQUIRE( src_indices->size == indices->size, GDF_COLUMN_SIZE_MISMATCH ); GDF_REQUIRE( src_indices->dtype == indices->dtype, GDF_UNSUPPORTED_DTYPE ); - GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY ); + GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY ); cugraph::offsets_to_indices((int*)offsets->data, offsets->size-1, (int*)src_indices->data); return GDF_SUCCESS; } -gdf_error gdf_edge_list_view(gdf_graph *graph, const gdf_column *src_indices, +gdf_error gdf_edge_list_view(gdf_graph *graph, const gdf_column *src_indices, const gdf_column *dest_indices, const gdf_column *edge_data) { GDF_REQUIRE( src_indices->size == dest_indices->size, GDF_COLUMN_SIZE_MISMATCH ); GDF_REQUIRE( src_indices->dtype == dest_indices->dtype, GDF_UNSUPPORTED_DTYPE ); GDF_REQUIRE( ((src_indices->dtype == GDF_INT32) || (src_indices->dtype == GDF_INT64)), GDF_UNSUPPORTED_DTYPE ); - GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY ); - GDF_REQUIRE( src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); + GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY ); + GDF_REQUIRE( src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); GDF_REQUIRE( dest_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); GDF_REQUIRE( graph->edgeList == nullptr , GDF_INVALID_API_CALL); @@ -150,20 +150,20 @@ gdf_error gdf_add_adj_list_impl (gdf_graph *graph) { CSR_Result_Weighted adj_list; status = ConvertCOOtoCSR_weighted((int*)graph->edgeList->src_indices->data, (int*)graph->edgeList->dest_indices->data, (WT*)graph->edgeList->edge_data->data, nnz, adj_list); - - gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets, + + gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets, nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype); - gdf_column_view(graph->adjList->indices, adj_list.colIndices, + gdf_column_view(graph->adjList->indices, adj_list.colIndices, nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype); - gdf_column_view(graph->adjList->edge_data, adj_list.edgeWeights, + gdf_column_view(graph->adjList->edge_data, adj_list.edgeWeights, nullptr, adj_list.nnz, graph->edgeList->edge_data->dtype); } else { CSR_Result adj_list; - status = ConvertCOOtoCSR((int*)graph->edgeList->src_indices->data,(int*)graph->edgeList->dest_indices->data, nnz, adj_list); - gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets, + status = ConvertCOOtoCSR((int*)graph->edgeList->src_indices->data,(int*)graph->edgeList->dest_indices->data, nnz, adj_list); + gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets, nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype); - gdf_column_view(graph->adjList->indices, adj_list.colIndices, + gdf_column_view(graph->adjList->indices, adj_list.colIndices, nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype); } if (status !=0) { @@ -185,14 +185,14 @@ gdf_error gdf_add_edge_list (gdf_graph *graph) { CUDA_TRY(cudaMallocManaged ((void**)&d_src, sizeof(int) * graph->adjList->indices->size)); - cugraph::offsets_to_indices((int*)graph->adjList->offsets->data, - graph->adjList->offsets->size-1, + cugraph::offsets_to_indices((int*)graph->adjList->offsets->data, + graph->adjList->offsets->size-1, (int*)d_src); - gdf_column_view(graph->edgeList->src_indices, d_src, + gdf_column_view(graph->edgeList->src_indices, d_src, nullptr, graph->adjList->indices->size, graph->adjList->indices->dtype); cpy_column_view(graph->adjList->indices, graph->edgeList->dest_indices); - + if (graph->adjList->edge_data != nullptr) { graph->edgeList->edge_data = new gdf_column; cpy_column_view(graph->adjList->edge_data, graph->edgeList->edge_data); @@ -211,25 +211,25 @@ gdf_error gdf_add_transposed_adj_list_impl (gdf_graph *graph) { graph->transposedAdjList->offsets = new gdf_column; graph->transposedAdjList->indices = new gdf_column; graph->transposedAdjList->ownership = 1; - + if (graph->edgeList->edge_data) { graph->transposedAdjList->edge_data = new gdf_column; CSR_Result_Weighted adj_list; status = ConvertCOOtoCSR_weighted( (int*)graph->edgeList->dest_indices->data, (int*)graph->edgeList->src_indices->data, (WT*)graph->edgeList->edge_data->data, nnz, adj_list); - gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets, + gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets, nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype); - gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices, + gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices, nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype); - gdf_column_view(graph->transposedAdjList->edge_data, adj_list.edgeWeights, + gdf_column_view(graph->transposedAdjList->edge_data, adj_list.edgeWeights, nullptr, adj_list.nnz, graph->edgeList->edge_data->dtype); } else { CSR_Result adj_list; - status = ConvertCOOtoCSR((int*)graph->edgeList->dest_indices->data, (int*)graph->edgeList->src_indices->data, nnz, adj_list); - gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets, + status = ConvertCOOtoCSR((int*)graph->edgeList->dest_indices->data, (int*)graph->edgeList->src_indices->data, nnz, adj_list); + gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets, nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype); - gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices, + gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices, nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype); } if (status !=0) { @@ -263,46 +263,46 @@ gdf_error gdf_degree_impl(int n, int e, gdf_column* col_ptr, gdf_column* degree, nblocks.x = min((e + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS); nblocks.y = 1; nblocks.z = 1; - + switch (col_ptr->dtype) { case GDF_INT32: cugraph::degree_coo <<>>(n, e, static_cast(col_ptr->data), static_cast(degree->data));break; default: return GDF_UNSUPPORTED_DTYPE; } } return GDF_SUCCESS; -} +} gdf_error gdf_degree(gdf_graph *graph, gdf_column *degree, int x) { // Calculates the degree of all nodes of the graph // x = 0: in+out degree - // x = 1: in-degree + // x = 1: in-degree // x = 2: out-degree GDF_REQUIRE(graph->adjList != nullptr || graph->transposedAdjList != nullptr, GDF_INVALID_API_CALL); - int n; + int n; int e; if(graph->adjList != nullptr) { n = graph->adjList->offsets->size -1; e = graph->adjList->indices->size; } else { - n = graph->transposedAdjList->offsets->size - 1; + n = graph->transposedAdjList->offsets->size - 1; e = graph->transposedAdjList->indices->size; - } + } - if(x!=1) { + if(x!=1) { // Computes out-degree for x=0 and x=2 - if(graph->adjList) + if(graph->adjList) gdf_degree_impl(n, e, graph->adjList->offsets, degree, true); - else + else gdf_degree_impl(n, e, graph->transposedAdjList->indices, degree, false); } - if(x!=2) { + if(x!=2) { // Computes in-degree for x=0 and x=1 - if(graph->adjList) + if(graph->adjList) gdf_degree_impl(n, e, graph->adjList->indices, degree, false); - else + else gdf_degree_impl(n, e, graph->transposedAdjList->offsets, degree, true); } return GDF_SUCCESS; @@ -315,17 +315,17 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph, float tolerance = 1e-4, int max_iter = 200, bool has_guess = false) { GDF_REQUIRE( graph->edgeList != nullptr, GDF_VALIDITY_UNSUPPORTED ); - GDF_REQUIRE( graph->edgeList->src_indices->size == graph->edgeList->dest_indices->size, GDF_COLUMN_SIZE_MISMATCH ); - GDF_REQUIRE( graph->edgeList->src_indices->dtype == graph->edgeList->dest_indices->dtype, GDF_UNSUPPORTED_DTYPE ); - GDF_REQUIRE( graph->edgeList->src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); - GDF_REQUIRE( graph->edgeList->dest_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); - GDF_REQUIRE( pagerank != nullptr , GDF_INVALID_API_CALL ); - GDF_REQUIRE( pagerank->data != nullptr , GDF_INVALID_API_CALL ); - GDF_REQUIRE( pagerank->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); - GDF_REQUIRE( pagerank->size > 0 , GDF_INVALID_API_CALL ); + GDF_REQUIRE( graph->edgeList->src_indices->size == graph->edgeList->dest_indices->size, GDF_COLUMN_SIZE_MISMATCH ); + GDF_REQUIRE( graph->edgeList->src_indices->dtype == graph->edgeList->dest_indices->dtype, GDF_UNSUPPORTED_DTYPE ); + GDF_REQUIRE( graph->edgeList->src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); + GDF_REQUIRE( graph->edgeList->dest_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); + GDF_REQUIRE( pagerank != nullptr , GDF_INVALID_API_CALL ); + GDF_REQUIRE( pagerank->data != nullptr , GDF_INVALID_API_CALL ); + GDF_REQUIRE( pagerank->null_count == 0 , GDF_VALIDITY_UNSUPPORTED ); + GDF_REQUIRE( pagerank->size > 0 , GDF_INVALID_API_CALL ); int m=pagerank->size, nnz = graph->edgeList->src_indices->size, status = 0; - WT *d_pr, *d_val = nullptr, *d_leaf_vector = nullptr; + WT *d_pr, *d_val = nullptr, *d_leaf_vector = nullptr; WT res = 1.0; WT *residual = &res; @@ -345,16 +345,16 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph, cugraph::copy(m, (WT*)pagerank->data, d_pr); } - status = cugraph::pagerank( m,nnz, (int*)graph->transposedAdjList->offsets->data, (int*)graph->transposedAdjList->indices->data, + status = cugraph::pagerank( m,nnz, (int*)graph->transposedAdjList->offsets->data, (int*)graph->transposedAdjList->indices->data, d_val, alpha, d_leaf_vector, false, tolerance, max_iter, d_pr, residual); - + if (status !=0) - switch ( status ) { - case -1: std::cerr<< "Error : bad parameters in Pagerank"<(m, d_pr, (WT*)pagerank->data); ALLOC_FREE_TRY(d_val, stream); @@ -466,9 +466,9 @@ gdf_error gdf_louvain(gdf_graph *graph, void *final_modularity, void *num_level, void* offsets_ptr = graph->adjList->offsets->data; void* indices_ptr = graph->adjList->indices->data; - + void* value_ptr; - Vector d_values; + Vector d_values; if(graph->adjList->edge_data) { value_ptr = graph->adjList->edge_data->data; } @@ -496,7 +496,7 @@ gdf_error gdf_louvain(gdf_graph *graph, void *final_modularity, void *num_level, cudaDataType_t index_type = gdf_to_cudadtype(graph->adjList->indices); cudaDataType_t val_type = graph->adjList->edge_data? gdf_to_cudadtype(graph->adjList->edge_data): CUDA_R_32F; - nvgraphLouvain(index_type, val_type, n, e, offsets_ptr, indices_ptr, value_ptr, 1, 0, NULL, + nvgraphLouvain(index_type, val_type, n, e, offsets_ptr, indices_ptr, value_ptr, 1, 0, NULL, final_modularity, louvain_parts_ptr, num_level); return GDF_SUCCESS; } From e06c7432a6cef780d563e6c22482a7ae1293028c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Apr 2019 14:07:57 -0700 Subject: [PATCH 02/18] replaced heap allocation of temporary C++ gdf_column objects with stack allocation, these objects are only used to pass the data encapsulated in Python cudf Series objects to C++ functions expecting (pointers to) C++ gdf_column objects and sizeof(gdf_column) is not large enough to blow stack, no need to involve heap allocation overhead and risk memory leak (if forget to free). --- python/cugraph/graph/c_graph.pyx | 156 ++++++++++++++++--------------- 1 file changed, 81 insertions(+), 75 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 190c56bfaf1..b96986ac377 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -23,6 +23,31 @@ import numpy as np dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64} +cdef gdf_column get_gdf_column_view(col): + """ + This function returns a C++ gdf_column object from the Python cudf Series + object by shallow copying. The returned C++ object is expected to be used + as a temporary variable to pass the column data encapsulated in the Python + cudf Series object to C++ functions expecting (pointers to) C++ gdf_column + objects. + """ + cdef gdf_column c_col + cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) + # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) + cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE) + + err = gdf_column_view_augmented(< gdf_column *> &c_col, + < void *> data_ptr, + < gdf_valid_type *> 0, + < gdf_size_type > len(col), + dtypes[col.dtype.type], + < gdf_size_type > col.null_count, + c_extra_dtype_info) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + return c_col + + cdef create_column(col): cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column)) cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) @@ -43,13 +68,6 @@ cdef create_column(col): return col_ptr -cdef delete_column(col_ptr): - cdef uintptr_t col = col_ptr - cdef gdf_column * c_col = < gdf_column *> col - free(c_col) - return - - class Graph: """ cuGraph graph class containing basic graph creation and transformation operations. @@ -138,34 +156,30 @@ class Graph: # (if not None) to avoid garbage collection while they are still in use # inside this class. If copy is set to True, deep-copy the objects. if copy is False: - self.edge_list_source_col = source_col; - self.edge_list_dest_col = dest_col; - self.edge_list_value_col = value_col; + self.edge_list_source_col = source_col + self.edge_list_dest_col = dest_col + self.edge_list_value_col = value_col else: - self.edge_list_source_col = source_col.copy(); - self.edge_list_dest_col = dest_col.copy(); - self.edge_list_value_col = value_col.copy(); + self.edge_list_source_col = source_col.copy() + self.edge_list_dest_col = dest_col.copy() + self.edge_list_value_col = value_col.copy() cdef uintptr_t graph = self.graph_ptr - cdef uintptr_t source = create_column(self.edge_list_source_col) - cdef uintptr_t dest = create_column(self.edge_list_dest_col) - cdef uintptr_t value + cdef gdf_column c_source_col = get_gdf_column_view(self.edge_list_source_col) + cdef gdf_column c_dest_col = get_gdf_column_view(self.edge_list_dest_col) + cdef gdf_column c_value_col + cdef gdf_column * c_value_col_ptr if value_col is None: - value = 0 + c_value_col_ptr = NULL else: - value = create_column(self.edge_list_value_col) + c_value_col = get_gdf_column_view(self.edge_list_value_col) + c_value_col_ptr = &c_value_col - try: - err = gdf_edge_list_view(< gdf_graph *> graph, - < gdf_column *> source, - < gdf_column *> dest, - < gdf_column *> value) - cudf.bindings.cudf_cpp.check_gdf_error(err) - finally: - delete_column(source) - delete_column(dest) - if value is not 0: - delete_column(value) + err = gdf_edge_list_view(< gdf_graph *> graph, + &c_source_col, + &c_dest_col, + c_value_col_ptr) + cudf.bindings.cudf_cpp.check_gdf_error(err) def num_vertices(self): """ @@ -280,34 +294,30 @@ class Graph: # still in use inside this class. If copy is set to True, deep-copy the # objects. if copy is False: - self.adj_list_offset_col = offset_col; - self.adj_list_index_col = index_col; - self.adj_list_value_col = value_col; + self.adj_list_offset_col = offset_col + self.adj_list_index_col = index_col + self.adj_list_value_col = value_col else: - self.adj_list_offset_col = offset_col.copy(); - self.adj_list_index_col = index_col.copy(); - self_adj_list_value_col = value_col.copy(); + self.adj_list_offset_col = offset_col.copy() + self.adj_list_index_col = index_col.copy() + self_adj_list_value_col = value_col.copy() cdef uintptr_t graph = self.graph_ptr - cdef uintptr_t offsets = create_column(self.adj_list_offset_col) - cdef uintptr_t indices = create_column(self.adj_list_index_col) - cdef uintptr_t value + cdef gdf_column c_offset_col = get_gdf_column_view(self.adj_list_offset_col) + cdef gdf_column c_index_col = get_gdf_column_view(self.adj_list_index_col) + cdef gdf_column c_value_col + cdef gdf_column * c_value_col_ptr if value_col is None: - value = 0 + c_value_col_ptr = NULL else: - value = create_column(self.adj_list_value_col) + c_value_col = get_gdf_column_view(self.adj_list_value_col) + c_value_col_ptr = &c_value_col - try: - err = gdf_adj_list_view(< gdf_graph *> graph, - < gdf_column *> offsets, - < gdf_column *> indices, - < gdf_column *> value) - cudf.bindings.cudf_cpp.check_gdf_error(err) - finally: - delete_column(offsets) - delete_column(indices) - if value is not 0: - delete_column(value) + err = gdf_adj_list_view(< gdf_graph *> graph, + &c_offset_col, + &c_index_col, + c_value_col_ptr) + cudf.bindings.cudf_cpp.check_gdf_error(err) def view_adj_list(self): """ @@ -404,32 +414,30 @@ class Graph: """ cdef uintptr_t graph = self.graph_ptr cdef gdf_graph * g = < gdf_graph *> graph - cdef gdf_column * first = < gdf_column *> malloc(sizeof(gdf_column)) - cdef gdf_column * second = < gdf_column *> malloc(sizeof(gdf_column)) - err = gdf_get_two_hop_neighbors(g, first, second) + cdef gdf_column c_first_col + cdef gdf_column c_second_col + err = gdf_get_two_hop_neighbors(g, &c_first_col, &c_second_col) cudf.bindings.cudf_cpp.check_gdf_error(err) df = cudf.DataFrame() - if first.dtype == GDF_INT32: - first_out = rmm.device_array_from_ptr(first.data, - nelem=first.size, + if c_first_col.dtype == GDF_INT32: + first_out = rmm.device_array_from_ptr(c_first_col.data, + nelem=c_first_col.size, dtype=np.int32) - second_out = rmm.device_array_from_ptr(second.data, - nelem=second.size, + second_out = rmm.device_array_from_ptr(c_second_col.data, + nelem=c_second_col.size, dtype=np.int32) df['first'] = first_out df['second'] = second_out - if first.dtype == GDF_INT64: - first_out = rmm.device_array_from_ptr(first.data, - nelem=first.size, + if c_first_col.dtype == GDF_INT64: + first_out = rmm.device_array_from_ptr(c_first_col.data, + nelem=c_first_col.size, dtype=np.int64) - second_out = rmm.device_array_from_ptr(second.data, - nelem=second.size, + second_out = rmm.device_array_from_ptr(c_second_col.data, + nelem=c_second_col.size, dtype=np.int64) df['first'] = first_out df['second'] = second_out - delete_column(first) - delete_column(second) return df def delete_adj_list(self): @@ -510,7 +518,7 @@ class Graph: given by the specified vertex_subset. df['vertex']: The vertex ID of node (will be identical to vertex_subset if specified) - df['degree']: The computed out-degree of the corresponding vertex + df['degree']: The computed out-degree of the corresponding vertex Examples -------- >>> import numpy as np @@ -546,7 +554,7 @@ class Graph: given by the specified vertex_subset. df['vertex']: The vertex ID of node (will be identical to vertex_subset if specified) - df['degree']: The computed degree of the corresponding vertex + df['degree']: The computed degree of the corresponding vertex Examples -------- >>> import numpy as np @@ -574,16 +582,16 @@ class Graph: df = cudf.DataFrame() vertex_col = cudf.Series(np.zeros(n, dtype=np.int32)) - cdef uintptr_t identifier_ptr = create_column(vertex_col) + c_vertex_col = get_gdf_column_view(vertex_col) if g.adjList: - err = g.adjList.get_vertex_identifiers(identifier_ptr) + err = g.adjList.get_vertex_identifiers(&c_vertex_col) else: - err = g.transposedAdjList.get_vertex_identifiers(identifier_ptr) + err = g.transposedAdjList.get_vertex_identifiers(&c_vertex_col) cudf.bindings.cudf_cpp.check_gdf_error(err) degree_col = cudf.Series(np.zeros(n, dtype=np.int32)) - cdef uintptr_t degree_col_ptr = create_column(degree_col) - err = gdf_degree(g, degree_col_ptr, x) + cdef gdf_column c_degree_col = get_gdf_column_view(degree_col) + err = gdf_degree(g, &c_degree_col, x) cudf.bindings.cudf_cpp.check_gdf_error(err) if vertex_subset is None: @@ -595,6 +603,4 @@ class Graph: del vertex_col del degree_col - delete_column(identifier_ptr) - delete_column(degree_col_ptr) return df From ff98880a30a35fe4edf4bcb45c483096b82a6f7e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Apr 2019 15:23:18 -0700 Subject: [PATCH 03/18] replaced create_column (heap-allocation for gdf_column) wiht get_gdf_column_view (stack-allocation for gdf_column) --- python/cugraph/bfs/bfs_wrapper.pyx | 18 ++++----- python/cugraph/graph/c_graph.pyx | 20 ---------- python/cugraph/jaccard/jaccard_wrapper.pyx | 28 +++++++------- python/cugraph/jaccard/wjaccard_wrapper.pyx | 37 +++++++++---------- python/cugraph/louvain/louvain_wrapper.pyx | 10 ++--- python/cugraph/overlap/overlap_wrapper.pyx | 28 +++++++------- python/cugraph/overlap/woverlap_wrapper.pyx | 37 +++++++++---------- python/cugraph/pagerank/pagerank_wrapper.pyx | 8 ++-- .../spectral_clustering.pyx | 28 +++++++------- python/cugraph/sssp/sssp_wrapper.pyx | 8 ++-- 10 files changed, 100 insertions(+), 122 deletions(-) diff --git a/python/cugraph/bfs/bfs_wrapper.pyx b/python/cugraph/bfs/bfs_wrapper.pyx index 615bbb33daf..348d0c62d6c 100644 --- a/python/cugraph/bfs/bfs_wrapper.pyx +++ b/python/cugraph/bfs/bfs_wrapper.pyx @@ -52,21 +52,21 @@ cpdef bfs(G, start, directed=True): >>> G.add_edge_list(sources,destinations,none) >>> dist, pred = cuGraph.bfs(G, 0, false) """ - + cdef uintptr_t graph = G.graph_ptr cdef gdf_graph* g = graph num_verts = G.num_vertices() - + df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef uintptr_t vertex_ptr = create_column(df['vertex']) + cdef gdf_column c_vertex_col = get_gdf_column_view(df['vertex']) df['distance'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef uintptr_t distances_ptr = create_column(df['distance']) + cdef gdf_column c_distances_col = get_gdf_column_view(df['distance']) df['predecessor'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef uintptr_t predecessors_ptr = create_column(df['predecessor']) - - err = g.adjList.get_vertex_identifiers(vertex_ptr) + cdef gdf_column c_predecessors_col = get_gdf_column_view(df['predecessor']) + + err = g.adjList.get_vertex_identifiers(&c_vertex_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - - gdf_bfs(g, distances_ptr, predecessors_ptr, start, directed) + + gdf_bfs(g, &c_distances_col, &c_predecessors_col, start, directed) return df diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index b96986ac377..0e43084873b 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -48,26 +48,6 @@ cdef gdf_column get_gdf_column_view(col): return c_col -cdef create_column(col): - cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column)) - cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) - # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) - cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE) - - - err = gdf_column_view_augmented(< gdf_column *> c_col, - < void *> data_ptr, - < gdf_valid_type *> 0, - < gdf_size_type > len(col), - dtypes[col.dtype.type], - < gdf_size_type > col.null_count, - c_extra_dtype_info) - cudf.bindings.cudf_cpp.check_gdf_error(err) - - cdef uintptr_t col_ptr = < uintptr_t > c_col - return col_ptr - - class Graph: """ cuGraph graph class containing basic graph creation and transformation operations. diff --git a/python/cugraph/jaccard/jaccard_wrapper.pyx b/python/cugraph/jaccard/jaccard_wrapper.pyx index 17f021027b4..c602d8db414 100644 --- a/python/cugraph/jaccard/jaccard_wrapper.pyx +++ b/python/cugraph/jaccard/jaccard_wrapper.pyx @@ -73,22 +73,22 @@ cpdef jaccard(input_graph, first=None, second=None): err = gdf_add_adj_list(< gdf_graph *> graph) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t result_ptr - cdef uintptr_t first_ptr - cdef uintptr_t second_ptr - cdef uintptr_t src_indices_ptr + cdef gdf_column c_result_col + cdef gdf_column c_first_col + cdef gdf_column c_second_col + cdef gdf_column c_src_index_col if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series: resultSize = len(first) result = cudf.Series(np.ones(resultSize, dtype=np.float32)) - result_ptr = create_column(result) - first_ptr = create_column(first) - second_ptr = create_column(second) + c_result_col = get_gdf_column_view(result) + c_first_col = get_gdf_column_view(first) + c_second_col = get_gdf_column_view(second) err = gdf_jaccard_list(g, < gdf_column *> NULL, - < gdf_column *> first_ptr, - < gdf_column *> second_ptr, - < gdf_column *> result_ptr) + &c_first_col, + &c_second_col, + &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) df = cudf.DataFrame() df['source'] = first @@ -99,9 +99,9 @@ cpdef jaccard(input_graph, first=None, second=None): elif first is None and second is None: e = g.adjList.indices.size result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False) - result_ptr = create_column(result) + c_result_col = get_gdf_column_view(result) - err = gdf_jaccard(g, < gdf_column *> NULL, < gdf_column *> result_ptr) + err = gdf_jaccard(g, < gdf_column *> NULL, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, @@ -109,8 +109,8 @@ cpdef jaccard(input_graph, first=None, second=None): dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() df['source'] = cudf.Series(np.zeros(e, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])) - src_indices_ptr = create_column(df['source']) - err = g.adjList.get_source_indices(< gdf_column *> src_indices_ptr); + c_src_index_col = get_gdf_column_view(df['source']) + err = g.adjList.get_source_indices(&c_src_index_col); cudf.bindings.cudf_cpp.check_gdf_error(err) df['destination'] = cudf.Series(dest_data) df['jaccard_coeff'] = result diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx index 7dddcbf6fd3..f0f61f79d54 100644 --- a/python/cugraph/jaccard/wjaccard_wrapper.pyx +++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx @@ -79,24 +79,24 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t result_ptr - cdef uintptr_t weight_ptr - cdef uintptr_t first_ptr - cdef uintptr_t second_ptr - cdef uintptr_t indices_ptr + cdef gdf_column c_result_col + cdef gdf_column c_weight_col + cdef gdf_column c_first_col + cdef gdf_column c_second_col + cdef gdf_column c_indices_col if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series: resultSize = len(first) result = cudf.Series(np.ones(resultSize, dtype=np.float32)) - result_ptr = create_column(result) - weight_ptr = create_column(weights) - first_ptr = create_column(first) - second_ptr = create_column(second) + c_result_col = get_gdf_column_view(result) + c_weight_col = get_gdf_column_view(weights) + c_first_col = get_gdf_column_view(first) + c_second_col = get_gdf_column_view(second) err = gdf_jaccard_list(g, - < gdf_column *> weight_ptr, - < gdf_column *> first_ptr, - < gdf_column *> second_ptr, - < gdf_column *> result_ptr) + &c_weight_col, + &c_first_col, + &c_second_col, + &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) df = cudf.DataFrame() df['source'] = first @@ -107,10 +107,10 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): elif first is None and second is None: resultSize = g.adjList.indices.size result = cudf.Series(np.ones(resultSize, dtype=np.float32)) - result_ptr = create_column(result) - weight_ptr = create_column(weights) + c_result_col = get_gdf_column_view(result) + c_weight_col = get_gdf_column_view(weights) - err = gdf_jaccard(g, < gdf_column *> weight_ptr, < gdf_column *> result_ptr) + err = gdf_jaccard(g, &c_weight_col, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, @@ -118,8 +118,8 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() df['source'] = cudf.Series(np.zeros(resultSize, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])) - indices_ptr = create_column(df['source']) - err = g.adjList.get_source_indices(< gdf_column *> indices_ptr); + c_index_col = get_gdf_column_view(df['source']) + err = g.adjList.get_source_indices(&c_index_col); cudf.bindings.cudf_cpp.check_gdf_error(err) df['destination'] = cudf.Series(dest_data) df['jaccard_coeff'] = result @@ -127,4 +127,3 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): return df raise ValueError("Specify first and second or neither") - diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx index aff1e3399ee..3b0db53e8bc 100644 --- a/python/cugraph/louvain/louvain_wrapper.pyx +++ b/python/cugraph/louvain/louvain_wrapper.pyx @@ -56,16 +56,16 @@ cpdef nvLouvain(input_graph): df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(n, dtype=np.int32)) - cdef uintptr_t identifier_ptr = create_column(df['vertex']) - err = g.adjList.get_vertex_identifiers(identifier_ptr) + cdef gdf_column c_index_col = get_gdf_column_view(df['vertex']) + err = g.adjList.get_vertex_identifiers(&c_index_col) cudf.bindings.cudf_cpp.check_gdf_error(err) df['partition'] = cudf.Series(np.zeros(n,dtype=np.int32)) - cdef uintptr_t louvain_parts_col_ptr = create_column(df['partition']) + cdef gdf_column c_louvain_parts_col = get_gdf_column_view(df['partition']) cdef double final_modularity = 1.0 cdef int num_level - err = gdf_louvain(g, &final_modularity, &num_level, louvain_parts_col_ptr) + err = gdf_louvain(g, &final_modularity, &num_level, &c_louvain_parts_col) cudf.bindings.cudf_cpp.check_gdf_error(err) cdef double fm = final_modularity @@ -75,4 +75,4 @@ cpdef nvLouvain(input_graph): fm = tmp else: fm = tmp - return df, fm + return df, fm diff --git a/python/cugraph/overlap/overlap_wrapper.pyx b/python/cugraph/overlap/overlap_wrapper.pyx index f216075b37a..ecad68ea939 100644 --- a/python/cugraph/overlap/overlap_wrapper.pyx +++ b/python/cugraph/overlap/overlap_wrapper.pyx @@ -73,22 +73,22 @@ cpdef overlap(input_graph, first=None, second=None): err = gdf_add_adj_list(< gdf_graph *> graph) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t result_ptr - cdef uintptr_t first_ptr - cdef uintptr_t second_ptr - cdef uintptr_t src_indices_ptr + cdef gdf_column c_result_col + cdef gdf_column c_first_col + cdef gdf_column c_second_col + cdef gdf_column c_src_index_col if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series: resultSize = len(first) result = cudf.Series(np.ones(resultSize, dtype=np.float32)) - result_ptr = create_column(result) - first_ptr = create_column(first) - second_ptr = create_column(second) + c_result_col = get_gdf_column_view(result) + c_first_col = get_gdf_column_view(first) + c_second_col = get_gdf_column_view(second) err = gdf_overlap_list(g, < gdf_column *> NULL, - < gdf_column *> first_ptr, - < gdf_column *> second_ptr, - < gdf_column *> result_ptr) + &c_first_col, + &c_second_col, + &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) df = cudf.DataFrame() df['source'] = first @@ -99,9 +99,9 @@ cpdef overlap(input_graph, first=None, second=None): elif first is None and second is None: e = g.adjList.indices.size result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False) - result_ptr = create_column(result) + c_result_col = get_gdf_column_view(result) - err = gdf_overlap(g, < gdf_column *> NULL, < gdf_column *> result_ptr) + err = gdf_overlap(g, < gdf_column *> NULL, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, @@ -109,8 +109,8 @@ cpdef overlap(input_graph, first=None, second=None): dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() df['source'] = cudf.Series(np.zeros(e, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])) - src_indices_ptr = create_column(df['source']) - err = g.adjList.get_source_indices(< gdf_column *> src_indices_ptr); + c_src_index_col = get_gdf_column_view(df['source']) + err = g.adjList.get_source_indices(&c_src_index_col); cudf.bindings.cudf_cpp.check_gdf_error(err) df['destination'] = cudf.Series(dest_data) df['overlap_coeff'] = result diff --git a/python/cugraph/overlap/woverlap_wrapper.pyx b/python/cugraph/overlap/woverlap_wrapper.pyx index 6307a964f87..c55a48d147a 100644 --- a/python/cugraph/overlap/woverlap_wrapper.pyx +++ b/python/cugraph/overlap/woverlap_wrapper.pyx @@ -79,24 +79,24 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t result_ptr - cdef uintptr_t weight_ptr - cdef uintptr_t first_ptr - cdef uintptr_t second_ptr - cdef uintptr_t indices_ptr + cdef gdf_column c_result_col + cdef gdf_column c_weight_col + cdef gdf_column c_first_col + cdef gdf_column c_second_col + cdef gdf_column c_index_col if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series: resultSize = len(first) result = cudf.Series(np.ones(resultSize, dtype=np.float32)) - result_ptr = create_column(result) - weight_ptr = create_column(weights) - first_ptr = create_column(first) - second_ptr = create_column(second) + c_result_col = get_gdf_column_view(result) + c_weight_col = get_gdf_column_view(weights) + c_first_col = get_gdf_column_view(first) + c_second_col = get_gdf_column_view(second) err = gdf_overlap_list(g, - < gdf_column *> weight_ptr, - < gdf_column *> first_ptr, - < gdf_column *> second_ptr, - < gdf_column *> result_ptr) + &c_weight_col, + &c_first_col, + &c_second_col, + &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) df = cudf.DataFrame() df['source'] = first @@ -107,10 +107,10 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): elif first is None and second is None: resultSize = g.adjList.indices.size result = cudf.Series(np.ones(resultSize, dtype=np.float32)) - result_ptr = create_column(result) - weight_ptr = create_column(weights) + c_result_col = get_gdf_column_view(result) + c_weight_col = get_gdf_column_view(weights) - err = gdf_overlap(g, < gdf_column *> weight_ptr, < gdf_column *> result_ptr) + err = gdf_overlap(g, &c_weight_col, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, @@ -118,8 +118,8 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() df['source'] = cudf.Series(np.zeros(resultSize, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])) - indices_ptr = create_column(df['source']) - err = g.adjList.get_source_indices(< gdf_column *> indices_ptr); + c_index_col = get_gdf_column_view(df['source']) + err = g.adjList.get_source_indices(&c_index_col); cudf.bindings.cudf_cpp.check_gdf_error(err) df['destination'] = cudf.Series(dest_data) df['overlap_coeff'] = result @@ -127,4 +127,3 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): return df raise ValueError("Specify first and second or neither") - diff --git a/python/cugraph/pagerank/pagerank_wrapper.pyx b/python/cugraph/pagerank/pagerank_wrapper.pyx index d2d8371ec05..821ceee1d22 100755 --- a/python/cugraph/pagerank/pagerank_wrapper.pyx +++ b/python/cugraph/pagerank/pagerank_wrapper.pyx @@ -65,13 +65,13 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5): cdef gdf_graph* g = graph df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32)) - cdef uintptr_t identifier_ptr = create_column(df['vertex']) + cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) df['pagerank'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.float32)) - cdef uintptr_t pagerank_ptr = create_column(df['pagerank']) + cdef gdf_column c_pagerank_col = get_gdf_column_view(df['pagerank']) - err = g.transposedAdjList.get_vertex_identifiers(identifier_ptr) + err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - err = gdf_pagerank(graph, pagerank_ptr, alpha, tol, max_iter, 0) + err = gdf_pagerank(g, &c_pagerank_col, alpha, tol, max_iter, 0) cudf.bindings.cudf_cpp.check_gdf_error(err) return df diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx index e8af4f27139..1491692385e 100644 --- a/python/cugraph/spectral_clustering/spectral_clustering.pyx +++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx @@ -78,12 +78,12 @@ cpdef spectralBalancedCutClustering(G, # Create the output dataframe df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) - cdef uintptr_t identifier_ptr = create_column(df['vertex']) + cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) df['cluster'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) - cdef uintptr_t cluster_ptr = create_column(df['cluster']) + cdef gdf_column c_cluster_col = get_gdf_column_view(df['cluster']) # Set the vertex identifiers - err = g.adjList.get_vertex_identifiers(< gdf_column *> identifier_ptr) + err = g.adjList.get_vertex_identifiers(&c_identifier_col) cudf.bindings.cudf_cpp.check_gdf_error(err) err = gdf_balancedCutClustering_nvgraph(g, @@ -93,7 +93,7 @@ cpdef spectralBalancedCutClustering(G, evs_max_iter, kmean_tolerance, kmean_max_iter, - < gdf_column *> cluster_ptr) + &c_cluster_col) cudf.bindings.cudf_cpp.check_gdf_error(err) return df @@ -155,12 +155,12 @@ cpdef spectralModularityMaximizationClustering(G, # Create the output dataframe df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) - cdef uintptr_t identifier_ptr = create_column(df['vertex']) + cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) df['cluster'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) - cdef uintptr_t cluster_ptr = create_column(df['cluster']) + cdef gdf_column c_cluster_col = get_gdf_column_view(df['cluster']) # Set the vertex identifiers - err = g.adjList.get_vertex_identifiers(< gdf_column *> identifier_ptr) + err = g.adjList.get_vertex_identifiers(&c_identifier_col) cudf.bindings.cudf_cpp.check_gdf_error(err) err = gdf_spectralModularityMaximization_nvgraph(g, @@ -170,7 +170,7 @@ cpdef spectralModularityMaximizationClustering(G, evs_max_iter, kmean_tolerance, kmean_max_iter, - < gdf_column *> cluster_ptr) + &c_cluster_col) cudf.bindings.cudf_cpp.check_gdf_error(err) return df @@ -209,9 +209,9 @@ cpdef analyzeClustering_modularity(G, n_clusters, clustering): err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t clustering_ptr = create_column(clustering) + cdef gdf_column c_clustering_col = get_gdf_column_view(clustering) cdef float score - err = gdf_AnalyzeClustering_modularity_nvgraph(g, n_clusters, clustering_ptr, &score) + err = gdf_AnalyzeClustering_modularity_nvgraph(g, n_clusters, &c_clustering_col, &score) cudf.bindings.cudf_cpp.check_gdf_error(err) return score @@ -249,9 +249,9 @@ cpdef analyzeClustering_edge_cut(G, n_clusters, clustering): err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t clustering_ptr = create_column(clustering) + cdef gdf_column c_clustering_col = get_gdf_column_view(clustering) cdef float score - err = gdf_AnalyzeClustering_edge_cut_nvgraph(g, n_clusters, clustering_ptr, &score) + err = gdf_AnalyzeClustering_edge_cut_nvgraph(g, n_clusters, &c_clustering_col, &score) cudf.bindings.cudf_cpp.check_gdf_error(err) return score @@ -289,8 +289,8 @@ cpdef analyzeClustering_ratio_cut(G, n_clusters, clustering): err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef uintptr_t clustering_ptr = create_column(clustering) + cdef gdf_column c_clustering_col = get_gdf_column_view(clustering) cdef float score - err = gdf_AnalyzeClustering_ratio_cut_nvgraph(g, n_clusters, clustering_ptr, &score) + err = gdf_AnalyzeClustering_ratio_cut_nvgraph(g, n_clusters, &c_clustering_col, &score) cudf.bindings.cudf_cpp.check_gdf_error(err) return score diff --git a/python/cugraph/sssp/sssp_wrapper.pyx b/python/cugraph/sssp/sssp_wrapper.pyx index e0f8fd8b13e..439b09b7cb9 100644 --- a/python/cugraph/sssp/sssp_wrapper.pyx +++ b/python/cugraph/sssp/sssp_wrapper.pyx @@ -64,16 +64,16 @@ cpdef sssp(G, source): df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32)) - cdef uintptr_t identifier_ptr = create_column(df['vertex']) + cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) df['distance'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=data_type)) - cdef uintptr_t distance_ptr = create_column(df['distance']) + cdef gdf_column c_distance_col = get_gdf_column_view(df['distance']) - err = g.transposedAdjList.get_vertex_identifiers(identifier_ptr) + err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col) cudf.bindings.cudf_cpp.check_gdf_error(err) cdef int[1] sources sources[0] = source - err = gdf_sssp_nvgraph(graph, sources, distance_ptr) + err = gdf_sssp_nvgraph(g, sources, &c_distance_col) cudf.bindings.cudf_cpp.check_gdf_error(err) return df From b08841465b159ffd7b81e2fdef88f867eb8aa6cc Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Apr 2019 15:34:59 -0700 Subject: [PATCH 04/18] ReadMtxFile is renamed to read_mtx_file, but the comments were still using ReadMtxFile, fixed this. --- python/cugraph/bfs/bfs_wrapper.pyx | 2 +- python/cugraph/jaccard/jaccard_wrapper.pyx | 2 +- python/cugraph/jaccard/wjaccard_wrapper.pyx | 2 +- python/cugraph/louvain/louvain_wrapper.pyx | 2 +- python/cugraph/overlap/overlap_wrapper.pyx | 2 +- python/cugraph/overlap/woverlap_wrapper.pyx | 2 +- python/cugraph/pagerank/pagerank_wrapper.pyx | 2 +- .../spectral_clustering/spectral_clustering.pyx | 10 +++++----- python/cugraph/sssp/sssp_wrapper.pyx | 2 +- .../cugraph/triangle_count/triangle_count_wrapper.pyx | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/python/cugraph/bfs/bfs_wrapper.pyx b/python/cugraph/bfs/bfs_wrapper.pyx index 348d0c62d6c..ac226469390 100644 --- a/python/cugraph/bfs/bfs_wrapper.pyx +++ b/python/cugraph/bfs/bfs_wrapper.pyx @@ -45,7 +45,7 @@ cpdef bfs(G, start, directed=True): Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/jaccard/jaccard_wrapper.pyx b/python/cugraph/jaccard/jaccard_wrapper.pyx index c602d8db414..74fe01ed649 100644 --- a/python/cugraph/jaccard/jaccard_wrapper.pyx +++ b/python/cugraph/jaccard/jaccard_wrapper.pyx @@ -60,7 +60,7 @@ cpdef jaccard(input_graph, first=None, second=None): Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx index f0f61f79d54..52ff299bd8a 100644 --- a/python/cugraph/jaccard/wjaccard_wrapper.pyx +++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx @@ -65,7 +65,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): the source and destination vertices. Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx index 3b0db53e8bc..8393e464cb7 100644 --- a/python/cugraph/louvain/louvain_wrapper.pyx +++ b/python/cugraph/louvain/louvain_wrapper.pyx @@ -38,7 +38,7 @@ cpdef nvLouvain(input_graph): Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/overlap/overlap_wrapper.pyx b/python/cugraph/overlap/overlap_wrapper.pyx index ecad68ea939..41f628edf37 100644 --- a/python/cugraph/overlap/overlap_wrapper.pyx +++ b/python/cugraph/overlap/overlap_wrapper.pyx @@ -60,7 +60,7 @@ cpdef overlap(input_graph, first=None, second=None): Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/overlap/woverlap_wrapper.pyx b/python/cugraph/overlap/woverlap_wrapper.pyx index c55a48d147a..93194f5b96c 100644 --- a/python/cugraph/overlap/woverlap_wrapper.pyx +++ b/python/cugraph/overlap/woverlap_wrapper.pyx @@ -65,7 +65,7 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): the source and destination vertices. Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/pagerank/pagerank_wrapper.pyx b/python/cugraph/pagerank/pagerank_wrapper.pyx index 821ceee1d22..c48af92bebe 100755 --- a/python/cugraph/pagerank/pagerank_wrapper.pyx +++ b/python/cugraph/pagerank/pagerank_wrapper.pyx @@ -50,7 +50,7 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5): Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx index 1491692385e..674faeb8034 100644 --- a/python/cugraph/spectral_clustering/spectral_clustering.pyx +++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx @@ -58,7 +58,7 @@ cpdef spectralBalancedCutClustering(G, Example: -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() @@ -135,7 +135,7 @@ cpdef spectralModularityMaximizationClustering(G, Example: -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() @@ -194,7 +194,7 @@ cpdef analyzeClustering_modularity(G, n_clusters, clustering): Example: -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() @@ -234,7 +234,7 @@ cpdef analyzeClustering_edge_cut(G, n_clusters, clustering): Example: -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() @@ -274,7 +274,7 @@ cpdef analyzeClustering_ratio_cut(G, n_clusters, clustering): Example: -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/sssp/sssp_wrapper.pyx b/python/cugraph/sssp/sssp_wrapper.pyx index 439b09b7cb9..8c3bcff536a 100644 --- a/python/cugraph/sssp/sssp_wrapper.pyx +++ b/python/cugraph/sssp/sssp_wrapper.pyx @@ -44,7 +44,7 @@ cpdef sssp(G, source): Examples -------- - >>> M = ReadMtxFile(graph_file) + >>> M = read_mtx_file(graph_file) >>> sources = cudf.Series(M.row) >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() diff --git a/python/cugraph/triangle_count/triangle_count_wrapper.pyx b/python/cugraph/triangle_count/triangle_count_wrapper.pyx index bcddaf3524c..e90ccf4ff6d 100644 --- a/python/cugraph/triangle_count/triangle_count_wrapper.pyx +++ b/python/cugraph/triangle_count/triangle_count_wrapper.pyx @@ -37,7 +37,7 @@ cpdef triangles(input_graph): Example ------- - >>>> M = ReadMtxFile(graph_file) + >>>> M = read_mtx_file(graph_file) >>>> sources = cudf.Series(M.row) >>>> destinations = cudf.Series(M.col) >>>> G = cugraph.Graph() From 971c1bd4d0c2734be4036cb06e4e4c203f0fe3c0 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 16 Apr 2019 16:13:24 -0700 Subject: [PATCH 05/18] updated indentation inside cython typecasting operator to match cudf (no spacing between < and >), cudf is inconsistent in placing a space between > and the name of the variable to be casted, so left this part as is. --- python/cugraph/graph/c_graph.pyx | 56 +++++++++---------- python/cugraph/jaccard/jaccard_wrapper.pyx | 10 ++-- python/cugraph/jaccard/wjaccard_wrapper.pyx | 4 +- python/cugraph/overlap/overlap_wrapper.pyx | 10 ++-- python/cugraph/overlap/woverlap_wrapper.pyx | 4 +- .../spectral_clustering.pyx | 10 ++-- 6 files changed, 47 insertions(+), 47 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 0e43084873b..dd7376e5de7 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -36,12 +36,12 @@ cdef gdf_column get_gdf_column_view(col): # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE) - err = gdf_column_view_augmented(< gdf_column *> &c_col, - < void *> data_ptr, - < gdf_valid_type *> 0, - < gdf_size_type > len(col), + err = gdf_column_view_augmented( &c_col, + data_ptr, + 0, + len(col), dtypes[col.dtype.type], - < gdf_size_type > col.null_count, + col.null_count, c_extra_dtype_info) cudf.bindings.cudf_cpp.check_gdf_error(err) @@ -63,9 +63,9 @@ class Graph: >>> G = cuGraph.Graph() """ cdef gdf_graph * g - g = < gdf_graph *> calloc(1, sizeof(gdf_graph)) + g = calloc(1, sizeof(gdf_graph)) - cdef uintptr_t graph_ptr = < uintptr_t > g + cdef uintptr_t graph_ptr = g self.graph_ptr = graph_ptr self.edge_list_source_col = None @@ -78,7 +78,7 @@ class Graph: def __del__(self): cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph self.delete_edge_list() self.delete_adj_list() self.delete_transposed_adj_list() @@ -155,7 +155,7 @@ class Graph: c_value_col = get_gdf_column_view(self.edge_list_value_col) c_value_col_ptr = &c_value_col - err = gdf_edge_list_view(< gdf_graph *> graph, + err = gdf_edge_list_view( graph, &c_source_col, &c_dest_col, c_value_col_ptr) @@ -166,7 +166,7 @@ class Graph: Get the number of vertices in the graph """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph if g.adjList: return g.adjList.offsets.size - 1 elif g.transposedAdjList: @@ -181,14 +181,14 @@ class Graph: Display the edge list. Compute it if needed. """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph err = gdf_add_edge_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) col_size = g.edgeList.src_indices.size - cdef uintptr_t src_col_data = < uintptr_t > g.edgeList.src_indices.data - cdef uintptr_t dest_col_data = < uintptr_t > g.edgeList.dest_indices.data + cdef uintptr_t src_col_data = g.edgeList.src_indices.data + cdef uintptr_t dest_col_data = g.edgeList.dest_indices.data src_data = rmm.device_array_from_ptr(src_col_data, nelem=col_size, @@ -209,7 +209,7 @@ class Graph: Delete the edge list. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_delete_edge_list(< gdf_graph *> graph) + err = gdf_delete_edge_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) # decrease reference count to free memory if the referenced objects are @@ -293,7 +293,7 @@ class Graph: c_value_col = get_gdf_column_view(self.adj_list_value_col) c_value_col_ptr = &c_value_col - err = gdf_adj_list_view(< gdf_graph *> graph, + err = gdf_adj_list_view( graph, &c_offset_col, &c_index_col, c_value_col_ptr) @@ -304,15 +304,15 @@ class Graph: Display the adjacency list. Compute it if needed. """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) col_size_off = g.adjList.offsets.size col_size_ind = g.adjList.indices.size - cdef uintptr_t offset_col_data = < uintptr_t > g.adjList.offsets.data - cdef uintptr_t index_col_data = < uintptr_t > g.adjList.indices.data + cdef uintptr_t offset_col_data = g.adjList.offsets.data + cdef uintptr_t index_col_data = g.adjList.indices.data offsets_data = rmm.device_array_from_ptr(offset_col_data, nelem=col_size_off, @@ -333,7 +333,7 @@ class Graph: Delete the adjacency list. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_delete_adj_list(< gdf_graph *> graph) + err = gdf_delete_adj_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) # decrease reference count to free memory if the referenced objects are @@ -348,7 +348,7 @@ class Graph: the existing graph. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_add_transposed_adj_list(< gdf_graph *> graph) + err = gdf_add_transposed_adj_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) def view_transposed_adj_list(self): @@ -356,15 +356,15 @@ class Graph: Display the transposed adjacency list. Compute it if needed. """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph err = gdf_add_transposed_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) off_size = g.transposedAdjList.offsets.size ind_size = g.transposedAdjList.indices.size - cdef uintptr_t offset_col_data = < uintptr_t > g.transposedAdjList.offsets.data - cdef uintptr_t indices_col_data = < uintptr_t > g.transposedAdjList.indices.data + cdef uintptr_t offset_col_data = g.transposedAdjList.offsets.data + cdef uintptr_t indices_col_data = g.transposedAdjList.indices.data offsets_data = rmm.device_array_from_ptr(offset_col_data, nelem=off_size, @@ -393,7 +393,7 @@ class Graph: df['second'] the second vertex id of a pair """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph cdef gdf_column c_first_col cdef gdf_column c_second_col err = gdf_get_two_hop_neighbors(g, &c_first_col, &c_second_col) @@ -425,7 +425,7 @@ class Graph: Delete the adjacency list. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_delete_adj_list(< gdf_graph *> graph) + err = gdf_delete_adj_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) def delete_transposed_adj_list(self): @@ -433,7 +433,7 @@ class Graph: Delete the transposed adjacency list. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_delete_transposed_adj_list(< gdf_graph *> graph) + err = gdf_delete_transposed_adj_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) def num_vertices(self): @@ -441,7 +441,7 @@ class Graph: Get the number of vertices in the graph """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph* g = < gdf_graph *> graph + cdef gdf_graph* g = graph err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) return g.adjList.offsets.size - 1 @@ -556,7 +556,7 @@ class Graph: def _degree(self, vertex_subset, x = 0): cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph* g = < gdf_graph *> graph + cdef gdf_graph* g = graph n = self.num_vertices() diff --git a/python/cugraph/jaccard/jaccard_wrapper.pyx b/python/cugraph/jaccard/jaccard_wrapper.pyx index 74fe01ed649..164e794f6a7 100644 --- a/python/cugraph/jaccard/jaccard_wrapper.pyx +++ b/python/cugraph/jaccard/jaccard_wrapper.pyx @@ -68,9 +68,9 @@ cpdef jaccard(input_graph, first=None, second=None): >>> jaccard_weights = cugraph.jaccard(G) """ cdef uintptr_t graph = input_graph.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph - err = gdf_add_adj_list(< gdf_graph *> graph) + err = gdf_add_adj_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) cdef gdf_column c_result_col @@ -85,7 +85,7 @@ cpdef jaccard(input_graph, first=None, second=None): c_first_col = get_gdf_column_view(first) c_second_col = get_gdf_column_view(second) err = gdf_jaccard_list(g, - < gdf_column *> NULL, + NULL, &c_first_col, &c_second_col, &c_result_col) @@ -101,10 +101,10 @@ cpdef jaccard(input_graph, first=None, second=None): result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False) c_result_col = get_gdf_column_view(result) - err = gdf_jaccard(g, < gdf_column *> NULL, &c_result_col) + err = gdf_jaccard(g, NULL, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, + dest_data = rmm.device_array_from_ptr( g.adjList.indices.data, nelem=e, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx index 52ff299bd8a..cba0da8fc53 100644 --- a/python/cugraph/jaccard/wjaccard_wrapper.pyx +++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx @@ -74,7 +74,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): """ cdef uintptr_t graph = input_graph.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) @@ -113,7 +113,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): err = gdf_jaccard(g, &c_weight_col, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, + dest_data = rmm.device_array_from_ptr( g.adjList.indices.data, nelem=resultSize, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() diff --git a/python/cugraph/overlap/overlap_wrapper.pyx b/python/cugraph/overlap/overlap_wrapper.pyx index 41f628edf37..d7a799a8bd4 100644 --- a/python/cugraph/overlap/overlap_wrapper.pyx +++ b/python/cugraph/overlap/overlap_wrapper.pyx @@ -68,9 +68,9 @@ cpdef overlap(input_graph, first=None, second=None): >>> df = cugraph.overlap(G) """ cdef uintptr_t graph = input_graph.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph - err = gdf_add_adj_list(< gdf_graph *> graph) + err = gdf_add_adj_list( graph) cudf.bindings.cudf_cpp.check_gdf_error(err) cdef gdf_column c_result_col @@ -85,7 +85,7 @@ cpdef overlap(input_graph, first=None, second=None): c_first_col = get_gdf_column_view(first) c_second_col = get_gdf_column_view(second) err = gdf_overlap_list(g, - < gdf_column *> NULL, + NULL, &c_first_col, &c_second_col, &c_result_col) @@ -101,10 +101,10 @@ cpdef overlap(input_graph, first=None, second=None): result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False) c_result_col = get_gdf_column_view(result) - err = gdf_overlap(g, < gdf_column *> NULL, &c_result_col) + err = gdf_overlap(g, NULL, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, + dest_data = rmm.device_array_from_ptr( g.adjList.indices.data, nelem=e, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() diff --git a/python/cugraph/overlap/woverlap_wrapper.pyx b/python/cugraph/overlap/woverlap_wrapper.pyx index 93194f5b96c..b69bbb48d92 100644 --- a/python/cugraph/overlap/woverlap_wrapper.pyx +++ b/python/cugraph/overlap/woverlap_wrapper.pyx @@ -74,7 +74,7 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): """ cdef uintptr_t graph = input_graph.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) @@ -113,7 +113,7 @@ cpdef overlap_w(input_graph, weights, first=None, second=None): err = gdf_overlap(g, &c_weight_col, &c_result_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data, + dest_data = rmm.device_array_from_ptr( g.adjList.indices.data, nelem=resultSize, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]) df = cudf.DataFrame() diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx index 674faeb8034..0a9dc261db1 100644 --- a/python/cugraph/spectral_clustering/spectral_clustering.pyx +++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx @@ -67,7 +67,7 @@ cpdef spectralBalancedCutClustering(G, """ cdef uintptr_t graph = G.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph # Ensure that the graph has CSR adjacency list err = gdf_add_adj_list(g) @@ -144,7 +144,7 @@ cpdef spectralModularityMaximizationClustering(G, """ cdef uintptr_t graph = G.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph # Ensure that the graph has CSR adjacency list err = gdf_add_adj_list(g) @@ -203,7 +203,7 @@ cpdef analyzeClustering_modularity(G, n_clusters, clustering): >>> score = cuGraph.analyzeClustering_modularity(G, 5, DF['cluster']) """ cdef uintptr_t graph = G.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph # Ensure that the graph has CSR adjacency list err = gdf_add_adj_list(g) @@ -243,7 +243,7 @@ cpdef analyzeClustering_edge_cut(G, n_clusters, clustering): >>> score = cuGraph.analyzeClustering_edge_cut(G, 5, DF['cluster']) """ cdef uintptr_t graph = G.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph # Ensure that the graph has CSR adjacency list err = gdf_add_adj_list(g) @@ -283,7 +283,7 @@ cpdef analyzeClustering_ratio_cut(G, n_clusters, clustering): >>> score = cuGraph.analyzeClustering_ratio_cut(G, 5, DF['cluster']) """ cdef uintptr_t graph = G.graph_ptr - cdef gdf_graph * g = < gdf_graph *> graph + cdef gdf_graph * g = graph # Ensure that the graph has CSR adjacency list err = gdf_add_adj_list(g) From d5f37e61831ae915479d14a4575087260fa712b7 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 18 Apr 2019 09:49:00 -0700 Subject: [PATCH 06/18] updated comments on get_gdf_column_view --- python/cugraph/graph/c_graph.pyx | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index dd7376e5de7..6f7df83a8b9 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -29,16 +29,26 @@ cdef gdf_column get_gdf_column_view(col): object by shallow copying. The returned C++ object is expected to be used as a temporary variable to pass the column data encapsulated in the Python cudf Series object to C++ functions expecting (pointers to) C++ gdf_column - objects. + objects. It is the caller's responsibility to insure that col out-lives the + returned view object. cudf has column_view_from_column and using this is, + in general, better design than creating our own, but we will keep this as + cudf is planning to remove the function. cudf plans to redesign + cudf::column to fundamentally solve this problem, so once they finished the + redesign, we need to update this code to use their new features. Until that + time, we may rely on this as a temporary solution. """ cdef gdf_column c_col cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) - # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) + cdef uintptr_t valid_ptr + if col._column._mask is None: + valid_ptr = 0 + else: + valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE) err = gdf_column_view_augmented( &c_col, data_ptr, - 0, + valid_ptr, len(col), dtypes[col.dtype.type], col.null_count, From 5b37c8497d2aa8960938485b7702d76b08dbca50 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 18 Apr 2019 16:12:41 -0700 Subject: [PATCH 07/18] fixed a bug (not properly freeing valid) in gdf_col_delete --- cpp/src/cugraph.cu | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 8a22c9e7005..175a3c8e43a 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -29,26 +29,27 @@ template using Vector = thrust::device_vector>; +/* + * cudf has gdf_column_free and using this is, in general, better design than + * creating our own, but we will keep this as cudf is planning to remove the + * function. cudf plans to redesign cudf::column to fundamentally solve this + * problem, so once they finished the redesign, we need to update this code to + * use their new features. Until that time, we may rely on this as a temporary + * solution. + */ void gdf_col_delete(gdf_column* col) { - if (col) { - col->size = 0; - if(col->data) { - ALLOC_FREE_TRY(col->data, nullptr); + if (col != nullptr) { + auto stream = cudaStream_t{nullptr}; + if (col->data != nullptr) { + ALLOC_FREE_TRY(col->data, stream); + } + if (col->valid != nullptr) { + ALLOC_FREE_TRY(col->valid, stream); + } + if (col->col_name != nullptr) { + free(col->col_name); } -#if 1 -// If delete col is executed, the memory pointed by col is no longer valid and -// can be used in another memory allocation, so executing col->data = nullptr -// after delete col is dangerous, also, col = nullptr has no effect here (the -// address is passed by value, for col = nullptr should work, the input -// parameter should be gdf_column*& col (or alternatively, gdf_column** col and -// *col = nullptr also work) - col->data = nullptr; - delete col; -#else delete col; - col->data = nullptr; - col = nullptr; -#endif } } From e80277eab8a38f8ed040bbeb69fd2e5ca2db09de Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 18 Apr 2019 17:56:16 -0700 Subject: [PATCH 08/18] commented out freeing gdf_column's col_name if not nullptr, currently, cudf's gdf_column_view does not properly initialize col_name to nullptr, freeing col_name can result in freeing unallocated memory, this problem should be cleaned up once cudf finishes redesigning cudf::column. --- cpp/src/cugraph.cu | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 175a3c8e43a..1506fdfafab 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -46,9 +46,14 @@ void gdf_col_delete(gdf_column* col) { if (col->valid != nullptr) { ALLOC_FREE_TRY(col->valid, stream); } +#if 0/* Currently, gdf_column_view does not set col_name, and col_name can have + an arbitrary value, so freeing col_name can lead to freeing a ranodom + address. This problem should be cleaned up once cudf finishes + redesigning cudf::column. */ if (col->col_name != nullptr) { free(col->col_name); } +#endif delete col; } } From 57daf68486bb2547c05eb5d7226f69a0ad84c28f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 18 Apr 2019 22:43:39 -0700 Subject: [PATCH 09/18] removed tab space in algorithms.h --- cpp/include/algorithms.h | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/cpp/include/algorithms.h b/cpp/include/algorithms.h index 985b7a838db..2116fabe7eb 100644 --- a/cpp/include/algorithms.h +++ b/cpp/include/algorithms.h @@ -40,11 +40,11 @@ */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_pagerank(gdf_graph *graph, - gdf_column *pagerank, - float alpha, - float tolerance, - int max_iter, - bool has_guess); + gdf_column *pagerank, + float alpha, + float tolerance, + int max_iter, + bool has_guess); /** * @Synopsis Creates source, destination and value columns based on the specified R-MAT model @@ -78,11 +78,11 @@ gdf_error gdf_pagerank(gdf_graph *graph, */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_grmat_gen(const char* argv, - size_t &vertices, - size_t &edges, - gdf_column* src, - gdf_column* dest, - gdf_column* val); + size_t &vertices, + size_t &edges, + gdf_column* src, + gdf_column* dest, + gdf_column* val); /** * @Synopsis Performs a breadth first search traversal of a graph starting from a node. @@ -101,10 +101,10 @@ gdf_error gdf_grmat_gen(const char* argv, */ /* ----------------------------------------------------------------------------*/ gdf_error gdf_bfs(gdf_graph *graph, - gdf_column *distances, - gdf_column *predecessors, - int start_node, - bool directed); + gdf_column *distances, + gdf_column *predecessors, + int start_node, + bool directed); /** * Computes the Jaccard similarity coefficient for every pair of vertices in the graph @@ -116,8 +116,8 @@ gdf_error gdf_bfs(gdf_graph *graph, * @return Error code */ gdf_error gdf_jaccard(gdf_graph *graph, - gdf_column *weights, - gdf_column *result); + gdf_column *weights, + gdf_column *result); /** * Computes the Jaccard similarity coefficient for each pair of specified vertices. @@ -131,10 +131,10 @@ gdf_error gdf_jaccard(gdf_graph *graph, * @return Error code */ gdf_error gdf_jaccard_list(gdf_graph *graph, - gdf_column *weights, - gdf_column *first, - gdf_column *second, - gdf_column *result); + gdf_column *weights, + gdf_column *first, + gdf_column *second, + gdf_column *result); /** * Computes the Overlap Coefficient for every pair of vertices in the graph which are @@ -167,6 +167,6 @@ gdf_error gdf_overlap_list(gdf_graph *graph, gdf_column *result); gdf_error gdf_louvain(gdf_graph *graph, - void *final_modularity, - void *num_level, - gdf_column *louvain_parts); + void *final_modularity, + void *num_level, + gdf_column *louvain_parts); From dea90dc820f7d99d115e0f324e64cf96d6cbc23c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 18 Apr 2019 23:51:05 -0700 Subject: [PATCH 10/18] fixed a warning in louvain_wrapper.pyx (dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing]) --- python/cugraph/louvain/louvain_wrapper.pyx | 46 ++++++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx index 8393e464cb7..6f3870b7d83 100644 --- a/python/cugraph/louvain/louvain_wrapper.pyx +++ b/python/cugraph/louvain/louvain_wrapper.pyx @@ -25,9 +25,9 @@ cpdef nvLouvain(input_graph): Parameters ---------- - graph : cuGraph.Graph + input_graph : cuGraph.Graph cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). - The adjacency list will be computed if not already present. + The adjacency list will be computed if not already present. Returns ------- @@ -43,7 +43,7 @@ cpdef nvLouvain(input_graph): >>> destinations = cudf.Series(M.col) >>> G = cuGraph.Graph() >>> G.add_edge_list(sources,destinations,None) - >>> louvain_parts = cuGraph.louvain(G) + >>> louvain_parts, modularity_score = cuGraph.louvain(G) """ cdef uintptr_t graph = input_graph.graph_ptr @@ -62,17 +62,37 @@ cpdef nvLouvain(input_graph): df['partition'] = cudf.Series(np.zeros(n,dtype=np.int32)) cdef gdf_column c_louvain_parts_col = get_gdf_column_view(df['partition']) - cdef double final_modularity = 1.0 - cdef int num_level - err = gdf_louvain(g, &final_modularity, &num_level, &c_louvain_parts_col) - cudf.bindings.cudf_cpp.check_gdf_error(err) - - cdef double fm = final_modularity - cdef float tmp = ((&final_modularity))[0] + cdef bool single_precision = False + # this implementation is tied to cugraph.cu line 503 + # cudaDataType_t val_type = graph->adjList->edge_data? + # gdf_to_cudadtype(graph->adjList->edge_data): CUDA_R_32F; + # this is tied to the low-level implementation detail of the lower level + # function, and very vulnerable to low level changes. Better be + # reimplemented, but we are planning to eventually remove nvgraph, so I may + # leave as is right at this moment. if g.adjList.edge_data: if g.adjList.edge_data.dtype == GDF_FLOAT32: - fm = tmp + single_precision = True; + else: + single_precision = True; + + cdef float final_modularity_single_precision = 1.0 + cdef double final_modularity_double_precision = 1.0 + cdef int num_level = 0 + cdef gdf_error error + + if single_precision: + err = gdf_louvain(g, + &final_modularity_single_precision, + &num_level, &c_louvain_parts_col) + else: + err = gdf_louvain(g, + &final_modularity_double_precision, + &num_level, &c_louvain_parts_col) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + if single_precision: + return df, final_modularity_single_precision else: - fm = tmp - return df, fm + return df, final_modularity_double_precision From 71a6bf7962ae3823a713965ab8f3917efc7917ee Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 00:22:22 -0700 Subject: [PATCH 11/18] replaced adjList.offsets.size - 1 and transposedAdjList.offsets.size - 1 with num_vertices(), this is a better abstraction and less vulnerable to low level changes in class Graph --- python/cugraph/graph/c_graph.pyx | 4 ++-- python/cugraph/louvain/louvain_wrapper.pyx | 6 +++--- python/cugraph/pagerank/pagerank_wrapper.pyx | 13 ++++++++----- .../spectral_clustering/spectral_clustering.pyx | 4 ++-- python/cugraph/sssp/sssp_wrapper.pyx | 11 ++++++----- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 6f7df83a8b9..6ee6cb61b15 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -318,7 +318,7 @@ class Graph: err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - col_size_off = g.adjList.offsets.size + col_size_off = self.num_vertices() + 1 col_size_ind = g.adjList.indices.size cdef uintptr_t offset_col_data = g.adjList.offsets.data @@ -370,7 +370,7 @@ class Graph: err = gdf_add_transposed_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - off_size = g.transposedAdjList.offsets.size + off_size = self.num_vertices() + 1 ind_size = g.transposedAdjList.indices.size cdef uintptr_t offset_col_data = g.transposedAdjList.offsets.data diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx index 6f3870b7d83..d7ab7fb294d 100644 --- a/python/cugraph/louvain/louvain_wrapper.pyx +++ b/python/cugraph/louvain/louvain_wrapper.pyx @@ -52,15 +52,15 @@ cpdef nvLouvain(input_graph): err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - n = g.adjList.offsets.size - 1 + num_vert = input_graph.num_vertices() df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(n, dtype=np.int32)) + df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) cdef gdf_column c_index_col = get_gdf_column_view(df['vertex']) err = g.adjList.get_vertex_identifiers(&c_index_col) cudf.bindings.cudf_cpp.check_gdf_error(err) - df['partition'] = cudf.Series(np.zeros(n,dtype=np.int32)) + df['partition'] = cudf.Series(np.zeros(num_vert,dtype=np.int32)) cdef gdf_column c_louvain_parts_col = get_gdf_column_view(df['partition']) cdef bool single_precision = False diff --git a/python/cugraph/pagerank/pagerank_wrapper.pyx b/python/cugraph/pagerank/pagerank_wrapper.pyx index c48af92bebe..c09d118f09a 100755 --- a/python/cugraph/pagerank/pagerank_wrapper.pyx +++ b/python/cugraph/pagerank/pagerank_wrapper.pyx @@ -59,14 +59,17 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5): """ cdef uintptr_t graph = G.graph_ptr - err = gdf_add_transposed_adj_list(graph) - cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef gdf_graph* g = graph + + err = gdf_add_transposed_adj_list(g) + cudf.bindings.cudf_cpp.check_gdf_error(err) + + num_vert = G.num_vertices() + df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32)) + df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) - df['pagerank'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.float32)) + df['pagerank'] = cudf.Series(np.zeros(num_vert, dtype=np.float32)) cdef gdf_column c_pagerank_col = get_gdf_column_view(df['pagerank']) err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col) diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx index 0a9dc261db1..3b0bf170988 100644 --- a/python/cugraph/spectral_clustering/spectral_clustering.pyx +++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx @@ -73,7 +73,7 @@ cpdef spectralBalancedCutClustering(G, err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - num_vert = g.adjList.offsets.size - 1 + num_vert = G.num_vertices() # Create the output dataframe df = cudf.DataFrame() @@ -150,7 +150,7 @@ cpdef spectralModularityMaximizationClustering(G, err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - num_vert = g.adjList.offsets.size - 1 + num_vert = G.num_vertices() # Create the output dataframe df = cudf.DataFrame() diff --git a/python/cugraph/sssp/sssp_wrapper.pyx b/python/cugraph/sssp/sssp_wrapper.pyx index 8c3bcff536a..035bbc6d68a 100644 --- a/python/cugraph/sssp/sssp_wrapper.pyx +++ b/python/cugraph/sssp/sssp_wrapper.pyx @@ -53,19 +53,21 @@ cpdef sssp(G, source): """ cdef uintptr_t graph = G.graph_ptr - err = gdf_add_transposed_adj_list(graph) + cdef gdf_graph* g = graph + + err = gdf_add_transposed_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef gdf_graph* g = graph + num_vert = G.num_vertices() data_type = np.float32 if g.transposedAdjList.edge_data: data_type = gdf_to_np_dtypes[g.transposedAdjList.edge_data.dtype] df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32)) + df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32)) cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) - df['distance'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=data_type)) + df['distance'] = cudf.Series(np.zeros(num_vert, dtype=data_type)) cdef gdf_column c_distance_col = get_gdf_column_view(df['distance']) err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col) @@ -77,4 +79,3 @@ cpdef sssp(G, source): cudf.bindings.cudf_cpp.check_gdf_error(err) return df - From 839bb3c27f0f8a359df42444529ef4afd965c92f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 00:25:27 -0700 Subject: [PATCH 12/18] there were two implementations of num_vertices in class Graph, removed one. --- python/cugraph/graph/c_graph.pyx | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 6ee6cb61b15..15a07543355 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -171,21 +171,6 @@ class Graph: c_value_col_ptr) cudf.bindings.cudf_cpp.check_gdf_error(err) - def num_vertices(self): - """ - Get the number of vertices in the graph - """ - cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph * g = graph - if g.adjList: - return g.adjList.offsets.size - 1 - elif g.transposedAdjList: - return g.transposedAdjList.offsets.size - 1 - else: - err = gdf_add_adj_list(g) - cudf.bindings.cudf_cpp.check_gdf_error(err) - return g.adjList.offsets.size - 1 - def view_edge_list(self): """ Display the edge list. Compute it if needed. @@ -451,10 +436,15 @@ class Graph: Get the number of vertices in the graph """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph* g = graph - err = gdf_add_adj_list(g) - cudf.bindings.cudf_cpp.check_gdf_error(err) - return g.adjList.offsets.size - 1 + cdef gdf_graph * g = graph + if g.adjList: + return g.adjList.offsets.size - 1 + elif g.transposedAdjList: + return g.transposedAdjList.offsets.size - 1 + else: + err = gdf_add_adj_list(g) + cudf.bindings.cudf_cpp.check_gdf_error(err) + return g.adjList.offsets.size - 1 def in_degree(self, vertex_subset = None): """ From 85adcd8dd773d7fb3864010ed3c0f5286dabe493 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 00:29:45 -0700 Subject: [PATCH 13/18] there were two implementations of delete_adj_list in class Graph, removed one. --- python/cugraph/graph/c_graph.pyx | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 15a07543355..63cafa582a2 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -375,6 +375,13 @@ class Graph: return cudf.Series(offsets_data), cudf.Series(indices_data) + def delete_transposed_adj_list(self): + """ + Delete the transposed adjacency list. + """ + cdef uintptr_t graph = self.graph_ptr + err = gdf_delete_transposed_adj_list( graph) + cudf.bindings.cudf_cpp.check_gdf_error(err) def get_two_hop_neighbors(self): """ @@ -415,22 +422,6 @@ class Graph: return df - def delete_adj_list(self): - """ - Delete the adjacency list. - """ - cdef uintptr_t graph = self.graph_ptr - err = gdf_delete_adj_list( graph) - cudf.bindings.cudf_cpp.check_gdf_error(err) - - def delete_transposed_adj_list(self): - """ - Delete the transposed adjacency list. - """ - cdef uintptr_t graph = self.graph_ptr - err = gdf_delete_transposed_adj_list( graph) - cudf.bindings.cudf_cpp.check_gdf_error(err) - def num_vertices(self): """ Get the number of vertices in the graph From ee30f6fb78e1259f0715cbac1d9bf3aa4f892152 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 00:35:06 -0700 Subject: [PATCH 14/18] changed variable names in view_adj_list and view_transposed_adj_list so, view_transposed_adj_list can better mirror view_adj_list (except for replacing adjList with transposedAdjList) --- python/cugraph/graph/c_graph.pyx | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 63cafa582a2..9edb7ec8d19 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -303,18 +303,18 @@ class Graph: err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - col_size_off = self.num_vertices() + 1 - col_size_ind = g.adjList.indices.size + offset_col_size = self.num_vertices() + 1 + index_col_size = g.adjList.indices.size cdef uintptr_t offset_col_data = g.adjList.offsets.data cdef uintptr_t index_col_data = g.adjList.indices.data offsets_data = rmm.device_array_from_ptr(offset_col_data, - nelem=col_size_off, + nelem=offset_col_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(offset_col_data, 0)) indices_data = rmm.device_array_from_ptr(index_col_data, - nelem=col_size_ind, + nelem=index_col_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(index_col_data, 0)) # g.adjList.offsets.data and g.adjList.indices.data are not owned by @@ -355,20 +355,20 @@ class Graph: err = gdf_add_transposed_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) - off_size = self.num_vertices() + 1 - ind_size = g.transposedAdjList.indices.size + offset_col_size = self.num_vertices() + 1 + inex_col_size = g.transposedAdjList.indices.size cdef uintptr_t offset_col_data = g.transposedAdjList.offsets.data - cdef uintptr_t indices_col_data = g.transposedAdjList.indices.data + cdef uintptr_t index_col_data = g.transposedAdjList.indices.data offsets_data = rmm.device_array_from_ptr(offset_col_data, - nelem=off_size, + nelem=offset_col_size, dtype=np.int32) # , - # finalizer=rmm._make_finalizer(offsets_col_data, 0)) - indices_data = rmm.device_array_from_ptr(indices_col_data, - nelem=ind_size, + # finalizer=rmm._make_finalizer(offset_col_data, 0)) + indices_data = rmm.device_array_from_ptr(index_col_data, + nelem=inex_col_size, dtype=np.int32) # , - # finalizer=rmm._make_finalizer(indices_col_data, 0)) + # finalizer=rmm._make_finalizer(index_col_data, 0)) # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data # are not owned by this instance, so should not be freed here (this # will lead to double free, and undefined behavior). From 2b96d97e56298aa4d1686f528e864541cbf72192 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 00:52:29 -0700 Subject: [PATCH 15/18] removed unnecessary imports --- python/cugraph/bfs/bfs_wrapper.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cugraph/bfs/bfs_wrapper.pyx b/python/cugraph/bfs/bfs_wrapper.pyx index ac226469390..caa965d44e6 100644 --- a/python/cugraph/bfs/bfs_wrapper.pyx +++ b/python/cugraph/bfs/bfs_wrapper.pyx @@ -14,9 +14,7 @@ from c_bfs cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t -from libc.stdlib cimport calloc, malloc, free import cudf -from librmm_cffi import librmm as rmm #from pygdf import Column import numpy as np From ee1f0259c53dd2b242c366377b4dcf400e6d0360 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 09:17:33 -0700 Subject: [PATCH 16/18] updated change log. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7d17641c67..e8856c14cde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ - PR #218 Update c_graph.pyx - PR #224 Update erroneous comments in overlap_wrapper.pyx, woverlap_wrapper.pyx, test_louvain.py, and spectral_clustering.pyx - PR #220 Fixed bugs in Nvgraph triangle counting +- PR #232 Fixed memory leaks in managing cudf columns. # cuGraph 0.6.0 (22 Mar 2019) From ab837dda7a1914a6ca45926cd85180a58446a618 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 19 Apr 2019 15:11:19 -0700 Subject: [PATCH 17/18] fixed typo in variable name --- python/cugraph/graph/c_graph.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 9edb7ec8d19..172aae60999 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -356,7 +356,7 @@ class Graph: cudf.bindings.cudf_cpp.check_gdf_error(err) offset_col_size = self.num_vertices() + 1 - inex_col_size = g.transposedAdjList.indices.size + index_col_size = g.transposedAdjList.indices.size cdef uintptr_t offset_col_data = g.transposedAdjList.offsets.data cdef uintptr_t index_col_data = g.transposedAdjList.indices.data @@ -366,7 +366,7 @@ class Graph: dtype=np.int32) # , # finalizer=rmm._make_finalizer(offset_col_data, 0)) indices_data = rmm.device_array_from_ptr(index_col_data, - nelem=inex_col_size, + nelem=index_col_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(index_col_data, 0)) # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data From 65a63103e47a1e9264e775d6e8dca2e7ccf05dc1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 22 Apr 2019 07:49:25 -0700 Subject: [PATCH 18/18] fixed a typo --- python/cugraph/jaccard/wjaccard_wrapper.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx index cba0da8fc53..df04dce512a 100644 --- a/python/cugraph/jaccard/wjaccard_wrapper.pyx +++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx @@ -83,7 +83,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None): cdef gdf_column c_weight_col cdef gdf_column c_first_col cdef gdf_column c_second_col - cdef gdf_column c_indices_col + cdef gdf_column c_index_col if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series: resultSize = len(first)