From 97b92166da8d8f35b66ed91928bd64682e6aab80 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Tue, 16 Apr 2019 13:26:44 -0700
Subject: [PATCH 01/18] removed trailing spaces

---
 cpp/src/cugraph.cu | 118 ++++++++++++++++++++++-----------------------
 1 file changed, 59 insertions(+), 59 deletions(-)
diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu
index 867f8981e58..8a22c9e7005 100644
--- a/cpp/src/cugraph.cu
+++ b/cpp/src/cugraph.cu
@@ -10,7 +10,7 @@
  *
  */
 
-// Graph analytics features 
+// Graph analytics features
 // Author: Alex Fender afender@nvidia.com
 
 #include <cugraph.h>
@@ -31,7 +31,7 @@ using Vector = thrust::device_vector<T, rmm_allocator<T>>;
 
 void gdf_col_delete(gdf_column* col) {
   if (col) {
-    col->size = 0; 
+    col->size = 0;
     if(col->data) {
       ALLOC_FREE_TRY(col->data, nullptr);
     }
@@ -62,13 +62,13 @@ void cpy_column_view(const gdf_column *in, gdf_column *out) {
   }
 }
 
-gdf_error gdf_adj_list_view(gdf_graph *graph, const gdf_column *offsets, 
+gdf_error gdf_adj_list_view(gdf_graph *graph, const gdf_column *offsets,
                                  const gdf_column *indices, const gdf_column *edge_data) {
-  GDF_REQUIRE( offsets->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );                    
+  GDF_REQUIRE( offsets->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
   GDF_REQUIRE( indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
   GDF_REQUIRE( (offsets->dtype == indices->dtype), GDF_UNSUPPORTED_DTYPE );
   GDF_REQUIRE( ((offsets->dtype == GDF_INT32) || (offsets->dtype == GDF_INT64)), GDF_UNSUPPORTED_DTYPE );
-  GDF_REQUIRE( (offsets->size > 0), GDF_DATASET_EMPTY ); 
+  GDF_REQUIRE( (offsets->size > 0), GDF_DATASET_EMPTY );
   GDF_REQUIRE( (graph->adjList == nullptr) , GDF_INVALID_API_CALL);
 
   graph->adjList = new gdf_adj_list;
@@ -101,19 +101,19 @@ gdf_error gdf_adj_list::get_source_indices (gdf_column *src_indices) {
   GDF_REQUIRE( offsets->data != nullptr , GDF_INVALID_API_CALL);
   GDF_REQUIRE( src_indices->size == indices->size, GDF_COLUMN_SIZE_MISMATCH );
   GDF_REQUIRE( src_indices->dtype == indices->dtype, GDF_UNSUPPORTED_DTYPE );
-  GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY ); 
+  GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY );
   cugraph::offsets_to_indices<int>((int*)offsets->data, offsets->size-1, (int*)src_indices->data);
 
   return GDF_SUCCESS;
 }
 
-gdf_error gdf_edge_list_view(gdf_graph *graph, const gdf_column *src_indices, 
+gdf_error gdf_edge_list_view(gdf_graph *graph, const gdf_column *src_indices,
                                  const gdf_column *dest_indices, const gdf_column *edge_data) {
   GDF_REQUIRE( src_indices->size == dest_indices->size, GDF_COLUMN_SIZE_MISMATCH );
   GDF_REQUIRE( src_indices->dtype == dest_indices->dtype, GDF_UNSUPPORTED_DTYPE );
   GDF_REQUIRE( ((src_indices->dtype == GDF_INT32) || (src_indices->dtype == GDF_INT64)), GDF_UNSUPPORTED_DTYPE );
-  GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY ); 
-  GDF_REQUIRE( src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );                    
+  GDF_REQUIRE( src_indices->size > 0, GDF_DATASET_EMPTY );
+  GDF_REQUIRE( src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
   GDF_REQUIRE( dest_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
   GDF_REQUIRE( graph->edgeList == nullptr , GDF_INVALID_API_CALL);
 
@@ -150,20 +150,20 @@ gdf_error gdf_add_adj_list_impl (gdf_graph *graph) {
 
       CSR_Result_Weighted<int,WT> adj_list;
       status = ConvertCOOtoCSR_weighted((int*)graph->edgeList->src_indices->data, (int*)graph->edgeList->dest_indices->data, (WT*)graph->edgeList->edge_data->data, nnz, adj_list);
-      
-      gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets, 
+
+      gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets,
                             nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype);
-      gdf_column_view(graph->adjList->indices, adj_list.colIndices, 
+      gdf_column_view(graph->adjList->indices, adj_list.colIndices,
                             nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype);
-      gdf_column_view(graph->adjList->edge_data, adj_list.edgeWeights, 
+      gdf_column_view(graph->adjList->edge_data, adj_list.edgeWeights,
                           nullptr, adj_list.nnz, graph->edgeList->edge_data->dtype);
     }
     else {
       CSR_Result<int> adj_list;
-      status = ConvertCOOtoCSR((int*)graph->edgeList->src_indices->data,(int*)graph->edgeList->dest_indices->data, nnz, adj_list);      
-      gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets, 
+      status = ConvertCOOtoCSR((int*)graph->edgeList->src_indices->data,(int*)graph->edgeList->dest_indices->data, nnz, adj_list);
+      gdf_column_view(graph->adjList->offsets, adj_list.rowOffsets,
                             nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype);
-      gdf_column_view(graph->adjList->indices, adj_list.colIndices, 
+      gdf_column_view(graph->adjList->indices, adj_list.colIndices,
                             nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype);
     }
     if (status !=0) {
@@ -185,14 +185,14 @@ gdf_error gdf_add_edge_list (gdf_graph *graph) {
 
       CUDA_TRY(cudaMallocManaged ((void**)&d_src, sizeof(int) * graph->adjList->indices->size));
 
-      cugraph::offsets_to_indices<int>((int*)graph->adjList->offsets->data, 
-                                  graph->adjList->offsets->size-1, 
+      cugraph::offsets_to_indices<int>((int*)graph->adjList->offsets->data,
+                                  graph->adjList->offsets->size-1,
                                   (int*)d_src);
 
-      gdf_column_view(graph->edgeList->src_indices, d_src, 
+      gdf_column_view(graph->edgeList->src_indices, d_src,
                       nullptr, graph->adjList->indices->size, graph->adjList->indices->dtype);
       cpy_column_view(graph->adjList->indices, graph->edgeList->dest_indices);
-      
+
       if (graph->adjList->edge_data != nullptr) {
         graph->edgeList->edge_data = new gdf_column;
         cpy_column_view(graph->adjList->edge_data, graph->edgeList->edge_data);
@@ -211,25 +211,25 @@ gdf_error gdf_add_transposed_adj_list_impl (gdf_graph *graph) {
       graph->transposedAdjList->offsets = new gdf_column;
       graph->transposedAdjList->indices = new gdf_column;
       graph->transposedAdjList->ownership = 1;
-    
+
       if (graph->edgeList->edge_data) {
         graph->transposedAdjList->edge_data = new gdf_column;
         CSR_Result_Weighted<int,WT> adj_list;
         status = ConvertCOOtoCSR_weighted( (int*)graph->edgeList->dest_indices->data, (int*)graph->edgeList->src_indices->data, (WT*)graph->edgeList->edge_data->data, nnz, adj_list);
-        gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets, 
+        gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets,
                               nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype);
-        gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices, 
+        gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices,
                               nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype);
-        gdf_column_view(graph->transposedAdjList->edge_data, adj_list.edgeWeights, 
+        gdf_column_view(graph->transposedAdjList->edge_data, adj_list.edgeWeights,
                             nullptr, adj_list.nnz, graph->edgeList->edge_data->dtype);
       }
       else {
 
         CSR_Result<int> adj_list;
-        status = ConvertCOOtoCSR((int*)graph->edgeList->dest_indices->data, (int*)graph->edgeList->src_indices->data, nnz, adj_list);      
-        gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets, 
+        status = ConvertCOOtoCSR((int*)graph->edgeList->dest_indices->data, (int*)graph->edgeList->src_indices->data, nnz, adj_list);
+        gdf_column_view(graph->transposedAdjList->offsets, adj_list.rowOffsets,
                               nullptr, adj_list.size+1, graph->edgeList->src_indices->dtype);
-        gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices, 
+        gdf_column_view(graph->transposedAdjList->indices, adj_list.colIndices,
                               nullptr, adj_list.nnz, graph->edgeList->src_indices->dtype);
       }
       if (status !=0) {
@@ -263,46 +263,46 @@ gdf_error gdf_degree_impl(int n, int e, gdf_column* col_ptr, gdf_column* degree,
     nblocks.x = min((e + nthreads.x - 1) / nthreads.x, CUDA_MAX_BLOCKS);
     nblocks.y = 1;
     nblocks.z = 1;
-    
+
     switch (col_ptr->dtype) {
       case GDF_INT32:   cugraph::degree_coo<int, float> <<<nblocks, nthreads>>>(n, e, static_cast<int*>(col_ptr->data), static_cast<int*>(degree->data));break;
       default: return GDF_UNSUPPORTED_DTYPE;
     }
   }
   return GDF_SUCCESS;
-} 
+}
 
 
 gdf_error gdf_degree(gdf_graph *graph, gdf_column *degree, int x) {
   // Calculates the degree of all nodes of the graph
   // x = 0: in+out degree
-  // x = 1: in-degree 
+  // x = 1: in-degree
   // x = 2: out-degree
   GDF_REQUIRE(graph->adjList != nullptr || graph->transposedAdjList != nullptr, GDF_INVALID_API_CALL);
-  int n; 
+  int n;
   int e;
   if(graph->adjList != nullptr) {
     n = graph->adjList->offsets->size -1;
     e = graph->adjList->indices->size;
   }
   else {
-    n = graph->transposedAdjList->offsets->size - 1; 
+    n = graph->transposedAdjList->offsets->size - 1;
     e = graph->transposedAdjList->indices->size;
-  } 
+  }
 
-  if(x!=1) { 
+  if(x!=1) {
     // Computes out-degree for x=0 and x=2
-    if(graph->adjList) 
+    if(graph->adjList)
       gdf_degree_impl(n, e, graph->adjList->offsets, degree, true);
-    else 
+    else
       gdf_degree_impl(n, e, graph->transposedAdjList->indices, degree, false);
   }
 
-  if(x!=2) { 
+  if(x!=2) {
     // Computes in-degree for x=0 and x=1
-    if(graph->adjList)  
+    if(graph->adjList)
       gdf_degree_impl(n, e, graph->adjList->indices, degree, false);
-    else  
+    else
       gdf_degree_impl(n, e, graph->transposedAdjList->offsets, degree, true);
   }
   return GDF_SUCCESS;
@@ -315,17 +315,17 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph,
                       float tolerance = 1e-4, int max_iter = 200,
                       bool has_guess = false) {
   GDF_REQUIRE( graph->edgeList != nullptr, GDF_VALIDITY_UNSUPPORTED );
-  GDF_REQUIRE( graph->edgeList->src_indices->size == graph->edgeList->dest_indices->size, GDF_COLUMN_SIZE_MISMATCH ); 
-  GDF_REQUIRE( graph->edgeList->src_indices->dtype == graph->edgeList->dest_indices->dtype, GDF_UNSUPPORTED_DTYPE );  
-  GDF_REQUIRE( graph->edgeList->src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );                 
-  GDF_REQUIRE( graph->edgeList->dest_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );  
-  GDF_REQUIRE( pagerank != nullptr , GDF_INVALID_API_CALL ); 
-  GDF_REQUIRE( pagerank->data != nullptr , GDF_INVALID_API_CALL ); 
-  GDF_REQUIRE( pagerank->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );          
-  GDF_REQUIRE( pagerank->size > 0 , GDF_INVALID_API_CALL );         
+  GDF_REQUIRE( graph->edgeList->src_indices->size == graph->edgeList->dest_indices->size, GDF_COLUMN_SIZE_MISMATCH );
+  GDF_REQUIRE( graph->edgeList->src_indices->dtype == graph->edgeList->dest_indices->dtype, GDF_UNSUPPORTED_DTYPE );
+  GDF_REQUIRE( graph->edgeList->src_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
+  GDF_REQUIRE( graph->edgeList->dest_indices->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
+  GDF_REQUIRE( pagerank != nullptr , GDF_INVALID_API_CALL );
+  GDF_REQUIRE( pagerank->data != nullptr , GDF_INVALID_API_CALL );
+  GDF_REQUIRE( pagerank->null_count == 0 , GDF_VALIDITY_UNSUPPORTED );
+  GDF_REQUIRE( pagerank->size > 0 , GDF_INVALID_API_CALL );
 
   int m=pagerank->size, nnz = graph->edgeList->src_indices->size, status = 0;
-  WT *d_pr, *d_val = nullptr, *d_leaf_vector = nullptr; 
+  WT *d_pr, *d_val = nullptr, *d_leaf_vector = nullptr;
   WT res = 1.0;
   WT *residual = &res;
 
@@ -345,16 +345,16 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph,
     cugraph::copy<WT>(m, (WT*)pagerank->data, d_pr);
   }
 
-  status = cugraph::pagerank<int,WT>( m,nnz, (int*)graph->transposedAdjList->offsets->data, (int*)graph->transposedAdjList->indices->data, 
+  status = cugraph::pagerank<int,WT>( m,nnz, (int*)graph->transposedAdjList->offsets->data, (int*)graph->transposedAdjList->indices->data,
     d_val, alpha, d_leaf_vector, false, tolerance, max_iter, d_pr, residual);
- 
+
   if (status !=0)
-    switch ( status ) { 
-      case -1: std::cerr<< "Error : bad parameters in Pagerank"<<std::endl; return GDF_CUDA_ERROR; 
-      case 1: std::cerr<< "Warning : Pagerank did not reached the desired tolerance"<<std::endl;  return GDF_CUDA_ERROR; 
-      default:  std::cerr<< "Pagerank failed"<<std::endl;  return GDF_CUDA_ERROR; 
-    }   
- 
+    switch ( status ) {
+      case -1: std::cerr<< "Error : bad parameters in Pagerank"<<std::endl; return GDF_CUDA_ERROR;
+      case 1: std::cerr<< "Warning : Pagerank did not reached the desired tolerance"<<std::endl;  return GDF_CUDA_ERROR;
+      default:  std::cerr<< "Pagerank failed"<<std::endl;  return GDF_CUDA_ERROR;
+    }
+
   cugraph::copy<WT>(m, d_pr, (WT*)pagerank->data);
 
   ALLOC_FREE_TRY(d_val, stream);
@@ -466,9 +466,9 @@ gdf_error gdf_louvain(gdf_graph *graph, void *final_modularity, void *num_level,
 
   void* offsets_ptr = graph->adjList->offsets->data;
   void* indices_ptr = graph->adjList->indices->data;
-  
+
   void* value_ptr;
-  Vector<float> d_values; 
+  Vector<float> d_values;
   if(graph->adjList->edge_data) {
       value_ptr = graph->adjList->edge_data->data;
   }
@@ -496,7 +496,7 @@ gdf_error gdf_louvain(gdf_graph *graph, void *final_modularity, void *num_level,
   cudaDataType_t index_type = gdf_to_cudadtype(graph->adjList->indices);
   cudaDataType_t val_type = graph->adjList->edge_data? gdf_to_cudadtype(graph->adjList->edge_data): CUDA_R_32F;
 
-  nvgraphLouvain(index_type, val_type, n, e, offsets_ptr, indices_ptr, value_ptr, 1, 0, NULL, 
+  nvgraphLouvain(index_type, val_type, n, e, offsets_ptr, indices_ptr, value_ptr, 1, 0, NULL,
                  final_modularity, louvain_parts_ptr, num_level);
   return GDF_SUCCESS;
 }

From e06c7432a6cef780d563e6c22482a7ae1293028c Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Tue, 16 Apr 2019 14:07:57 -0700
Subject: [PATCH 02/18] replaced heap allocation of temporary C++ gdf_column
 objects with stack allocation; these objects are only used to pass the data
 encapsulated in Python cudf Series objects to C++ functions expecting
 (pointers to) C++ gdf_column objects, and sizeof(gdf_column) is not large
 enough to blow the stack, so there is no need to incur heap allocation
 overhead and risk memory leaks (if one forgets to free).

---
 python/cugraph/graph/c_graph.pyx | 156 ++++++++++++++++---------------
 1 file changed, 81 insertions(+), 75 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 190c56bfaf1..b96986ac377 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -23,6 +23,31 @@ import numpy as np
 dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64}
 
 
+cdef gdf_column get_gdf_column_view(col):
+    """
+    This function returns a C++ gdf_column object from the Python cudf Series
+    object by shallow copying. The returned C++ object is expected to be used
+    as a temporary variable to pass the column data encapsulated in the Python
+    cudf Series object to C++ functions expecting (pointers to) C++ gdf_column
+    objects.
+    """
+    cdef gdf_column c_col
+    cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column)
+    # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column)
+    cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE)
+
+    err = gdf_column_view_augmented(< gdf_column *> &c_col,
+                                    < void *> data_ptr,
+                                    < gdf_valid_type *> 0,
+                                    < gdf_size_type > len(col),
+                                    dtypes[col.dtype.type],
+                                    < gdf_size_type > col.null_count,
+                                    c_extra_dtype_info)
+    cudf.bindings.cudf_cpp.check_gdf_error(err)
+
+    return c_col
+
+
 cdef create_column(col):
     cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column))
     cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column)
@@ -43,13 +68,6 @@ cdef create_column(col):
     return col_ptr
 
 
-cdef delete_column(col_ptr):
-    cdef uintptr_t col = col_ptr
-    cdef gdf_column * c_col = < gdf_column *> col
-    free(c_col)
-    return
-
-
 class Graph:
     """
     cuGraph graph class containing basic graph creation and transformation operations.
@@ -138,34 +156,30 @@ class Graph:
         # (if not None) to avoid garbage collection while they are still in use
         # inside this class. If copy is set to True, deep-copy the objects.
         if copy is False:
-            self.edge_list_source_col = source_col;
-            self.edge_list_dest_col = dest_col;
-            self.edge_list_value_col = value_col;
+            self.edge_list_source_col = source_col
+            self.edge_list_dest_col = dest_col
+            self.edge_list_value_col = value_col
         else:
-            self.edge_list_source_col = source_col.copy();
-            self.edge_list_dest_col = dest_col.copy();
-            self.edge_list_value_col = value_col.copy();
+            self.edge_list_source_col = source_col.copy()
+            self.edge_list_dest_col = dest_col.copy()
+            self.edge_list_value_col = value_col.copy()
 
         cdef uintptr_t graph = self.graph_ptr
-        cdef uintptr_t source = create_column(self.edge_list_source_col)
-        cdef uintptr_t dest = create_column(self.edge_list_dest_col)
-        cdef uintptr_t value
+        cdef gdf_column c_source_col = get_gdf_column_view(self.edge_list_source_col)
+        cdef gdf_column c_dest_col = get_gdf_column_view(self.edge_list_dest_col)
+        cdef gdf_column c_value_col
+        cdef gdf_column * c_value_col_ptr
         if value_col is None:
-            value = 0
+            c_value_col_ptr = NULL
         else:
-            value = create_column(self.edge_list_value_col)
+            c_value_col = get_gdf_column_view(self.edge_list_value_col)
+            c_value_col_ptr = &c_value_col
 
-        try:
-            err = gdf_edge_list_view(< gdf_graph *> graph,
-                                     < gdf_column *> source,
-                                     < gdf_column *> dest,
-                                     < gdf_column *> value)
-            cudf.bindings.cudf_cpp.check_gdf_error(err)
-        finally:
-            delete_column(source)
-            delete_column(dest)
-            if value is not 0:
-                delete_column(value)
+        err = gdf_edge_list_view(< gdf_graph *> graph,
+                                 &c_source_col,
+                                 &c_dest_col,
+                                 c_value_col_ptr)
+        cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     def num_vertices(self):
         """
@@ -280,34 +294,30 @@ class Graph:
         # still in use inside this class. If copy is set to True, deep-copy the
         # objects.
         if copy is False:
-            self.adj_list_offset_col = offset_col;
-            self.adj_list_index_col = index_col;
-            self.adj_list_value_col = value_col;
+            self.adj_list_offset_col = offset_col
+            self.adj_list_index_col = index_col
+            self.adj_list_value_col = value_col
         else:
-            self.adj_list_offset_col = offset_col.copy();
-            self.adj_list_index_col = index_col.copy();
-            self_adj_list_value_col = value_col.copy();
+            self.adj_list_offset_col = offset_col.copy()
+            self.adj_list_index_col = index_col.copy()
+            self_adj_list_value_col = value_col.copy()
 
         cdef uintptr_t graph = self.graph_ptr
-        cdef uintptr_t offsets = create_column(self.adj_list_offset_col)
-        cdef uintptr_t indices = create_column(self.adj_list_index_col)
-        cdef uintptr_t value
+        cdef gdf_column c_offset_col = get_gdf_column_view(self.adj_list_offset_col)
+        cdef gdf_column c_index_col = get_gdf_column_view(self.adj_list_index_col)
+        cdef gdf_column c_value_col
+        cdef gdf_column * c_value_col_ptr
         if value_col is None:
-            value = 0
+            c_value_col_ptr = NULL
         else:
-            value = create_column(self.adj_list_value_col)
+            c_value_col = get_gdf_column_view(self.adj_list_value_col)
+            c_value_col_ptr = &c_value_col
 
-        try:
-            err = gdf_adj_list_view(< gdf_graph *> graph,
-                                    < gdf_column *> offsets,
-                                    < gdf_column *> indices,
-                                    < gdf_column *> value)
-            cudf.bindings.cudf_cpp.check_gdf_error(err)
-        finally:
-            delete_column(offsets)
-            delete_column(indices)
-            if value is not 0:
-                delete_column(value)
+        err = gdf_adj_list_view(< gdf_graph *> graph,
+                                &c_offset_col,
+                                &c_index_col,
+                                c_value_col_ptr)
+        cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     def view_adj_list(self):
         """
@@ -404,32 +414,30 @@ class Graph:
         """
         cdef uintptr_t graph = self.graph_ptr
         cdef gdf_graph * g = < gdf_graph *> graph
-        cdef gdf_column * first = < gdf_column *> malloc(sizeof(gdf_column))
-        cdef gdf_column * second = < gdf_column *> malloc(sizeof(gdf_column))
-        err = gdf_get_two_hop_neighbors(g, first, second)
+        cdef gdf_column c_first_col
+        cdef gdf_column c_second_col
+        err = gdf_get_two_hop_neighbors(g, &c_first_col, &c_second_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df = cudf.DataFrame()
-        if first.dtype == GDF_INT32:
-            first_out = rmm.device_array_from_ptr(<uintptr_t>first.data, 
-                                                  nelem=first.size, 
+        if c_first_col.dtype == GDF_INT32:
+            first_out = rmm.device_array_from_ptr(<uintptr_t>c_first_col.data,
+                                                  nelem=c_first_col.size,
                                                   dtype=np.int32)
-            second_out = rmm.device_array_from_ptr(<uintptr_t>second.data, 
-                                                   nelem=second.size, 
+            second_out = rmm.device_array_from_ptr(<uintptr_t>c_second_col.data,
+                                                   nelem=c_second_col.size,
                                                    dtype=np.int32)
             df['first'] = first_out
             df['second'] = second_out
-        if first.dtype == GDF_INT64:
-            first_out = rmm.device_array_from_ptr(<uintptr_t>first.data, 
-                                                  nelem=first.size, 
+        if c_first_col.dtype == GDF_INT64:
+            first_out = rmm.device_array_from_ptr(<uintptr_t>c_first_col.data,
+                                                  nelem=c_first_col.size,
                                                   dtype=np.int64)
-            second_out = rmm.device_array_from_ptr(<uintptr_t>second.data, 
-                                                   nelem=second.size, 
+            second_out = rmm.device_array_from_ptr(<uintptr_t>c_second_col.data,
+                                                   nelem=c_second_col.size,
                                                    dtype=np.int64)
             df['first'] = first_out
             df['second'] = second_out
 
-        delete_column(<uintptr_t>first)
-        delete_column(<uintptr_t>second)
         return df
 
     def delete_adj_list(self):
@@ -510,7 +518,7 @@ class Graph:
         given by the specified vertex_subset.
 
         df['vertex']: The vertex ID of node (will be identical to vertex_subset if specified)
-        df['degree']: The computed out-degree of the corresponding vertex                    
+        df['degree']: The computed out-degree of the corresponding vertex
         Examples
         --------
         >>> import numpy as np
@@ -546,7 +554,7 @@ class Graph:
         given by the specified vertex_subset.
 
         df['vertex']: The vertex ID of node (will be identical to vertex_subset if specified)
-        df['degree']: The computed degree of the corresponding vertex            
+        df['degree']: The computed degree of the corresponding vertex
         Examples
         --------
         >>> import numpy as np
@@ -574,16 +582,16 @@ class Graph:
 
         df = cudf.DataFrame()
         vertex_col = cudf.Series(np.zeros(n, dtype=np.int32))
-        cdef uintptr_t identifier_ptr = create_column(vertex_col)
+        c_vertex_col = get_gdf_column_view(vertex_col)
         if g.adjList:
-            err = g.adjList.get_vertex_identifiers(<gdf_column*>identifier_ptr)
+            err = g.adjList.get_vertex_identifiers(&c_vertex_col)
         else:
-            err = g.transposedAdjList.get_vertex_identifiers(<gdf_column*>identifier_ptr)
+            err = g.transposedAdjList.get_vertex_identifiers(&c_vertex_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         
         degree_col = cudf.Series(np.zeros(n, dtype=np.int32))
-        cdef uintptr_t degree_col_ptr = create_column(degree_col)
-        err = gdf_degree(g, <gdf_column*>degree_col_ptr, <int>x)
+        cdef gdf_column c_degree_col = get_gdf_column_view(degree_col)
+        err = gdf_degree(g, &c_degree_col, <int>x)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         if vertex_subset is None:
@@ -595,6 +603,4 @@ class Graph:
             del vertex_col
             del degree_col
         
-        delete_column(identifier_ptr)
-        delete_column(degree_col_ptr)
         return df

From ff98880a30a35fe4edf4bcb45c483096b82a6f7e Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Tue, 16 Apr 2019 15:23:18 -0700
Subject: [PATCH 03/18] replaced create_column (heap-allocation for gdf_column)
 with get_gdf_column_view (stack-allocation for gdf_column)

---
 python/cugraph/bfs/bfs_wrapper.pyx            | 18 ++++-----
 python/cugraph/graph/c_graph.pyx              | 20 ----------
 python/cugraph/jaccard/jaccard_wrapper.pyx    | 28 +++++++-------
 python/cugraph/jaccard/wjaccard_wrapper.pyx   | 37 +++++++++----------
 python/cugraph/louvain/louvain_wrapper.pyx    | 10 ++---
 python/cugraph/overlap/overlap_wrapper.pyx    | 28 +++++++-------
 python/cugraph/overlap/woverlap_wrapper.pyx   | 37 +++++++++----------
 python/cugraph/pagerank/pagerank_wrapper.pyx  |  8 ++--
 .../spectral_clustering.pyx                   | 28 +++++++-------
 python/cugraph/sssp/sssp_wrapper.pyx          |  8 ++--
 10 files changed, 100 insertions(+), 122 deletions(-)

diff --git a/python/cugraph/bfs/bfs_wrapper.pyx b/python/cugraph/bfs/bfs_wrapper.pyx
index 615bbb33daf..348d0c62d6c 100644
--- a/python/cugraph/bfs/bfs_wrapper.pyx
+++ b/python/cugraph/bfs/bfs_wrapper.pyx
@@ -52,21 +52,21 @@ cpdef bfs(G, start, directed=True):
     >>> G.add_edge_list(sources,destinations,none)
     >>> dist, pred = cuGraph.bfs(G, 0, false)
     """
-    
+
     cdef uintptr_t graph = G.graph_ptr
     cdef gdf_graph* g = <gdf_graph*>graph
     num_verts = G.num_vertices()
-    
+
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
-    cdef uintptr_t vertex_ptr = create_column(df['vertex'])
+    cdef gdf_column c_vertex_col = get_gdf_column_view(df['vertex'])
     df['distance'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
-    cdef uintptr_t distances_ptr = create_column(df['distance'])
+    cdef gdf_column c_distances_col = get_gdf_column_view(df['distance'])
     df['predecessor'] = cudf.Series(np.zeros(num_verts, dtype=np.int32))
-    cdef uintptr_t predecessors_ptr = create_column(df['predecessor'])
-    
-    err = g.adjList.get_vertex_identifiers(<gdf_column*>vertex_ptr)
+    cdef gdf_column c_predecessors_col = get_gdf_column_view(df['predecessor'])
+
+    err = g.adjList.get_vertex_identifiers(&c_vertex_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
-    
-    gdf_bfs(<gdf_graph*>g, <gdf_column*>distances_ptr, <gdf_column*>predecessors_ptr, <int>start, <bool>directed)
+
+    gdf_bfs(g, &c_distances_col, &c_predecessors_col, <int>start, <bool>directed)
     return df
diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index b96986ac377..0e43084873b 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -48,26 +48,6 @@ cdef gdf_column get_gdf_column_view(col):
     return c_col
 
 
-cdef create_column(col):
-    cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column))
-    cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column)
-    # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column)
-    cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE)
-
-
-    err = gdf_column_view_augmented(< gdf_column *> c_col,
-                                    < void *> data_ptr,
-                                    < gdf_valid_type *> 0,
-                                    < gdf_size_type > len(col),
-                                    dtypes[col.dtype.type],
-                                    < gdf_size_type > col.null_count,
-                                    c_extra_dtype_info)
-    cudf.bindings.cudf_cpp.check_gdf_error(err)
-
-    cdef uintptr_t col_ptr = < uintptr_t > c_col
-    return col_ptr
-
-
 class Graph:
     """
     cuGraph graph class containing basic graph creation and transformation operations.
diff --git a/python/cugraph/jaccard/jaccard_wrapper.pyx b/python/cugraph/jaccard/jaccard_wrapper.pyx
index 17f021027b4..c602d8db414 100644
--- a/python/cugraph/jaccard/jaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/jaccard_wrapper.pyx
@@ -73,22 +73,22 @@ cpdef jaccard(input_graph, first=None, second=None):
     err = gdf_add_adj_list(< gdf_graph *> graph)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    cdef uintptr_t result_ptr
-    cdef uintptr_t first_ptr
-    cdef uintptr_t second_ptr
-    cdef uintptr_t src_indices_ptr
+    cdef gdf_column c_result_col
+    cdef gdf_column c_first_col
+    cdef gdf_column c_second_col
+    cdef gdf_column c_src_index_col
 
     if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series:
         resultSize = len(first)
         result = cudf.Series(np.ones(resultSize, dtype=np.float32))
-        result_ptr = create_column(result)
-        first_ptr = create_column(first)
-        second_ptr = create_column(second)
+        c_result_col = get_gdf_column_view(result)
+        c_first_col = get_gdf_column_view(first)
+        c_second_col = get_gdf_column_view(second)
         err = gdf_jaccard_list(g,
                                < gdf_column *> NULL,
-                               < gdf_column *> first_ptr,
-                               < gdf_column *> second_ptr,
-                               < gdf_column *> result_ptr)
+                               &c_first_col,
+                               &c_second_col,
+                               &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df = cudf.DataFrame()
         df['source'] = first
@@ -99,9 +99,9 @@ cpdef jaccard(input_graph, first=None, second=None):
     elif first is None and second is None:
         e = g.adjList.indices.size
         result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False)
-        result_ptr = create_column(result)
+        c_result_col = get_gdf_column_view(result)
 
-        err = gdf_jaccard(g, < gdf_column *> NULL, < gdf_column *> result_ptr)
+        err = gdf_jaccard(g, < gdf_column *> NULL, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
@@ -109,8 +109,8 @@ cpdef jaccard(input_graph, first=None, second=None):
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
         df['source'] = cudf.Series(np.zeros(e, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]))
-        src_indices_ptr = create_column(df['source']) 
-        err = g.adjList.get_source_indices(< gdf_column *> src_indices_ptr);
+        c_src_index_col = get_gdf_column_view(df['source'])
+        err = g.adjList.get_source_indices(&c_src_index_col);
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df['destination'] = cudf.Series(dest_data)
         df['jaccard_coeff'] = result
diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx
index 7dddcbf6fd3..f0f61f79d54 100644
--- a/python/cugraph/jaccard/wjaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx
@@ -79,24 +79,24 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    cdef uintptr_t result_ptr
-    cdef uintptr_t weight_ptr
-    cdef uintptr_t first_ptr
-    cdef uintptr_t second_ptr
-    cdef uintptr_t indices_ptr
+    cdef gdf_column c_result_col
+    cdef gdf_column c_weight_col
+    cdef gdf_column c_first_col
+    cdef gdf_column c_second_col
+    cdef gdf_column c_indices_col
 
     if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series:
         resultSize = len(first)
         result = cudf.Series(np.ones(resultSize, dtype=np.float32))
-        result_ptr = create_column(result)
-        weight_ptr = create_column(weights)
-        first_ptr = create_column(first)
-        second_ptr = create_column(second)
+        c_result_col = get_gdf_column_view(result)
+        c_weight_col = get_gdf_column_view(weights)
+        c_first_col = get_gdf_column_view(first)
+        c_second_col = get_gdf_column_view(second)
         err = gdf_jaccard_list(g,
-                               < gdf_column *> weight_ptr,
-                               < gdf_column *> first_ptr,
-                               < gdf_column *> second_ptr,
-                               < gdf_column *> result_ptr)
+                               &c_weight_col,
+                               &c_first_col,
+                               &c_second_col,
+                               &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df = cudf.DataFrame()
         df['source'] = first
@@ -107,10 +107,10 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
     elif first is None and second is None:
         resultSize = g.adjList.indices.size
         result = cudf.Series(np.ones(resultSize, dtype=np.float32))
-        result_ptr = create_column(result)
-        weight_ptr = create_column(weights)
+        c_result_col = get_gdf_column_view(result)
+        c_weight_col = get_gdf_column_view(weights)
 
-        err = gdf_jaccard(g, < gdf_column *> weight_ptr, < gdf_column *> result_ptr)
+        err = gdf_jaccard(g, &c_weight_col, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
@@ -118,8 +118,8 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
         df['source'] = cudf.Series(np.zeros(resultSize, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]))
-        indices_ptr = create_column(df['source']) 
-        err = g.adjList.get_source_indices(< gdf_column *> indices_ptr);
+        c_index_col = get_gdf_column_view(df['source']) 
+        err = g.adjList.get_source_indices(&c_index_col);
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df['destination'] = cudf.Series(dest_data)
         df['jaccard_coeff'] = result
@@ -127,4 +127,3 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
         return df
 
     raise ValueError("Specify first and second or neither")
-
diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx
index aff1e3399ee..3b0db53e8bc 100644
--- a/python/cugraph/louvain/louvain_wrapper.pyx
+++ b/python/cugraph/louvain/louvain_wrapper.pyx
@@ -56,16 +56,16 @@ cpdef nvLouvain(input_graph):
 
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.zeros(n, dtype=np.int32))
-    cdef uintptr_t identifier_ptr = create_column(df['vertex'])
-    err = g.adjList.get_vertex_identifiers(<gdf_column*>identifier_ptr)
+    cdef gdf_column c_index_col = get_gdf_column_view(df['vertex'])
+    err = g.adjList.get_vertex_identifiers(&c_index_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     
     df['partition'] = cudf.Series(np.zeros(n,dtype=np.int32))
-    cdef uintptr_t louvain_parts_col_ptr = create_column(df['partition'])
+    cdef gdf_column c_louvain_parts_col = get_gdf_column_view(df['partition'])
     cdef double final_modularity = 1.0
     cdef int num_level
 
-    err = gdf_louvain(<gdf_graph*>g, <void*>&final_modularity, <void*>&num_level, <gdf_column*>louvain_parts_col_ptr)
+    err = gdf_louvain(<gdf_graph*>g, <void*>&final_modularity, <void*>&num_level, &c_louvain_parts_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     cdef double fm = final_modularity
@@ -75,4 +75,4 @@ cpdef nvLouvain(input_graph):
             fm = tmp
     else:
         fm = tmp
-    return df, fm                                      
+    return df, fm
diff --git a/python/cugraph/overlap/overlap_wrapper.pyx b/python/cugraph/overlap/overlap_wrapper.pyx
index f216075b37a..ecad68ea939 100644
--- a/python/cugraph/overlap/overlap_wrapper.pyx
+++ b/python/cugraph/overlap/overlap_wrapper.pyx
@@ -73,22 +73,22 @@ cpdef overlap(input_graph, first=None, second=None):
     err = gdf_add_adj_list(< gdf_graph *> graph)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    cdef uintptr_t result_ptr
-    cdef uintptr_t first_ptr
-    cdef uintptr_t second_ptr
-    cdef uintptr_t src_indices_ptr
+    cdef gdf_column c_result_col
+    cdef gdf_column c_first_col
+    cdef gdf_column c_second_col
+    cdef gdf_column c_src_index_col
 
     if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series:
         resultSize = len(first)
         result = cudf.Series(np.ones(resultSize, dtype=np.float32))
-        result_ptr = create_column(result)
-        first_ptr = create_column(first)
-        second_ptr = create_column(second)
+        c_result_col = get_gdf_column_view(result)
+        c_first_col = get_gdf_column_view(first)
+        c_second_col = get_gdf_column_view(second)
         err = gdf_overlap_list(g,
                                < gdf_column *> NULL,
-                               < gdf_column *> first_ptr,
-                               < gdf_column *> second_ptr,
-                               < gdf_column *> result_ptr)
+                               &c_first_col,
+                               &c_second_col,
+                               &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df = cudf.DataFrame()
         df['source'] = first
@@ -99,9 +99,9 @@ cpdef overlap(input_graph, first=None, second=None):
     elif first is None and second is None:
         e = g.adjList.indices.size
         result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False)
-        result_ptr = create_column(result)
+        c_result_col = get_gdf_column_view(result)
 
-        err = gdf_overlap(g, < gdf_column *> NULL, < gdf_column *> result_ptr)
+        err = gdf_overlap(g, < gdf_column *> NULL, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
@@ -109,8 +109,8 @@ cpdef overlap(input_graph, first=None, second=None):
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
         df['source'] = cudf.Series(np.zeros(e, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]))
-        src_indices_ptr = create_column(df['source']) 
-        err = g.adjList.get_source_indices(< gdf_column *> src_indices_ptr);
+        c_src_index_col = get_gdf_column_view(df['source']) 
+        err = g.adjList.get_source_indices(&c_src_index_col);
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df['destination'] = cudf.Series(dest_data)
         df['overlap_coeff'] = result
diff --git a/python/cugraph/overlap/woverlap_wrapper.pyx b/python/cugraph/overlap/woverlap_wrapper.pyx
index 6307a964f87..c55a48d147a 100644
--- a/python/cugraph/overlap/woverlap_wrapper.pyx
+++ b/python/cugraph/overlap/woverlap_wrapper.pyx
@@ -79,24 +79,24 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    cdef uintptr_t result_ptr
-    cdef uintptr_t weight_ptr
-    cdef uintptr_t first_ptr
-    cdef uintptr_t second_ptr
-    cdef uintptr_t indices_ptr
+    cdef gdf_column c_result_col
+    cdef gdf_column c_weight_col
+    cdef gdf_column c_first_col
+    cdef gdf_column c_second_col
+    cdef gdf_column c_index_col
 
     if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series:
         resultSize = len(first)
         result = cudf.Series(np.ones(resultSize, dtype=np.float32))
-        result_ptr = create_column(result)
-        weight_ptr = create_column(weights)
-        first_ptr = create_column(first)
-        second_ptr = create_column(second)
+        c_result_col = get_gdf_column_view(result)
+        c_weight_col = get_gdf_column_view(weights)
+        c_first_col = get_gdf_column_view(first)
+        c_second_col = get_gdf_column_view(second)
         err = gdf_overlap_list(g,
-                               < gdf_column *> weight_ptr,
-                               < gdf_column *> first_ptr,
-                               < gdf_column *> second_ptr,
-                               < gdf_column *> result_ptr)
+                               &c_weight_col,
+                               &c_first_col,
+                               &c_second_col,
+                               &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df = cudf.DataFrame()
         df['source'] = first
@@ -107,10 +107,10 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
     elif first is None and second is None:
         resultSize = g.adjList.indices.size
         result = cudf.Series(np.ones(resultSize, dtype=np.float32))
-        result_ptr = create_column(result)
-        weight_ptr = create_column(weights)
+        c_result_col = get_gdf_column_view(result)
+        c_weight_col = get_gdf_column_view(weights)
 
-        err = gdf_overlap(g, < gdf_column *> weight_ptr, < gdf_column *> result_ptr)
+        err = gdf_overlap(g, &c_weight_col, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
@@ -118,8 +118,8 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
         df['source'] = cudf.Series(np.zeros(resultSize, dtype=gdf_to_np_dtypes[g.adjList.indices.dtype]))
-        indices_ptr = create_column(df['source']) 
-        err = g.adjList.get_source_indices(< gdf_column *> indices_ptr);
+        c_index_col = get_gdf_column_view(df['source']) 
+        err = g.adjList.get_source_indices(&c_index_col);
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         df['destination'] = cudf.Series(dest_data)
         df['overlap_coeff'] = result
@@ -127,4 +127,3 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
         return df
 
     raise ValueError("Specify first and second or neither")
-
diff --git a/python/cugraph/pagerank/pagerank_wrapper.pyx b/python/cugraph/pagerank/pagerank_wrapper.pyx
index d2d8371ec05..821ceee1d22 100755
--- a/python/cugraph/pagerank/pagerank_wrapper.pyx
+++ b/python/cugraph/pagerank/pagerank_wrapper.pyx
@@ -65,13 +65,13 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5):
     cdef gdf_graph* g = <gdf_graph*>graph
     df = cudf.DataFrame()  
     df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32))
-    cdef uintptr_t identifier_ptr = create_column(df['vertex']) 
+    cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) 
     df['pagerank'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.float32))
-    cdef uintptr_t pagerank_ptr = create_column(df['pagerank'])    
+    cdef gdf_column c_pagerank_col = get_gdf_column_view(df['pagerank'])    
 
-    err = g.transposedAdjList.get_vertex_identifiers(<gdf_column*>identifier_ptr)
+    err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
-    err = gdf_pagerank(<gdf_graph*>graph, <gdf_column*>pagerank_ptr, <float> alpha, <float> tol, <int> max_iter, <bool> 0)
+    err = gdf_pagerank(g, &c_pagerank_col, <float> alpha, <float> tol, <int> max_iter, <bool> 0)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     return df
diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx
index e8af4f27139..1491692385e 100644
--- a/python/cugraph/spectral_clustering/spectral_clustering.pyx
+++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx
@@ -78,12 +78,12 @@ cpdef spectralBalancedCutClustering(G,
     # Create the output dataframe
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
-    cdef uintptr_t identifier_ptr = create_column(df['vertex'])
+    cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex'])
     df['cluster'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
-    cdef uintptr_t cluster_ptr = create_column(df['cluster'])
+    cdef gdf_column c_cluster_col = get_gdf_column_view(df['cluster'])
     
     # Set the vertex identifiers
-    err = g.adjList.get_vertex_identifiers(< gdf_column *> identifier_ptr)
+    err = g.adjList.get_vertex_identifiers(&c_identifier_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     err = gdf_balancedCutClustering_nvgraph(g,
@@ -93,7 +93,7 @@ cpdef spectralBalancedCutClustering(G,
                                             evs_max_iter,
                                             kmean_tolerance,
                                             kmean_max_iter,
-                                            < gdf_column *> cluster_ptr)
+                                            &c_cluster_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     return df
@@ -155,12 +155,12 @@ cpdef spectralModularityMaximizationClustering(G,
     # Create the output dataframe
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
-    cdef uintptr_t identifier_ptr = create_column(df['vertex'])
+    cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex'])
     df['cluster'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
-    cdef uintptr_t cluster_ptr = create_column(df['cluster'])
+    cdef gdf_column c_cluster_col = get_gdf_column_view(df['cluster'])
     
     # Set the vertex identifiers
-    err = g.adjList.get_vertex_identifiers(< gdf_column *> identifier_ptr)
+    err = g.adjList.get_vertex_identifiers(&c_identifier_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     err = gdf_spectralModularityMaximization_nvgraph(g,
@@ -170,7 +170,7 @@ cpdef spectralModularityMaximizationClustering(G,
                                                      evs_max_iter,
                                                      kmean_tolerance,
                                                      kmean_max_iter,
-                                                     < gdf_column *> cluster_ptr)
+                                                     &c_cluster_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     return df
@@ -209,9 +209,9 @@ cpdef analyzeClustering_modularity(G, n_clusters, clustering):
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     
-    cdef uintptr_t clustering_ptr = create_column(clustering)
+    cdef gdf_column c_clustering_col = get_gdf_column_view(clustering)
     cdef float score
-    err = gdf_AnalyzeClustering_modularity_nvgraph(g, n_clusters, <gdf_column*>clustering_ptr, &score)
+    err = gdf_AnalyzeClustering_modularity_nvgraph(g, n_clusters, &c_clustering_col, &score)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     return score
 
@@ -249,9 +249,9 @@ cpdef analyzeClustering_edge_cut(G, n_clusters, clustering):
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     
-    cdef uintptr_t clustering_ptr = create_column(clustering)
+    cdef gdf_column c_clustering_col = get_gdf_column_view(clustering)
     cdef float score
-    err = gdf_AnalyzeClustering_edge_cut_nvgraph(g, n_clusters, <gdf_column*>clustering_ptr, &score)
+    err = gdf_AnalyzeClustering_edge_cut_nvgraph(g, n_clusters, &c_clustering_col, &score)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     return score
 
@@ -289,8 +289,8 @@ cpdef analyzeClustering_ratio_cut(G, n_clusters, clustering):
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     
-    cdef uintptr_t clustering_ptr = create_column(clustering)
+    cdef gdf_column c_clustering_col = get_gdf_column_view(clustering)
     cdef float score
-    err = gdf_AnalyzeClustering_ratio_cut_nvgraph(g, n_clusters, <gdf_column*>clustering_ptr, &score)
+    err = gdf_AnalyzeClustering_ratio_cut_nvgraph(g, n_clusters, &c_clustering_col, &score)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     return score
diff --git a/python/cugraph/sssp/sssp_wrapper.pyx b/python/cugraph/sssp/sssp_wrapper.pyx
index e0f8fd8b13e..439b09b7cb9 100644
--- a/python/cugraph/sssp/sssp_wrapper.pyx
+++ b/python/cugraph/sssp/sssp_wrapper.pyx
@@ -64,16 +64,16 @@ cpdef sssp(G, source):
 
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32))
-    cdef uintptr_t identifier_ptr = create_column(df['vertex'])
+    cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex'])
     df['distance'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=data_type))
-    cdef uintptr_t distance_ptr = create_column(df['distance'])
+    cdef gdf_column c_distance_col = get_gdf_column_view(df['distance'])
 
-    err = g.transposedAdjList.get_vertex_identifiers(<gdf_column*>identifier_ptr)
+    err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     cdef int[1] sources
     sources[0] = source
-    err = gdf_sssp_nvgraph(<gdf_graph*>graph, sources, <gdf_column*>distance_ptr)
+    err = gdf_sssp_nvgraph(g, sources, &c_distance_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     return df

From b08841465b159ffd7b81e2fdef88f867eb8aa6cc Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Tue, 16 Apr 2019 15:34:59 -0700
Subject: [PATCH 04/18] ReadMtxFile was renamed to read_mtx_file, but the
 docstring examples were still using ReadMtxFile; fixed this.

---
 python/cugraph/bfs/bfs_wrapper.pyx                     |  2 +-
 python/cugraph/jaccard/jaccard_wrapper.pyx             |  2 +-
 python/cugraph/jaccard/wjaccard_wrapper.pyx            |  2 +-
 python/cugraph/louvain/louvain_wrapper.pyx             |  2 +-
 python/cugraph/overlap/overlap_wrapper.pyx             |  2 +-
 python/cugraph/overlap/woverlap_wrapper.pyx            |  2 +-
 python/cugraph/pagerank/pagerank_wrapper.pyx           |  2 +-
 .../spectral_clustering/spectral_clustering.pyx        | 10 +++++-----
 python/cugraph/sssp/sssp_wrapper.pyx                   |  2 +-
 .../cugraph/triangle_count/triangle_count_wrapper.pyx  |  2 +-
 10 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/python/cugraph/bfs/bfs_wrapper.pyx b/python/cugraph/bfs/bfs_wrapper.pyx
index 348d0c62d6c..ac226469390 100644
--- a/python/cugraph/bfs/bfs_wrapper.pyx
+++ b/python/cugraph/bfs/bfs_wrapper.pyx
@@ -45,7 +45,7 @@ cpdef bfs(G, start, directed=True):
         
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/jaccard/jaccard_wrapper.pyx b/python/cugraph/jaccard/jaccard_wrapper.pyx
index c602d8db414..74fe01ed649 100644
--- a/python/cugraph/jaccard/jaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/jaccard_wrapper.pyx
@@ -60,7 +60,7 @@ cpdef jaccard(input_graph, first=None, second=None):
  
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx
index f0f61f79d54..52ff299bd8a 100644
--- a/python/cugraph/jaccard/wjaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx
@@ -65,7 +65,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
           the source and destination vertices. 
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx
index 3b0db53e8bc..8393e464cb7 100644
--- a/python/cugraph/louvain/louvain_wrapper.pyx
+++ b/python/cugraph/louvain/louvain_wrapper.pyx
@@ -38,7 +38,7 @@ cpdef nvLouvain(input_graph):
  
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/overlap/overlap_wrapper.pyx b/python/cugraph/overlap/overlap_wrapper.pyx
index ecad68ea939..41f628edf37 100644
--- a/python/cugraph/overlap/overlap_wrapper.pyx
+++ b/python/cugraph/overlap/overlap_wrapper.pyx
@@ -60,7 +60,7 @@ cpdef overlap(input_graph, first=None, second=None):
  
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/overlap/woverlap_wrapper.pyx b/python/cugraph/overlap/woverlap_wrapper.pyx
index c55a48d147a..93194f5b96c 100644
--- a/python/cugraph/overlap/woverlap_wrapper.pyx
+++ b/python/cugraph/overlap/woverlap_wrapper.pyx
@@ -65,7 +65,7 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
           the source and destination vertices. 
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/pagerank/pagerank_wrapper.pyx b/python/cugraph/pagerank/pagerank_wrapper.pyx
index 821ceee1d22..c48af92bebe 100755
--- a/python/cugraph/pagerank/pagerank_wrapper.pyx
+++ b/python/cugraph/pagerank/pagerank_wrapper.pyx
@@ -50,7 +50,7 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5):
 
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx
index 1491692385e..674faeb8034 100644
--- a/python/cugraph/spectral_clustering/spectral_clustering.pyx
+++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx
@@ -58,7 +58,7 @@ cpdef spectralBalancedCutClustering(G,
         
     Example:
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
@@ -135,7 +135,7 @@ cpdef spectralModularityMaximizationClustering(G,
         
     Example:
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
@@ -194,7 +194,7 @@ cpdef analyzeClustering_modularity(G, n_clusters, clustering):
         
     Example:
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
@@ -234,7 +234,7 @@ cpdef analyzeClustering_edge_cut(G, n_clusters, clustering):
         
     Example:
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
@@ -274,7 +274,7 @@ cpdef analyzeClustering_ratio_cut(G, n_clusters, clustering):
         
     Example:
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/sssp/sssp_wrapper.pyx b/python/cugraph/sssp/sssp_wrapper.pyx
index 439b09b7cb9..8c3bcff536a 100644
--- a/python/cugraph/sssp/sssp_wrapper.pyx
+++ b/python/cugraph/sssp/sssp_wrapper.pyx
@@ -44,7 +44,7 @@ cpdef sssp(G, source):
     
     Examples
     --------
-    >>> M = ReadMtxFile(graph_file)
+    >>> M = read_mtx_file(graph_file)
     >>> sources = cudf.Series(M.row)
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
diff --git a/python/cugraph/triangle_count/triangle_count_wrapper.pyx b/python/cugraph/triangle_count/triangle_count_wrapper.pyx
index bcddaf3524c..e90ccf4ff6d 100644
--- a/python/cugraph/triangle_count/triangle_count_wrapper.pyx
+++ b/python/cugraph/triangle_count/triangle_count_wrapper.pyx
@@ -37,7 +37,7 @@ cpdef triangles(input_graph):
       
     Example
     -------
-    >>>> M = ReadMtxFile(graph_file)
+    >>>> M = read_mtx_file(graph_file)
     >>>> sources = cudf.Series(M.row)
     >>>> destinations = cudf.Series(M.col)
     >>>> G = cugraph.Graph()

From 971c1bd4d0c2734be4036cb06e4e4c203f0fe3c0 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Tue, 16 Apr 2019 16:13:24 -0700
Subject: [PATCH 05/18] updated spacing inside the Cython typecasting operator
 to match cudf (no spaces between < and >). cudf is inconsistent in placing a
 space between > and the name of the variable being cast, so this part is left
 as is.

---
 python/cugraph/graph/c_graph.pyx              | 56 +++++++++----------
 python/cugraph/jaccard/jaccard_wrapper.pyx    | 10 ++--
 python/cugraph/jaccard/wjaccard_wrapper.pyx   |  4 +-
 python/cugraph/overlap/overlap_wrapper.pyx    | 10 ++--
 python/cugraph/overlap/woverlap_wrapper.pyx   |  4 +-
 .../spectral_clustering.pyx                   | 10 ++--
 6 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 0e43084873b..dd7376e5de7 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -36,12 +36,12 @@ cdef gdf_column get_gdf_column_view(col):
     # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column)
     cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE)
 
-    err = gdf_column_view_augmented(< gdf_column *> &c_col,
-                                    < void *> data_ptr,
-                                    < gdf_valid_type *> 0,
-                                    < gdf_size_type > len(col),
+    err = gdf_column_view_augmented(<gdf_column*> &c_col,
+                                    <void*> data_ptr,
+                                    <gdf_valid_type*> 0,
+                                    <gdf_size_type> len(col),
                                     dtypes[col.dtype.type],
-                                    < gdf_size_type > col.null_count,
+                                    <gdf_size_type> col.null_count,
                                     c_extra_dtype_info)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
@@ -63,9 +63,9 @@ class Graph:
         >>> G = cuGraph.Graph()
         """
         cdef gdf_graph * g
-        g = < gdf_graph *> calloc(1, sizeof(gdf_graph))
+        g = <gdf_graph*> calloc(1, sizeof(gdf_graph))
 
-        cdef uintptr_t graph_ptr = < uintptr_t > g
+        cdef uintptr_t graph_ptr = <uintptr_t> g
         self.graph_ptr = graph_ptr
 
         self.edge_list_source_col = None
@@ -78,7 +78,7 @@ class Graph:
 
     def __del__(self):
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = <gdf_graph*> graph
         self.delete_edge_list()
         self.delete_adj_list()
         self.delete_transposed_adj_list()
@@ -155,7 +155,7 @@ class Graph:
             c_value_col = get_gdf_column_view(self.edge_list_value_col)
             c_value_col_ptr = &c_value_col
 
-        err = gdf_edge_list_view(< gdf_graph *> graph,
+        err = gdf_edge_list_view(<gdf_graph*> graph,
                                  &c_source_col,
                                  &c_dest_col,
                                  c_value_col_ptr)
@@ -166,7 +166,7 @@ class Graph:
         Get the number of vertices in the graph
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = <gdf_graph*> graph
         if g.adjList:
             return g.adjList.offsets.size - 1
         elif g.transposedAdjList:
@@ -181,14 +181,14 @@ class Graph:
         Display the edge list. Compute it if needed.
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = <gdf_graph*> graph
         err = gdf_add_edge_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         col_size = g.edgeList.src_indices.size
 
-        cdef uintptr_t src_col_data = < uintptr_t > g.edgeList.src_indices.data
-        cdef uintptr_t dest_col_data = < uintptr_t > g.edgeList.dest_indices.data
+        cdef uintptr_t src_col_data = <uintptr_t> g.edgeList.src_indices.data
+        cdef uintptr_t dest_col_data = <uintptr_t> g.edgeList.dest_indices.data
 
         src_data = rmm.device_array_from_ptr(src_col_data,
                                      nelem=col_size,
@@ -209,7 +209,7 @@ class Graph:
         Delete the edge list.
         """
         cdef uintptr_t graph = self.graph_ptr
-        err = gdf_delete_edge_list(< gdf_graph *> graph)
+        err = gdf_delete_edge_list(<gdf_graph*> graph)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         # decrease reference count to free memory if the referenced objects are
@@ -293,7 +293,7 @@ class Graph:
             c_value_col = get_gdf_column_view(self.adj_list_value_col)
             c_value_col_ptr = &c_value_col
 
-        err = gdf_adj_list_view(< gdf_graph *> graph,
+        err = gdf_adj_list_view(<gdf_graph*> graph,
                                 &c_offset_col,
                                 &c_index_col,
                                 c_value_col_ptr)
@@ -304,15 +304,15 @@ class Graph:
         Display the adjacency list. Compute it if needed.
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = <gdf_graph*> graph
         err = gdf_add_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         col_size_off = g.adjList.offsets.size
         col_size_ind = g.adjList.indices.size
 
-        cdef uintptr_t offset_col_data = < uintptr_t > g.adjList.offsets.data
-        cdef uintptr_t index_col_data = < uintptr_t > g.adjList.indices.data
+        cdef uintptr_t offset_col_data = <uintptr_t> g.adjList.offsets.data
+        cdef uintptr_t index_col_data = <uintptr_t> g.adjList.indices.data
 
         offsets_data = rmm.device_array_from_ptr(offset_col_data,
                                      nelem=col_size_off,
@@ -333,7 +333,7 @@ class Graph:
         Delete the adjacency list.
         """
         cdef uintptr_t graph = self.graph_ptr
-        err = gdf_delete_adj_list(< gdf_graph *> graph)
+        err = gdf_delete_adj_list(<gdf_graph*> graph)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         # decrease reference count to free memory if the referenced objects are
@@ -348,7 +348,7 @@ class Graph:
         the existing graph.
         """
         cdef uintptr_t graph = self.graph_ptr
-        err = gdf_add_transposed_adj_list(< gdf_graph *> graph)
+        err = gdf_add_transposed_adj_list(<gdf_graph*> graph)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     def view_transposed_adj_list(self):
@@ -356,15 +356,15 @@ class Graph:
         Display the transposed adjacency list. Compute it if needed.
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = <gdf_graph*> graph
         err = gdf_add_transposed_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         off_size = g.transposedAdjList.offsets.size
         ind_size = g.transposedAdjList.indices.size
 
-        cdef uintptr_t offset_col_data = < uintptr_t > g.transposedAdjList.offsets.data
-        cdef uintptr_t indices_col_data = < uintptr_t > g.transposedAdjList.indices.data
+        cdef uintptr_t offset_col_data = <uintptr_t> g.transposedAdjList.offsets.data
+        cdef uintptr_t indices_col_data = <uintptr_t> g.transposedAdjList.indices.data
 
         offsets_data = rmm.device_array_from_ptr(offset_col_data,
                                      nelem=off_size,
@@ -393,7 +393,7 @@ class Graph:
         df['second'] the second vertex id of a pair
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = <gdf_graph*> graph
         cdef gdf_column c_first_col
         cdef gdf_column c_second_col
         err = gdf_get_two_hop_neighbors(g, &c_first_col, &c_second_col)
@@ -425,7 +425,7 @@ class Graph:
         Delete the adjacency list.
         """
         cdef uintptr_t graph = self.graph_ptr
-        err = gdf_delete_adj_list(< gdf_graph *> graph)
+        err = gdf_delete_adj_list(<gdf_graph*> graph)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     def delete_transposed_adj_list(self):
@@ -433,7 +433,7 @@ class Graph:
         Delete the transposed adjacency list.
         """
         cdef uintptr_t graph = self.graph_ptr
-        err = gdf_delete_transposed_adj_list(< gdf_graph *> graph)
+        err = gdf_delete_transposed_adj_list(<gdf_graph*> graph)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     def num_vertices(self):
@@ -441,7 +441,7 @@ class Graph:
         Get the number of vertices in the graph
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph* g = < gdf_graph *> graph
+        cdef gdf_graph* g = <gdf_graph*> graph
         err = gdf_add_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         return g.adjList.offsets.size - 1
@@ -556,7 +556,7 @@ class Graph:
 
     def _degree(self, vertex_subset, x = 0):
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph* g = < gdf_graph *> graph
+        cdef gdf_graph* g = <gdf_graph*> graph
 
         n = self.num_vertices()
 
diff --git a/python/cugraph/jaccard/jaccard_wrapper.pyx b/python/cugraph/jaccard/jaccard_wrapper.pyx
index 74fe01ed649..164e794f6a7 100644
--- a/python/cugraph/jaccard/jaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/jaccard_wrapper.pyx
@@ -68,9 +68,9 @@ cpdef jaccard(input_graph, first=None, second=None):
     >>> jaccard_weights = cugraph.jaccard(G)
     """
     cdef uintptr_t graph = input_graph.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
 
-    err = gdf_add_adj_list(< gdf_graph *> graph)
+    err = gdf_add_adj_list(<gdf_graph*> graph)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     cdef gdf_column c_result_col
@@ -85,7 +85,7 @@ cpdef jaccard(input_graph, first=None, second=None):
         c_first_col = get_gdf_column_view(first)
         c_second_col = get_gdf_column_view(second)
         err = gdf_jaccard_list(g,
-                               < gdf_column *> NULL,
+                               <gdf_column*> NULL,
                                &c_first_col,
                                &c_second_col,
                                &c_result_col)
@@ -101,10 +101,10 @@ cpdef jaccard(input_graph, first=None, second=None):
         result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False)
         c_result_col = get_gdf_column_view(result)
 
-        err = gdf_jaccard(g, < gdf_column *> NULL, &c_result_col)
+        err = gdf_jaccard(g, <gdf_column*> NULL, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
+        dest_data = rmm.device_array_from_ptr(<uintptr_t> g.adjList.indices.data,
                                             nelem=e,
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx
index 52ff299bd8a..cba0da8fc53 100644
--- a/python/cugraph/jaccard/wjaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx
@@ -74,7 +74,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
     """
 
     cdef uintptr_t graph = input_graph.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
     
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
@@ -113,7 +113,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
         err = gdf_jaccard(g, &c_weight_col, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
+        dest_data = rmm.device_array_from_ptr(<uintptr_t> g.adjList.indices.data,
                                             nelem=resultSize,
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
diff --git a/python/cugraph/overlap/overlap_wrapper.pyx b/python/cugraph/overlap/overlap_wrapper.pyx
index 41f628edf37..d7a799a8bd4 100644
--- a/python/cugraph/overlap/overlap_wrapper.pyx
+++ b/python/cugraph/overlap/overlap_wrapper.pyx
@@ -68,9 +68,9 @@ cpdef overlap(input_graph, first=None, second=None):
     >>> df = cugraph.overlap(G)
     """
     cdef uintptr_t graph = input_graph.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
 
-    err = gdf_add_adj_list(< gdf_graph *> graph)
+    err = gdf_add_adj_list(<gdf_graph*> graph)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     cdef gdf_column c_result_col
@@ -85,7 +85,7 @@ cpdef overlap(input_graph, first=None, second=None):
         c_first_col = get_gdf_column_view(first)
         c_second_col = get_gdf_column_view(second)
         err = gdf_overlap_list(g,
-                               < gdf_column *> NULL,
+                               <gdf_column*> NULL,
                                &c_first_col,
                                &c_second_col,
                                &c_result_col)
@@ -101,10 +101,10 @@ cpdef overlap(input_graph, first=None, second=None):
         result = cudf.Series(np.ones(e, dtype=np.float32), nan_as_null=False)
         c_result_col = get_gdf_column_view(result)
 
-        err = gdf_overlap(g, < gdf_column *> NULL, &c_result_col)
+        err = gdf_overlap(g, <gdf_column*> NULL, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
+        dest_data = rmm.device_array_from_ptr(<uintptr_t> g.adjList.indices.data,
                                             nelem=e,
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
diff --git a/python/cugraph/overlap/woverlap_wrapper.pyx b/python/cugraph/overlap/woverlap_wrapper.pyx
index 93194f5b96c..b69bbb48d92 100644
--- a/python/cugraph/overlap/woverlap_wrapper.pyx
+++ b/python/cugraph/overlap/woverlap_wrapper.pyx
@@ -74,7 +74,7 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
     """
 
     cdef uintptr_t graph = input_graph.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
     
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
@@ -113,7 +113,7 @@ cpdef overlap_w(input_graph, weights, first=None, second=None):
         err = gdf_overlap(g, &c_weight_col, &c_result_col)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        dest_data = rmm.device_array_from_ptr(< uintptr_t > g.adjList.indices.data,
+        dest_data = rmm.device_array_from_ptr(<uintptr_t> g.adjList.indices.data,
                                             nelem=resultSize,
                                             dtype=gdf_to_np_dtypes[g.adjList.indices.dtype])
         df = cudf.DataFrame()
diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx
index 674faeb8034..0a9dc261db1 100644
--- a/python/cugraph/spectral_clustering/spectral_clustering.pyx
+++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx
@@ -67,7 +67,7 @@ cpdef spectralBalancedCutClustering(G,
     """
 
     cdef uintptr_t graph = G.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
     
     # Ensure that the graph has CSR adjacency list
     err = gdf_add_adj_list(g)
@@ -144,7 +144,7 @@ cpdef spectralModularityMaximizationClustering(G,
     """
 
     cdef uintptr_t graph = G.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
 
     # Ensure that the graph has CSR adjacency list
     err = gdf_add_adj_list(g)
@@ -203,7 +203,7 @@ cpdef analyzeClustering_modularity(G, n_clusters, clustering):
     >>> score = cuGraph.analyzeClustering_modularity(G, 5, DF['cluster'])
     """
     cdef uintptr_t graph = G.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
     
     # Ensure that the graph has CSR adjacency list
     err = gdf_add_adj_list(g)
@@ -243,7 +243,7 @@ cpdef analyzeClustering_edge_cut(G, n_clusters, clustering):
     >>> score = cuGraph.analyzeClustering_edge_cut(G, 5, DF['cluster'])
     """
     cdef uintptr_t graph = G.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
     
     # Ensure that the graph has CSR adjacency list
     err = gdf_add_adj_list(g)
@@ -283,7 +283,7 @@ cpdef analyzeClustering_ratio_cut(G, n_clusters, clustering):
     >>> score = cuGraph.analyzeClustering_ratio_cut(G, 5, DF['cluster'])
     """
     cdef uintptr_t graph = G.graph_ptr
-    cdef gdf_graph * g = < gdf_graph *> graph
+    cdef gdf_graph * g = <gdf_graph*> graph
     
     # Ensure that the graph has CSR adjacency list
     err = gdf_add_adj_list(g)

From d5f37e61831ae915479d14a4575087260fa712b7 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Thu, 18 Apr 2019 09:49:00 -0700
Subject: [PATCH 06/18] updated comments on get_gdf_column_view

---
 python/cugraph/graph/c_graph.pyx | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index dd7376e5de7..6f7df83a8b9 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -29,16 +29,26 @@ cdef gdf_column get_gdf_column_view(col):
     object by shallow copying. The returned C++ object is expected to be used
     as a temporary variable to pass the column data encapsulated in the Python
     cudf Series object to C++ functions expecting (pointers to) C++ gdf_column
-    objects.
+    objects. It is the caller's responsibility to ensure that col outlives the
+    returned view object. cudf has column_view_from_column and using this is,
+    in general, better design than creating our own, but we will keep this as
+    cudf is planning to remove the function. cudf plans to redesign
+    cudf::column to fundamentally solve this problem, so once they finish the
+    redesign, we need to update this code to use their new features. Until that
+    time, we may rely on this as a temporary solution.
     """
     cdef gdf_column c_col
     cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column)
-    # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column)
+    cdef uintptr_t valid_ptr
+    if col._column._mask is None:
+        valid_ptr = 0
+    else:
+        valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column)
     cdef gdf_dtype_extra_info c_extra_dtype_info = gdf_dtype_extra_info(time_unit=TIME_UNIT_NONE)
 
     err = gdf_column_view_augmented(<gdf_column*> &c_col,
                                     <void*> data_ptr,
-                                    <gdf_valid_type*> 0,
+                                    <gdf_valid_type*> valid_ptr,
                                     <gdf_size_type> len(col),
                                     dtypes[col.dtype.type],
                                     <gdf_size_type> col.null_count,

From 5b37c8497d2aa8960938485b7702d76b08dbca50 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Thu, 18 Apr 2019 16:12:41 -0700
Subject: [PATCH 07/18] fixed a bug (not properly freeing valid) in
 gdf_col_delete

---
 cpp/src/cugraph.cu | 35 ++++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu
index 8a22c9e7005..175a3c8e43a 100644
--- a/cpp/src/cugraph.cu
+++ b/cpp/src/cugraph.cu
@@ -29,26 +29,27 @@
 template<typename T>
 using Vector = thrust::device_vector<T, rmm_allocator<T>>;
 
+/*
+ * cudf has gdf_column_free and using this is, in general, better design than
+ * creating our own, but we will keep this as cudf is planning to remove the
+ * function. cudf plans to redesign cudf::column to fundamentally solve this
+ * problem, so once they finish the redesign, we need to update this code to
+ * use their new features. Until that time, we may rely on this as a temporary
+ * solution.
+ */
 void gdf_col_delete(gdf_column* col) {
-  if (col) {
-    col->size = 0;
-    if(col->data) {
-      ALLOC_FREE_TRY(col->data, nullptr);
+  if (col != nullptr) {
+    auto stream = cudaStream_t{nullptr};
+    if (col->data != nullptr) {
+      ALLOC_FREE_TRY(col->data, stream);
+    }
+    if (col->valid != nullptr) {
+      ALLOC_FREE_TRY(col->valid, stream);
+    }
+    if (col->col_name != nullptr) {
+      free(col->col_name);
     }
-#if 1
-// If delete col is executed, the memory pointed by col is no longer valid and
-// can be used in another memory allocation, so executing col->data = nullptr
-// after delete col is dangerous, also, col = nullptr has no effect here (the
-// address is passed by value, for col = nullptr should work, the input
-// parameter should be gdf_column*& col (or alternatively, gdf_column** col and
-// *col = nullptr also work)
-    col->data = nullptr;
-    delete col;
-#else
     delete col;
-    col->data = nullptr;
-    col = nullptr;
-#endif
   }
 }
 

From e80277eab8a38f8ed040bbeb69fd2e5ca2db09de Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Thu, 18 Apr 2019 17:56:16 -0700
Subject: [PATCH 08/18] commented out freeing gdf_column's col_name if not
 nullptr, currently, cudf's gdf_column_view does not properly initialize
 col_name to nullptr, freeing col_name can result in freeing unallocated
 memory, this problem should be cleaned up once cudf finishes redesigning
 cudf::column.

---
 cpp/src/cugraph.cu | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu
index 175a3c8e43a..1506fdfafab 100644
--- a/cpp/src/cugraph.cu
+++ b/cpp/src/cugraph.cu
@@ -46,9 +46,14 @@ void gdf_col_delete(gdf_column* col) {
     if (col->valid != nullptr) {
       ALLOC_FREE_TRY(col->valid, stream);
     }
+#if 0/* Currently, gdf_column_view does not set col_name, and col_name can have
+        an arbitrary value, so freeing col_name can lead to freeing a random
+        address. This problem should be cleaned up once cudf finishes
+        redesigning cudf::column. */
     if (col->col_name != nullptr) {
       free(col->col_name);
     }
+#endif
     delete col;
   }
 }

From 57daf68486bb2547c05eb5d7226f69a0ad84c28f Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Thu, 18 Apr 2019 22:43:39 -0700
Subject: [PATCH 09/18] removed tab space in algorithms.h

---
 cpp/include/algorithms.h | 46 ++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/cpp/include/algorithms.h b/cpp/include/algorithms.h
index 985b7a838db..2116fabe7eb 100644
--- a/cpp/include/algorithms.h
+++ b/cpp/include/algorithms.h
@@ -40,11 +40,11 @@
  */
 /* ----------------------------------------------------------------------------*/
 gdf_error gdf_pagerank(gdf_graph *graph,
-												gdf_column *pagerank,
-												float alpha,
-												float tolerance,
-												int max_iter,
-												bool has_guess);
+                       gdf_column *pagerank,
+                       float alpha,
+                       float tolerance,
+                       int max_iter,
+                       bool has_guess);
 
 /**
  * @Synopsis   Creates source, destination and value columns based on the specified R-MAT model
@@ -78,11 +78,11 @@ gdf_error gdf_pagerank(gdf_graph *graph,
  */
 /* ----------------------------------------------------------------------------*/
 gdf_error gdf_grmat_gen(const char* argv,
-												size_t &vertices,
-												size_t &edges,
-												gdf_column* src,
-												gdf_column* dest,
-												gdf_column* val);
+                        size_t &vertices,
+                        size_t &edges,
+                        gdf_column* src,
+                        gdf_column* dest,
+                        gdf_column* val);
 
 /**
  * @Synopsis   Performs a breadth first search traversal of a graph starting from a node.
@@ -101,10 +101,10 @@ gdf_error gdf_grmat_gen(const char* argv,
  */
 /* ----------------------------------------------------------------------------*/
 gdf_error gdf_bfs(gdf_graph *graph,
-									gdf_column *distances,
-									gdf_column *predecessors,
-									int start_node,
-									bool directed);
+                  gdf_column *distances,
+                  gdf_column *predecessors,
+                  int start_node,
+                  bool directed);
 
 /**
  * Computes the Jaccard similarity coefficient for every pair of vertices in the graph
@@ -116,8 +116,8 @@ gdf_error gdf_bfs(gdf_graph *graph,
  * @return Error code
  */
 gdf_error gdf_jaccard(gdf_graph *graph,
-											gdf_column *weights,
-											gdf_column *result);
+                      gdf_column *weights,
+                      gdf_column *result);
 
 /**
  * Computes the Jaccard similarity coefficient for each pair of specified vertices.
@@ -131,10 +131,10 @@ gdf_error gdf_jaccard(gdf_graph *graph,
  * @return Error code
  */
 gdf_error gdf_jaccard_list(gdf_graph *graph,
-														gdf_column *weights,
-														gdf_column *first,
-														gdf_column *second,
-														gdf_column *result);
+                           gdf_column *weights,
+                           gdf_column *first,
+                           gdf_column *second,
+                           gdf_column *result);
 
 /**
  * Computes the Overlap Coefficient for every pair of vertices in the graph which are
@@ -167,6 +167,6 @@ gdf_error gdf_overlap_list(gdf_graph *graph,
                            gdf_column *result);
 
 gdf_error gdf_louvain(gdf_graph *graph,
-											void *final_modularity,
-											void *num_level,
-											gdf_column *louvain_parts);
+                      void *final_modularity,
+                      void *num_level,
+                      gdf_column *louvain_parts);

From dea90dc820f7d99d115e0f324e64cf96d6cbc23c Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Thu, 18 Apr 2019 23:51:05 -0700
Subject: [PATCH 10/18] fixed a warning in louvain_wrapper.pyx (dereferencing
 type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing])

---
 python/cugraph/louvain/louvain_wrapper.pyx | 46 ++++++++++++++++------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx
index 8393e464cb7..6f3870b7d83 100644
--- a/python/cugraph/louvain/louvain_wrapper.pyx
+++ b/python/cugraph/louvain/louvain_wrapper.pyx
@@ -25,9 +25,9 @@ cpdef nvLouvain(input_graph):
 
     Parameters
     ----------
-    graph : cuGraph.Graph                 
+    input_graph : cuGraph.Graph
       cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm).
-      The adjacency list will be computed if not already present.   
+      The adjacency list will be computed if not already present.
 
     Returns
     -------
@@ -43,7 +43,7 @@ cpdef nvLouvain(input_graph):
     >>> destinations = cudf.Series(M.col)
     >>> G = cuGraph.Graph()
     >>> G.add_edge_list(sources,destinations,None)
-    >>> louvain_parts = cuGraph.louvain(G)
+    >>> louvain_parts, modularity_score = cuGraph.louvain(G)
     """
 
     cdef uintptr_t graph = input_graph.graph_ptr
@@ -62,17 +62,37 @@ cpdef nvLouvain(input_graph):
     
     df['partition'] = cudf.Series(np.zeros(n,dtype=np.int32))
     cdef gdf_column c_louvain_parts_col = get_gdf_column_view(df['partition'])
-    cdef double final_modularity = 1.0
-    cdef int num_level
 
-    err = gdf_louvain(<gdf_graph*>g, <void*>&final_modularity, <void*>&num_level, &c_louvain_parts_col)
-    cudf.bindings.cudf_cpp.check_gdf_error(err)
-
-    cdef double fm = final_modularity
-    cdef float tmp = (<float*>(<void*>&final_modularity))[0]
+    cdef bool single_precision = False
+    # This implementation is tied to cugraph.cu line 503:
+    # cudaDataType_t val_type = graph->adjList->edge_data?
+    #     gdf_to_cudadtype(graph->adjList->edge_data): CUDA_R_32F;
+    # It depends on a low-level implementation detail of the lower-level
+    # function and is very vulnerable to low-level changes. It should be
+    # reimplemented, but we plan to eventually remove nvgraph, so it is
+    # left as is for now.
     if g.adjList.edge_data:
         if g.adjList.edge_data.dtype == GDF_FLOAT32:
-            fm = tmp
+            single_precision = True;
+    else:
+        single_precision = True;
+
+    cdef float final_modularity_single_precision = 1.0
+    cdef double final_modularity_double_precision = 1.0
+    cdef int num_level = 0
+    cdef gdf_error error
+
+    if single_precision:
+        err = gdf_louvain(<gdf_graph*>g,
+                          <void*>&final_modularity_single_precision,
+                          <void*>&num_level, &c_louvain_parts_col)
+    else:
+        err = gdf_louvain(<gdf_graph*>g,
+                          <void*>&final_modularity_double_precision,
+                          <void*>&num_level, &c_louvain_parts_col)
+    cudf.bindings.cudf_cpp.check_gdf_error(err)
+
+    if single_precision:
+        return df, <double>final_modularity_single_precision
     else:
-        fm = tmp
-    return df, fm
+        return df, final_modularity_double_precision

From 71a6bf7962ae3823a713965ab8f3917efc7917ee Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 00:22:22 -0700
Subject: [PATCH 11/18] replaced adjList.offsets.size - 1 and
 transposedAdjList.offsets.size - 1 with num_vertices(), this is a better
 abstraction and less vulnerable to low level changes in class Graph

---
 python/cugraph/graph/c_graph.pyx                    |  4 ++--
 python/cugraph/louvain/louvain_wrapper.pyx          |  6 +++---
 python/cugraph/pagerank/pagerank_wrapper.pyx        | 13 ++++++++-----
 .../spectral_clustering/spectral_clustering.pyx     |  4 ++--
 python/cugraph/sssp/sssp_wrapper.pyx                | 11 ++++++-----
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 6f7df83a8b9..6ee6cb61b15 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -318,7 +318,7 @@ class Graph:
         err = gdf_add_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        col_size_off = g.adjList.offsets.size
+        col_size_off = self.num_vertices() + 1
         col_size_ind = g.adjList.indices.size
 
         cdef uintptr_t offset_col_data = <uintptr_t> g.adjList.offsets.data
@@ -370,7 +370,7 @@ class Graph:
         err = gdf_add_transposed_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        off_size = g.transposedAdjList.offsets.size
+        off_size = self.num_vertices() + 1
         ind_size = g.transposedAdjList.indices.size
 
         cdef uintptr_t offset_col_data = <uintptr_t> g.transposedAdjList.offsets.data
diff --git a/python/cugraph/louvain/louvain_wrapper.pyx b/python/cugraph/louvain/louvain_wrapper.pyx
index 6f3870b7d83..d7ab7fb294d 100644
--- a/python/cugraph/louvain/louvain_wrapper.pyx
+++ b/python/cugraph/louvain/louvain_wrapper.pyx
@@ -52,15 +52,15 @@ cpdef nvLouvain(input_graph):
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    n = g.adjList.offsets.size - 1
+    num_vert = input_graph.num_vertices()
 
     df = cudf.DataFrame()
-    df['vertex'] = cudf.Series(np.zeros(n, dtype=np.int32))
+    df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
     cdef gdf_column c_index_col = get_gdf_column_view(df['vertex'])
     err = g.adjList.get_vertex_identifiers(&c_index_col)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
     
-    df['partition'] = cudf.Series(np.zeros(n,dtype=np.int32))
+    df['partition'] = cudf.Series(np.zeros(num_vert,dtype=np.int32))
     cdef gdf_column c_louvain_parts_col = get_gdf_column_view(df['partition'])
 
     cdef bool single_precision = False
diff --git a/python/cugraph/pagerank/pagerank_wrapper.pyx b/python/cugraph/pagerank/pagerank_wrapper.pyx
index c48af92bebe..c09d118f09a 100755
--- a/python/cugraph/pagerank/pagerank_wrapper.pyx
+++ b/python/cugraph/pagerank/pagerank_wrapper.pyx
@@ -59,14 +59,17 @@ cpdef pagerank(G,alpha=0.85, max_iter=100, tol=1.0e-5):
     """
 
     cdef uintptr_t graph = G.graph_ptr
-    err = gdf_add_transposed_adj_list(<gdf_graph*>graph)
-    cudf.bindings.cudf_cpp.check_gdf_error(err)
-    
     cdef gdf_graph* g = <gdf_graph*>graph
+
+    err = gdf_add_transposed_adj_list(g)
+    cudf.bindings.cudf_cpp.check_gdf_error(err)
+
+    num_vert = G.num_vertices()
+
     df = cudf.DataFrame()  
-    df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32))
+    df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
     cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex']) 
-    df['pagerank'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.float32))
+    df['pagerank'] = cudf.Series(np.zeros(num_vert, dtype=np.float32))
     cdef gdf_column c_pagerank_col = get_gdf_column_view(df['pagerank'])    
 
     err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col)
diff --git a/python/cugraph/spectral_clustering/spectral_clustering.pyx b/python/cugraph/spectral_clustering/spectral_clustering.pyx
index 0a9dc261db1..3b0bf170988 100644
--- a/python/cugraph/spectral_clustering/spectral_clustering.pyx
+++ b/python/cugraph/spectral_clustering/spectral_clustering.pyx
@@ -73,7 +73,7 @@ cpdef spectralBalancedCutClustering(G,
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    num_vert = g.adjList.offsets.size - 1
+    num_vert = G.num_vertices()
 
     # Create the output dataframe
     df = cudf.DataFrame()
@@ -150,7 +150,7 @@ cpdef spectralModularityMaximizationClustering(G,
     err = gdf_add_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    num_vert = g.adjList.offsets.size - 1
+    num_vert = G.num_vertices()
 
     # Create the output dataframe
     df = cudf.DataFrame()
diff --git a/python/cugraph/sssp/sssp_wrapper.pyx b/python/cugraph/sssp/sssp_wrapper.pyx
index 8c3bcff536a..035bbc6d68a 100644
--- a/python/cugraph/sssp/sssp_wrapper.pyx
+++ b/python/cugraph/sssp/sssp_wrapper.pyx
@@ -53,19 +53,21 @@ cpdef sssp(G, source):
     """
 
     cdef uintptr_t graph = G.graph_ptr
-    err = gdf_add_transposed_adj_list(<gdf_graph*>graph)
+    cdef gdf_graph* g = <gdf_graph*>graph
+
+    err = gdf_add_transposed_adj_list(g)
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    cdef gdf_graph* g = <gdf_graph*>graph
+    num_vert = G.num_vertices()
 
     data_type = np.float32
     if g.transposedAdjList.edge_data:
         data_type = gdf_to_np_dtypes[g.transposedAdjList.edge_data.dtype]
 
     df = cudf.DataFrame()
-    df['vertex'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=np.int32))
+    df['vertex'] = cudf.Series(np.zeros(num_vert, dtype=np.int32))
     cdef gdf_column c_identifier_col = get_gdf_column_view(df['vertex'])
-    df['distance'] = cudf.Series(np.zeros(g.transposedAdjList.offsets.size-1,dtype=data_type))
+    df['distance'] = cudf.Series(np.zeros(num_vert, dtype=data_type))
     cdef gdf_column c_distance_col = get_gdf_column_view(df['distance'])
 
     err = g.transposedAdjList.get_vertex_identifiers(&c_identifier_col)
@@ -77,4 +79,3 @@ cpdef sssp(G, source):
     cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     return df
-

From 839bb3c27f0f8a359df42444529ef4afd965c92f Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 00:25:27 -0700
Subject: [PATCH 12/18] there were two implementations of num_vertices in class
 Graph; removed one.

---
 python/cugraph/graph/c_graph.pyx | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 6ee6cb61b15..15a07543355 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -171,21 +171,6 @@ class Graph:
                                  c_value_col_ptr)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-    def num_vertices(self):
-        """
-        Get the number of vertices in the graph
-        """
-        cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph * g = <gdf_graph*> graph
-        if g.adjList:
-            return g.adjList.offsets.size - 1
-        elif g.transposedAdjList:
-            return g.transposedAdjList.offsets.size - 1
-        else:
-            err = gdf_add_adj_list(g)
-            cudf.bindings.cudf_cpp.check_gdf_error(err)
-            return g.adjList.offsets.size - 1   
-
     def view_edge_list(self):
         """
         Display the edge list. Compute it if needed.
@@ -451,10 +436,15 @@ class Graph:
         Get the number of vertices in the graph
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph* g = <gdf_graph*> graph
-        err = gdf_add_adj_list(g)
-        cudf.bindings.cudf_cpp.check_gdf_error(err)
-        return g.adjList.offsets.size - 1
+        cdef gdf_graph * g = <gdf_graph*> graph
+        if g.adjList:
+            return g.adjList.offsets.size - 1
+        elif g.transposedAdjList:
+            return g.transposedAdjList.offsets.size - 1
+        else:
+            err = gdf_add_adj_list(g)
+            cudf.bindings.cudf_cpp.check_gdf_error(err)
+            return g.adjList.offsets.size - 1   
 
     def in_degree(self, vertex_subset = None):
         """

From 85adcd8dd773d7fb3864010ed3c0f5286dabe493 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 00:29:45 -0700
Subject: [PATCH 13/18] there were two implementations of delete_adj_list in
 class Graph; removed one.

---
 python/cugraph/graph/c_graph.pyx | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 15a07543355..63cafa582a2 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -375,6 +375,13 @@ class Graph:
 
         return cudf.Series(offsets_data), cudf.Series(indices_data)
 
+    def delete_transposed_adj_list(self):
+        """
+        Delete the transposed adjacency list.
+        """
+        cdef uintptr_t graph = self.graph_ptr
+        err = gdf_delete_transposed_adj_list(<gdf_graph*> graph)
+        cudf.bindings.cudf_cpp.check_gdf_error(err)
 
     def get_two_hop_neighbors(self):
         """
@@ -415,22 +422,6 @@ class Graph:
 
         return df
 
-    def delete_adj_list(self):
-        """
-        Delete the adjacency list.
-        """
-        cdef uintptr_t graph = self.graph_ptr
-        err = gdf_delete_adj_list(<gdf_graph*> graph)
-        cudf.bindings.cudf_cpp.check_gdf_error(err)
-
-    def delete_transposed_adj_list(self):
-        """
-        Delete the transposed adjacency list.
-        """
-        cdef uintptr_t graph = self.graph_ptr
-        err = gdf_delete_transposed_adj_list(<gdf_graph*> graph)
-        cudf.bindings.cudf_cpp.check_gdf_error(err)
-
     def num_vertices(self):
         """
         Get the number of vertices in the graph

From ee30f6fb78e1259f0715cbac1d9bf3aa4f892152 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 00:35:06 -0700
Subject: [PATCH 14/18] changed variable names in view_adj_list and
 view_transposed_adj_list so that view_transposed_adj_list better mirrors
 view_adj_list (except for replacing adjList with transposedAdjList)

---
 python/cugraph/graph/c_graph.pyx | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 63cafa582a2..9edb7ec8d19 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -303,18 +303,18 @@ class Graph:
         err = gdf_add_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        col_size_off = self.num_vertices() + 1
-        col_size_ind = g.adjList.indices.size
+        offset_col_size = self.num_vertices() + 1
+        index_col_size = g.adjList.indices.size
 
         cdef uintptr_t offset_col_data = <uintptr_t> g.adjList.offsets.data
         cdef uintptr_t index_col_data = <uintptr_t> g.adjList.indices.data
 
         offsets_data = rmm.device_array_from_ptr(offset_col_data,
-                                     nelem=col_size_off,
+                                     nelem=offset_col_size,
                                      dtype=np.int32) # ,
                                      # finalizer=rmm._make_finalizer(offset_col_data, 0))
         indices_data = rmm.device_array_from_ptr(index_col_data,
-                                     nelem=col_size_ind,
+                                     nelem=index_col_size,
                                      dtype=np.int32) # ,
                                      # finalizer=rmm._make_finalizer(index_col_data, 0))
         # g.adjList.offsets.data and g.adjList.indices.data are not owned by
@@ -355,20 +355,20 @@ class Graph:
         err = gdf_add_transposed_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
-        off_size = self.num_vertices() + 1
-        ind_size = g.transposedAdjList.indices.size
+        offset_col_size = self.num_vertices() + 1
+        inex_col_size = g.transposedAdjList.indices.size
 
         cdef uintptr_t offset_col_data = <uintptr_t> g.transposedAdjList.offsets.data
-        cdef uintptr_t indices_col_data = <uintptr_t> g.transposedAdjList.indices.data
+        cdef uintptr_t index_col_data = <uintptr_t> g.transposedAdjList.indices.data
 
         offsets_data = rmm.device_array_from_ptr(offset_col_data,
-                                     nelem=off_size,
+                                     nelem=offset_col_size,
                                      dtype=np.int32)  # ,
-                                     # finalizer=rmm._make_finalizer(offsets_col_data, 0))
-        indices_data = rmm.device_array_from_ptr(indices_col_data,
-                                     nelem=ind_size,
+                                     # finalizer=rmm._make_finalizer(offset_col_data, 0))
+        indices_data = rmm.device_array_from_ptr(index_col_data,
+                                     nelem=inex_col_size,
                                      dtype=np.int32)  # ,
-                                     # finalizer=rmm._make_finalizer(indices_col_data, 0))
+                                     # finalizer=rmm._make_finalizer(index_col_data, 0))
         # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data
         # are not owned by this instance, so should not be freed here (this
         # will lead to double free, and undefined behavior).

From 2b96d97e56298aa4d1686f528e864541cbf72192 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 00:52:29 -0700
Subject: [PATCH 15/18] removed unnecessary imports

---
 python/cugraph/bfs/bfs_wrapper.pyx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/cugraph/bfs/bfs_wrapper.pyx b/python/cugraph/bfs/bfs_wrapper.pyx
index ac226469390..caa965d44e6 100644
--- a/python/cugraph/bfs/bfs_wrapper.pyx
+++ b/python/cugraph/bfs/bfs_wrapper.pyx
@@ -14,9 +14,7 @@
 from c_bfs cimport *
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-from libc.stdlib cimport calloc, malloc, free
 import cudf
-from librmm_cffi import librmm as rmm
 #from pygdf import Column
 import numpy as np
 

From ee1f0259c53dd2b242c366377b4dcf400e6d0360 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 09:17:33 -0700
Subject: [PATCH 16/18] updated change log.

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b7d17641c67..e8856c14cde 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@
 - PR #218 Update c_graph.pyx 
 - PR #224 Update erroneous comments in overlap_wrapper.pyx, woverlap_wrapper.pyx, test_louvain.py, and spectral_clustering.pyx
 - PR #220 Fixed bugs in Nvgraph triangle counting
+- PR #232 Fixed memory leaks in managing cudf columns.
 
 # cuGraph 0.6.0 (22 Mar 2019)
 

From ab837dda7a1914a6ca45926cd85180a58446a618 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Fri, 19 Apr 2019 15:11:19 -0700
Subject: [PATCH 17/18] fixed typo in variable name

---
 python/cugraph/graph/c_graph.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index 9edb7ec8d19..172aae60999 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -356,7 +356,7 @@ class Graph:
         cudf.bindings.cudf_cpp.check_gdf_error(err)
 
         offset_col_size = self.num_vertices() + 1
-        inex_col_size = g.transposedAdjList.indices.size
+        index_col_size = g.transposedAdjList.indices.size
 
         cdef uintptr_t offset_col_data = <uintptr_t> g.transposedAdjList.offsets.data
         cdef uintptr_t index_col_data = <uintptr_t> g.transposedAdjList.indices.data
@@ -366,7 +366,7 @@ class Graph:
                                      dtype=np.int32)  # ,
                                      # finalizer=rmm._make_finalizer(offset_col_data, 0))
         indices_data = rmm.device_array_from_ptr(index_col_data,
-                                     nelem=inex_col_size,
+                                     nelem=index_col_size,
                                      dtype=np.int32)  # ,
                                      # finalizer=rmm._make_finalizer(index_col_data, 0))
         # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data

From 65a63103e47a1e9264e775d6e8dca2e7ccf05dc1 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <seunghwak@nvidia.com>
Date: Mon, 22 Apr 2019 07:49:25 -0700
Subject: [PATCH 18/18] fixed a typo

---
 python/cugraph/jaccard/wjaccard_wrapper.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cugraph/jaccard/wjaccard_wrapper.pyx b/python/cugraph/jaccard/wjaccard_wrapper.pyx
index cba0da8fc53..df04dce512a 100644
--- a/python/cugraph/jaccard/wjaccard_wrapper.pyx
+++ b/python/cugraph/jaccard/wjaccard_wrapper.pyx
@@ -83,7 +83,7 @@ cpdef jaccard_w(input_graph, weights, first=None, second=None):
     cdef gdf_column c_weight_col
     cdef gdf_column c_first_col
     cdef gdf_column c_second_col
-    cdef gdf_column c_indices_col
+    cdef gdf_column c_index_col
 
     if type(first) == cudf.dataframe.series.Series and type(second) == cudf.dataframe.series.Series:
         resultSize = len(first)