diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
index e5bba7bd5ce..e41aa9a9050 100644
--- a/cpp/include/cugraph/algorithms.hpp
+++ b/cpp/include/cugraph/algorithms.hpp
@@ -142,6 +142,8 @@ void overlap_list(GraphCSRView<VT, ET, WT> const &graph,
  * @tparam weight_t                                   Type of edge weights. Supported values : float
  * or double.
  *
+ * @param[in] handle                          Library handle (RAFT). If a communicator is set in the
+ * handle, the multi GPU version will be selected.
  * @param[in] graph                             cuGraph graph descriptor, should contain the
  * connectivity information as a COO. Graph is considered undirected. Edge weights are used for this
  * algorithm and set to 1 by default.
@@ -178,7 +180,8 @@ void overlap_list(GraphCSRView<VT, ET, WT> const &graph,
  *
  */
 template <typename vertex_t, typename edge_t, typename weight_t>
-void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
+void force_atlas2(raft::handle_t const &handle,
+                  GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                   float *pos,
                   const int max_iter                            = 500,
                   float *x_start                                = nullptr,
diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp
index ebef93e74fb..a3d413225e4 100644
--- a/cpp/src/layout/barnes_hut.hpp
+++ b/cpp/src/layout/barnes_hut.hpp
@@ -17,14 +17,15 @@
 #pragma once
 
 #include <rmm/thrust_rmm_allocator.h>
-#include <cugraph/utilities/error.hpp>
+#include <rmm/device_uvector.hpp>
 
-#include <stdio.h>
 #include <converters/COOtoCSR.cuh>
+#include <utilities/graph_utils.cuh>
+
 #include <cugraph/graph.hpp>
 #include <cugraph/internals.hpp>
+#include <cugraph/utilities/error.hpp>
 
-#include <utilities/graph_utils.cuh>
 #include "bh_kernels.hpp"
 #include "fa2_kernels.hpp"
 #include "utils.hpp"
@@ -33,7 +34,8 @@ namespace cugraph {
 namespace detail {
 
 template <typename vertex_t, typename edge_t, typename weight_t>
-void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
+void barnes_hut(raft::handle_t const &handle,
+                GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                 float *pos,
                 const int max_iter                            = 500,
                 float *x_start                                = nullptr,
@@ -50,7 +52,7 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                 bool verbose                                  = false,
                 internals::GraphBasedDimRedCallback *callback = nullptr)
 {
-  cudaStream_t stream = {nullptr};
+  cudaStream_t stream = handle.get_stream();
   const edge_t e      = graph.number_of_edges;
   const vertex_t n    = graph.number_of_vertices;
 
@@ -65,15 +67,15 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
 
   // Allocate more space
   //---------------------------------------------------
-  rmm::device_vector<unsigned> d_limiter(1);
-  rmm::device_vector<int> d_maxdepthd(1);
-  rmm::device_vector<int> d_bottomd(1);
-  rmm::device_vector<float> d_radiusd(1);
+  rmm::device_uvector<unsigned> d_limiter(1, stream);
+  rmm::device_uvector<int> d_maxdepthd(1, stream);
+  rmm::device_uvector<int> d_bottomd(1, stream);
+  rmm::device_uvector<float> d_radiusd(1, stream);
 
-  unsigned *limiter = d_limiter.data().get();
-  int *maxdepthd    = d_maxdepthd.data().get();
-  int *bottomd      = d_bottomd.data().get();
-  float *radiusd    = d_radiusd.data().get();
+  unsigned *limiter = d_limiter.data();
+  int *maxdepthd    = d_maxdepthd.data();
+  int *bottomd      = d_bottomd.data();
+  float *radiusd    = d_radiusd.data();
 
   InitializationKernel<<<1, 1, 0, stream>>>(limiter, maxdepthd, radiusd);
   CHECK_CUDA(stream);
@@ -83,51 +85,52 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
   const float theta_squared = theta * theta;
   const int NNODES          = nnodes;
 
-  rmm::device_vector<int> d_startl(nnodes + 1, 0);
-  rmm::device_vector<int> d_childl((nnodes + 1) * 4, 0);
+  rmm::device_uvector<int> d_startl(nnodes + 1, stream);
+  rmm::device_uvector<int> d_childl((nnodes + 1) * 4, stream);
   // FA2 requires degree + 1
-  rmm::device_vector<int> d_massl(nnodes + 1, 1.f);
+  rmm::device_uvector<int> d_massl(nnodes + 1, stream);
+  thrust::fill(rmm::exec_policy(stream)->on(stream), d_massl.begin(), d_massl.end(), 1.f);
 
-  rmm::device_vector<float> d_maxxl(blocks * FACTOR1, 0);
-  rmm::device_vector<float> d_maxyl(blocks * FACTOR1, 0);
-  rmm::device_vector<float> d_minxl(blocks * FACTOR1, 0);
-  rmm::device_vector<float> d_minyl(blocks * FACTOR1, 0);
+  rmm::device_uvector<float> d_maxxl(blocks * FACTOR1, stream);
+  rmm::device_uvector<float> d_maxyl(blocks * FACTOR1, stream);
+  rmm::device_uvector<float> d_minxl(blocks * FACTOR1, stream);
+  rmm::device_uvector<float> d_minyl(blocks * FACTOR1, stream);
 
   // Actual mallocs
-  int *startl = d_startl.data().get();
-  int *childl = d_childl.data().get();
-  int *massl  = d_massl.data().get();
+  int *startl = d_startl.data();
+  int *childl = d_childl.data();
+  int *massl  = d_massl.data();
 
-  float *maxxl = d_maxxl.data().get();
-  float *maxyl = d_maxyl.data().get();
-  float *minxl = d_minxl.data().get();
-  float *minyl = d_minyl.data().get();
+  float *maxxl = d_maxxl.data();
+  float *maxyl = d_maxyl.data();
+  float *minxl = d_minxl.data();
+  float *minyl = d_minyl.data();
 
   // SummarizationKernel
-  rmm::device_vector<int> d_countl(nnodes + 1, 0);
-  int *countl = d_countl.data().get();
+  rmm::device_uvector<int> d_countl(nnodes + 1, stream);
+  int *countl = d_countl.data();
 
   // SortKernel
-  rmm::device_vector<int> d_sortl(nnodes + 1, 0);
-  int *sortl = d_sortl.data().get();
+  rmm::device_uvector<int> d_sortl(nnodes + 1, stream);
+  int *sortl = d_sortl.data();
 
   // RepulsionKernel
-  rmm::device_vector<float> d_rep_forces((nnodes + 1) * 2, 0);
-  float *rep_forces = d_rep_forces.data().get();
+  rmm::device_uvector<float> d_rep_forces((nnodes + 1) * 2, stream);
+  float *rep_forces = d_rep_forces.data();
 
-  rmm::device_vector<float> d_radius_squared(1, 0);
-  float *radiusd_squared = d_radius_squared.data().get();
+  rmm::device_uvector<float> d_radius_squared(1, stream);
+  float *radiusd_squared = d_radius_squared.data();
 
-  rmm::device_vector<float> d_nodes_pos((nnodes + 1) * 2, 0);
-  float *nodes_pos = d_nodes_pos.data().get();
+  rmm::device_uvector<float> d_nodes_pos((nnodes + 1) * 2, stream);
+  float *nodes_pos = d_nodes_pos.data();
 
   // Initialize positions with random values
   int random_state = 0;
 
   // Copy start x and y positions.
   if (x_start && y_start) {
-    copy(n, x_start, nodes_pos);
-    copy(n, y_start, nodes_pos + nnodes + 1);
+    raft::copy(nodes_pos, x_start, n, stream);
+    raft::copy(nodes_pos + nnodes + 1, y_start, n, stream);
   } else {
     random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream);
   }
@@ -138,15 +141,15 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
   float *swinging{nullptr};
   float *traction{nullptr};
 
-  rmm::device_vector<float> d_attract(n * 2, 0);
-  rmm::device_vector<float> d_old_forces(n * 2, 0);
-  rmm::device_vector<float> d_swinging(n, 0);
-  rmm::device_vector<float> d_traction(n, 0);
+  rmm::device_uvector<float> d_attract(n * 2, stream);
+  rmm::device_uvector<float> d_old_forces(n * 2, stream);
+  rmm::device_uvector<float> d_swinging(n, stream);
+  rmm::device_uvector<float> d_traction(n, stream);
 
-  attract    = d_attract.data().get();
-  old_forces = d_old_forces.data().get();
-  swinging   = d_swinging.data().get();
-  traction   = d_traction.data().get();
+  attract    = d_attract.data();
+  old_forces = d_old_forces.data();
+  swinging   = d_swinging.data();
+  traction   = d_traction.data();
 
   // Sort COO for coalesced memory access.
   sort(graph, stream);
@@ -191,10 +194,11 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
 
   for (int iter = 0; iter < max_iter; ++iter) {
     // Reset force values
-    fill((nnodes + 1) * 2, rep_forces, 0.f);
-    fill(n * 2, attract, 0.f);
-    fill(n, swinging, 0.f);
-    fill(n, traction, 0.f);
+    thrust::fill(
+      rmm::exec_policy(stream)->on(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), d_attract.begin(), d_attract.end(), 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), d_swinging.begin(), d_swinging.end(), 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), d_traction.begin(), d_traction.end(), 0.f);
 
     ResetKernel<<<1, 1, 0, stream>>>(radiusd_squared, bottomd, NNODES, radiusd);
     CHECK_CUDA(stream);
@@ -320,15 +324,15 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
     if (callback) callback->on_epoch_end(nodes_pos);
 
     if (verbose) {
-      printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency);
-      printf("jt: %f, ", jt);
-      printf("swinging: %f, traction: %f\n", s, t);
+      std::cout << "iteration: " << iter + 1 << ", speed: " << speed
+                << ", speed_efficiency: " << speed_efficiency << ", jt: " << jt
+                << ", swinging: " << s << ", traction: " << t << "\n";
     }
   }
 
   // Copy nodes positions into final output pos
-  copy(n, nodes_pos, pos);
-  copy(n, nodes_pos + nnodes + 1, pos + n);
+  raft::copy(pos, nodes_pos, n, stream);
+  raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream);
 
   if (callback) callback->on_train_end(nodes_pos);
 }
diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp
index abad5a5630f..d34f8843e9b 100644
--- a/cpp/src/layout/exact_fa2.hpp
+++ b/cpp/src/layout/exact_fa2.hpp
@@ -17,12 +17,13 @@
 #pragma once
 
 #include <rmm/thrust_rmm_allocator.h>
-#include <cugraph/utilities/error.hpp>
+#include <rmm/device_uvector.hpp>
 
-#include <stdio.h>
 #include <converters/COOtoCSR.cuh>
+
 #include <cugraph/graph.hpp>
 #include <cugraph/internals.hpp>
+#include <cugraph/utilities/error.hpp>
 
 #include "exact_repulsion.hpp"
 #include "fa2_kernels.hpp"
@@ -32,7 +33,8 @@ namespace cugraph {
 namespace detail {
 
 template <typename vertex_t, typename edge_t, typename weight_t>
-void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
+void exact_fa2(raft::handle_t const &handle,
+               GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                float *pos,
                const int max_iter                            = 500,
                float *x_start                                = nullptr,
@@ -48,7 +50,7 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                bool verbose                                  = false,
                internals::GraphBasedDimRedCallback *callback = nullptr)
 {
-  cudaStream_t stream = {nullptr};
+  cudaStream_t stream = handle.get_stream();
   const edge_t e      = graph.number_of_edges;
   const vertex_t n    = graph.number_of_vertices;
 
@@ -59,27 +61,28 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
   float *d_swinging{nullptr};
   float *d_traction{nullptr};
 
-  rmm::device_vector<float> repel(n * 2, 0);
-  rmm::device_vector<float> attract(n * 2, 0);
-  rmm::device_vector<float> old_forces(n * 2, 0);
+  rmm::device_uvector<float> repel(n * 2, stream);
+  rmm::device_uvector<float> attract(n * 2, stream);
+  rmm::device_uvector<float> old_forces(n * 2, stream);
   // FA2 requires degree + 1.
-  rmm::device_vector<int> mass(n, 1);
-  rmm::device_vector<float> swinging(n, 0);
-  rmm::device_vector<float> traction(n, 0);
-
-  d_repel      = repel.data().get();
-  d_attract    = attract.data().get();
-  d_old_forces = old_forces.data().get();
-  d_mass       = mass.data().get();
-  d_swinging   = swinging.data().get();
-  d_traction   = traction.data().get();
+  rmm::device_uvector<int> mass(n, stream);
+  thrust::fill(rmm::exec_policy(stream)->on(stream), mass.begin(), mass.end(), 1.f);
+  rmm::device_uvector<float> swinging(n, stream);
+  rmm::device_uvector<float> traction(n, stream);
+
+  d_repel      = repel.data();
+  d_attract    = attract.data();
+  d_old_forces = old_forces.data();
+  d_mass       = mass.data();
+  d_swinging   = swinging.data();
+  d_traction   = traction.data();
 
   int random_state = 0;
   random_vector(pos, n * 2, random_state, stream);
 
   if (x_start && y_start) {
-    copy(n, x_start, pos);
-    copy(n, y_start, pos + n);
+    raft::copy(pos, x_start, n, stream);
+    raft::copy(pos + n, y_start, n, stream);
   }
 
   // Sort COO for coalesced memory access.
@@ -110,10 +113,10 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
 
   for (int iter = 0; iter < max_iter; ++iter) {
     // Reset force arrays
-    fill(n * 2, d_repel, 0.f);
-    fill(n * 2, d_attract, 0.f);
-    fill(n, d_swinging, 0.f);
-    fill(n, d_traction, 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), repel.begin(), repel.end(), 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), attract.begin(), attract.end(), 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), swinging.begin(), swinging.end(), 0.f);
+    thrust::fill(rmm::exec_policy(stream)->on(stream), traction.begin(), traction.end(), 0.f);
 
     // Exact repulsion
     apply_repulsion<vertex_t>(pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream);
@@ -180,9 +183,9 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
     if (callback) callback->on_epoch_end(pos);
 
     if (verbose) {
-      printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency);
-      printf("jt: %f, ", jt);
-      printf("swinging: %f, traction: %f\n", s, t);
+      std::cout << "iteration: " << iter + 1 << ", speed: " << speed
+                << ", speed_efficiency: " << speed_efficiency << ", jt: " << jt
+                << ", swinging: " << s << ", traction: " << t << "\n";
     }
   }
 
diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu
index 6da9b77b45d..86c95cc883e 100644
--- a/cpp/src/layout/force_atlas2.cu
+++ b/cpp/src/layout/force_atlas2.cu
@@ -20,7 +20,8 @@
 namespace cugraph {
 
 template <typename vertex_t, typename edge_t, typename weight_t>
-void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
+void force_atlas2(raft::handle_t const &handle,
+                  GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                   float *pos,
                   const int max_iter,
                   float *x_start,
@@ -42,7 +43,8 @@ void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
   CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty");
 
   if (!barnes_hut_optimize) {
-    cugraph::detail::exact_fa2<vertex_t, edge_t, weight_t>(graph,
+    cugraph::detail::exact_fa2<vertex_t, edge_t, weight_t>(handle,
+                                                           graph,
                                                            pos,
                                                            max_iter,
                                                            x_start,
@@ -58,7 +60,8 @@ void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
                                                            verbose,
                                                            callback);
   } else {
-    cugraph::detail::barnes_hut<vertex_t, edge_t, weight_t>(graph,
+    cugraph::detail::barnes_hut<vertex_t, edge_t, weight_t>(handle,
+                                                            graph,
                                                             pos,
                                                             max_iter,
                                                             x_start,
@@ -77,7 +80,8 @@ void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
   }
 }
 
-template void force_atlas2<int, int, float>(GraphCOOView<int, int, float> &graph,
+template void force_atlas2<int, int, float>(raft::handle_t const &handle,
+                                            GraphCOOView<int, int, float> &graph,
                                             float *pos,
                                             const int max_iter,
                                             float *x_start,
@@ -95,7 +99,8 @@ template void force_atlas2<int, int, float>(GraphCOOView<int, int, float> &graph
                                             bool verbose,
                                             internals::GraphBasedDimRedCallback *callback);
 
-template void force_atlas2<int, int, double>(GraphCOOView<int, int, double> &graph,
+template void force_atlas2<int, int, double>(raft::handle_t const &handle,
+                                             GraphCOOView<int, int, double> &graph,
                                              float *pos,
                                              const int max_iter,
                                              float *x_start,
diff --git a/cpp/tests/layout/force_atlas2_test.cu b/cpp/tests/layout/force_atlas2_test.cu
index c22c256ae02..f2f5561a7d8 100644
--- a/cpp/tests/layout/force_atlas2_test.cu
+++ b/cpp/tests/layout/force_atlas2_test.cu
@@ -111,9 +111,10 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam<Force_Atlas2_Usecase>
     std::vector<std::vector<int>> adj_matrix(m, std::vector<int>(m));
     std::vector<float> force_atlas2(m * 2);
 
+    raft::handle_t const handle;
+    auto stream = handle.get_stream();
     // device alloc
-    rmm::device_vector<float> force_atlas2_vector(m * 2);
-    float* d_force_atlas2 = force_atlas2_vector.data().get();
+    rmm::device_uvector<float> pos(m * 2, stream);
 
     // Read
     ASSERT_EQ((cugraph::test::mm_to_coo<int, T>(
@@ -131,13 +132,13 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam<Force_Atlas2_Usecase>
     }
 
     // Allocate COO on device
-    rmm::device_vector<int> srcs_v(nnz);
-    rmm::device_vector<int> dests_v(nnz);
-    rmm::device_vector<T> weights_v(nnz);
+    rmm::device_uvector<int> srcs_v(nnz, stream);
+    rmm::device_uvector<int> dests_v(nnz, stream);
+    rmm::device_uvector<T> weights_v(nnz, stream);
 
-    int* srcs  = srcs_v.data().get();
-    int* dests = dests_v.data().get();
-    T* weights = weights_v.data().get();
+    int* srcs  = srcs_v.data();
+    int* dests = dests_v.data();
+    T* weights = weights_v.data();
 
     // FIXME: RAFT error handling mechanism should be used instead
     CUDA_TRY(cudaMemcpy(srcs, &cooRowInd[0], sizeof(int) * nnz, cudaMemcpyDefault));
@@ -163,8 +164,9 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam<Force_Atlas2_Usecase>
     if (PERF) {
       hr_clock.start();
       for (int i = 0; i < PERF_MULTIPLIER; ++i) {
-        cugraph::force_atlas2<int, int, T>(G,
-                                           d_force_atlas2,
+        cugraph::force_atlas2<int, int, T>(handle,
+                                           G,
+                                           pos.data(),
                                            max_iter,
                                            x_start,
                                            y_start,
@@ -185,8 +187,9 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam<Force_Atlas2_Usecase>
       force_atlas2_time.push_back(time_tmp);
     } else {
       cudaProfilerStart();
-      cugraph::force_atlas2<int, int, T>(G,
-                                         d_force_atlas2,
+      cugraph::force_atlas2<int, int, T>(handle,
+                                         G,
+                                         pos.data(),
                                          max_iter,
                                          x_start,
                                          y_start,
@@ -207,7 +210,7 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam<Force_Atlas2_Usecase>
 
     // Copy pos to host
     std::vector<float> h_pos(m * 2);
-    CUDA_TRY(cudaMemcpy(&h_pos[0], d_force_atlas2, sizeof(float) * m * 2, cudaMemcpyDeviceToHost));
+    CUDA_TRY(cudaMemcpy(&h_pos[0], pos.data(), sizeof(float) * m * 2, cudaMemcpyDeviceToHost));
 
     // Transpose the data
     std::vector<std::vector<double>> C_contiguous_embedding(m, std::vector<double>(2));
diff --git a/python/cugraph/layout/force_atlas2.pxd b/python/cugraph/layout/force_atlas2.pxd
index bf5186c91f9..5496d1b655e 100644
--- a/python/cugraph/layout/force_atlas2.pxd
+++ b/python/cugraph/layout/force_atlas2.pxd
@@ -25,6 +25,7 @@ cdef extern from "cugraph/internals.hpp" namespace "cugraph::internals":
 cdef extern from "cugraph/algorithms.hpp" namespace "cugraph":
 
     cdef void force_atlas2[vertex_t, edge_t, weight_t](
+        const handle_t &handle,
         GraphCOOView[vertex_t, edge_t, weight_t] &graph,
         float *pos,
         const int max_iter,
diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx
index 7b801d19f1c..1644875f034 100644
--- a/python/cugraph/layout/force_atlas2_wrapper.pyx
+++ b/python/cugraph/layout/force_atlas2_wrapper.pyx
@@ -49,6 +49,10 @@ def force_atlas2(input_graph,
     Call force_atlas2
     """
 
+    cdef unique_ptr[handle_t] handle_ptr
+    handle_ptr.reset(new handle_t())
+    handle_ = handle_ptr.get();
+
     if not input_graph.edgelist:
         input_graph.view_edge_list()
 
@@ -61,12 +65,19 @@ def force_atlas2(input_graph,
     df = cudf.DataFrame()
     df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32))
 
-    cdef uintptr_t c_src_indices = input_graph.edgelist.edgelist_df['src'].__cuda_array_interface__['data'][0]
-    cdef uintptr_t c_dst_indices = input_graph.edgelist.edgelist_df['dst'].__cuda_array_interface__['data'][0]
+    src = input_graph.edgelist.edgelist_df['src']
+    dst = input_graph.edgelist.edgelist_df['dst']
+
+    [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32])
+
+    cdef uintptr_t c_src_indices = src.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_dst_indices = dst.__cuda_array_interface__['data'][0]
     cdef uintptr_t c_weights = <uintptr_t> NULL
 
     if input_graph.edgelist.weights:
-        c_weights = input_graph.edgelist.edgelist_df['weights'].__cuda_array_interface__['data'][0]
+        weights = input_graph.edgelist.edgelist_df["weights"]
+        [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.float32, np.float64])
+        c_weights = weights.__cuda_array_interface__['data'][0]
 
     cdef uintptr_t x_start = <uintptr_t>NULL
     cdef uintptr_t y_start = <uintptr_t>NULL
@@ -100,7 +111,8 @@ def force_atlas2(input_graph,
         graph_double = GraphCOOView[int,int, double](<int*>c_src_indices,
                         <int*>c_dst_indices, <double*>c_weights, num_verts, num_edges)
 
-        c_force_atlas2[int, int, double](graph_double,
+        c_force_atlas2[int, int, double](handle_[0],
+                        graph_double,
                         <float*>pos_ptr,
                         <int>max_iter,
                         <float*>x_start,
@@ -121,7 +133,8 @@ def force_atlas2(input_graph,
         graph_float = GraphCOOView[int,int,float](<int*>c_src_indices,
                 <int*>c_dst_indices, <float*>c_weights, num_verts,
                 num_edges)
-        c_force_atlas2[int, int, float](graph_float,
+        c_force_atlas2[int, int, float](handle_[0],
+                graph_float,
                 <float*>pos_ptr,
                 <int>max_iter,
                 <float*>x_start,