Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle int64 in force atlas wrapper and update to uvector #1607

Merged
merged 8 commits on May 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cpp/include/cugraph/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ void overlap_list(GraphCSRView<VT, ET, WT> const &graph,
* @tparam weight_t Type of edge weights. Supported values : float
* or double.
*
* @param[in] handle Library handle (RAFT). If a communicator is set in the
* handle, the multi GPU version will be selected.
* @param[in] graph cuGraph graph descriptor, should contain the
* connectivity information as a COO. Graph is considered undirected. Edge weights are used for this
* algorithm and set to 1 by default.
Expand Down Expand Up @@ -178,7 +180,8 @@ void overlap_list(GraphCSRView<VT, ET, WT> const &graph,
*
*/
template <typename vertex_t, typename edge_t, typename weight_t>
void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
void force_atlas2(raft::handle_t const &handle,
GraphCOOView<vertex_t, edge_t, weight_t> &graph,
float *pos,
const int max_iter = 500,
float *x_start = nullptr,
Expand Down
116 changes: 60 additions & 56 deletions cpp/src/layout/barnes_hut.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@
#pragma once

#include <rmm/thrust_rmm_allocator.h>
#include <cugraph/utilities/error.hpp>
#include <rmm/device_uvector.hpp>

#include <stdio.h>
#include <converters/COOtoCSR.cuh>
#include <utilities/graph_utils.cuh>

#include <cugraph/graph.hpp>
#include <cugraph/internals.hpp>
#include <cugraph/utilities/error.hpp>

#include <utilities/graph_utils.cuh>
#include "bh_kernels.hpp"
#include "fa2_kernels.hpp"
#include "utils.hpp"
Expand All @@ -33,7 +34,8 @@ namespace cugraph {
namespace detail {

template <typename vertex_t, typename edge_t, typename weight_t>
void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
void barnes_hut(raft::handle_t const &handle,
GraphCOOView<vertex_t, edge_t, weight_t> &graph,
float *pos,
const int max_iter = 500,
float *x_start = nullptr,
Expand All @@ -50,7 +52,7 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
bool verbose = false,
internals::GraphBasedDimRedCallback *callback = nullptr)
{
cudaStream_t stream = {nullptr};
cudaStream_t stream = handle.get_stream();
const edge_t e = graph.number_of_edges;
const vertex_t n = graph.number_of_vertices;

Expand All @@ -65,15 +67,15 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,

// Allocate more space
//---------------------------------------------------
rmm::device_vector<unsigned> d_limiter(1);
rmm::device_vector<int> d_maxdepthd(1);
rmm::device_vector<int> d_bottomd(1);
rmm::device_vector<float> d_radiusd(1);
rmm::device_uvector<unsigned> d_limiter(1, stream);
rmm::device_uvector<int> d_maxdepthd(1, stream);
rmm::device_uvector<int> d_bottomd(1, stream);
rmm::device_uvector<float> d_radiusd(1, stream);

unsigned *limiter = d_limiter.data().get();
int *maxdepthd = d_maxdepthd.data().get();
int *bottomd = d_bottomd.data().get();
float *radiusd = d_radiusd.data().get();
unsigned *limiter = d_limiter.data();
int *maxdepthd = d_maxdepthd.data();
int *bottomd = d_bottomd.data();
float *radiusd = d_radiusd.data();

InitializationKernel<<<1, 1, 0, stream>>>(limiter, maxdepthd, radiusd);
CHECK_CUDA(stream);
Expand All @@ -83,51 +85,52 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
const float theta_squared = theta * theta;
const int NNODES = nnodes;

rmm::device_vector<int> d_startl(nnodes + 1, 0);
rmm::device_vector<int> d_childl((nnodes + 1) * 4, 0);
rmm::device_uvector<int> d_startl(nnodes + 1, stream);
rmm::device_uvector<int> d_childl((nnodes + 1) * 4, stream);
// FA2 requires degree + 1
rmm::device_vector<int> d_massl(nnodes + 1, 1.f);
rmm::device_uvector<int> d_massl(nnodes + 1, stream);
thrust::fill(rmm::exec_policy(stream)->on(stream), d_massl.begin(), d_massl.end(), 1.f);

rmm::device_vector<float> d_maxxl(blocks * FACTOR1, 0);
rmm::device_vector<float> d_maxyl(blocks * FACTOR1, 0);
rmm::device_vector<float> d_minxl(blocks * FACTOR1, 0);
rmm::device_vector<float> d_minyl(blocks * FACTOR1, 0);
rmm::device_uvector<float> d_maxxl(blocks * FACTOR1, stream);
rmm::device_uvector<float> d_maxyl(blocks * FACTOR1, stream);
rmm::device_uvector<float> d_minxl(blocks * FACTOR1, stream);
rmm::device_uvector<float> d_minyl(blocks * FACTOR1, stream);

// Actual mallocs
int *startl = d_startl.data().get();
int *childl = d_childl.data().get();
int *massl = d_massl.data().get();
int *startl = d_startl.data();
int *childl = d_childl.data();
int *massl = d_massl.data();

float *maxxl = d_maxxl.data().get();
float *maxyl = d_maxyl.data().get();
float *minxl = d_minxl.data().get();
float *minyl = d_minyl.data().get();
float *maxxl = d_maxxl.data();
float *maxyl = d_maxyl.data();
float *minxl = d_minxl.data();
float *minyl = d_minyl.data();

// SummarizationKernel
rmm::device_vector<int> d_countl(nnodes + 1, 0);
int *countl = d_countl.data().get();
rmm::device_uvector<int> d_countl(nnodes + 1, stream);
int *countl = d_countl.data();

// SortKernel
rmm::device_vector<int> d_sortl(nnodes + 1, 0);
int *sortl = d_sortl.data().get();
rmm::device_uvector<int> d_sortl(nnodes + 1, stream);
int *sortl = d_sortl.data();

// RepulsionKernel
rmm::device_vector<float> d_rep_forces((nnodes + 1) * 2, 0);
float *rep_forces = d_rep_forces.data().get();
rmm::device_uvector<float> d_rep_forces((nnodes + 1) * 2, stream);
float *rep_forces = d_rep_forces.data();

rmm::device_vector<float> d_radius_squared(1, 0);
float *radiusd_squared = d_radius_squared.data().get();
rmm::device_uvector<float> d_radius_squared(1, stream);
float *radiusd_squared = d_radius_squared.data();

rmm::device_vector<float> d_nodes_pos((nnodes + 1) * 2, 0);
float *nodes_pos = d_nodes_pos.data().get();
rmm::device_uvector<float> d_nodes_pos((nnodes + 1) * 2, stream);
float *nodes_pos = d_nodes_pos.data();

// Initialize positions with random values
int random_state = 0;

// Copy start x and y positions.
if (x_start && y_start) {
copy(n, x_start, nodes_pos);
copy(n, y_start, nodes_pos + nnodes + 1);
raft::copy(nodes_pos, x_start, n, stream);
raft::copy(nodes_pos + nnodes + 1, y_start, n, stream);
} else {
random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream);
}
Expand All @@ -138,15 +141,15 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
float *swinging{nullptr};
float *traction{nullptr};

rmm::device_vector<float> d_attract(n * 2, 0);
rmm::device_vector<float> d_old_forces(n * 2, 0);
rmm::device_vector<float> d_swinging(n, 0);
rmm::device_vector<float> d_traction(n, 0);
rmm::device_uvector<float> d_attract(n * 2, stream);
rmm::device_uvector<float> d_old_forces(n * 2, stream);
rmm::device_uvector<float> d_swinging(n, stream);
rmm::device_uvector<float> d_traction(n, stream);

attract = d_attract.data().get();
old_forces = d_old_forces.data().get();
swinging = d_swinging.data().get();
traction = d_traction.data().get();
attract = d_attract.data();
old_forces = d_old_forces.data();
swinging = d_swinging.data();
traction = d_traction.data();

// Sort COO for coalesced memory access.
sort(graph, stream);
Expand Down Expand Up @@ -191,10 +194,11 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,

for (int iter = 0; iter < max_iter; ++iter) {
// Reset force values
fill((nnodes + 1) * 2, rep_forces, 0.f);
fill(n * 2, attract, 0.f);
fill(n, swinging, 0.f);
fill(n, traction, 0.f);
thrust::fill(
rmm::exec_policy(stream)->on(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), d_attract.begin(), d_attract.end(), 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), d_swinging.begin(), d_swinging.end(), 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), d_traction.begin(), d_traction.end(), 0.f);

ResetKernel<<<1, 1, 0, stream>>>(radiusd_squared, bottomd, NNODES, radiusd);
CHECK_CUDA(stream);
Expand Down Expand Up @@ -320,15 +324,15 @@ void barnes_hut(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
if (callback) callback->on_epoch_end(nodes_pos);

if (verbose) {
printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency);
printf("jt: %f, ", jt);
printf("swinging: %f, traction: %f\n", s, t);
std::cout << "iteration: " << iter + 1 << ", speed: " << speed
<< ", speed_efficiency: " << speed_efficiency << ", jt: " << jt
<< ", swinging: " << s << ", traction: " << t << "\n";
}
}

// Copy nodes positions into final output pos
copy(n, nodes_pos, pos);
copy(n, nodes_pos + nnodes + 1, pos + n);
raft::copy(pos, nodes_pos, n, stream);
raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream);

if (callback) callback->on_train_end(nodes_pos);
}
Expand Down
55 changes: 29 additions & 26 deletions cpp/src/layout/exact_fa2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
#pragma once

#include <rmm/thrust_rmm_allocator.h>
#include <cugraph/utilities/error.hpp>
#include <rmm/device_uvector.hpp>

#include <stdio.h>
#include <converters/COOtoCSR.cuh>

#include <cugraph/graph.hpp>
#include <cugraph/internals.hpp>
#include <cugraph/utilities/error.hpp>

#include "exact_repulsion.hpp"
#include "fa2_kernels.hpp"
Expand All @@ -32,7 +33,8 @@ namespace cugraph {
namespace detail {

template <typename vertex_t, typename edge_t, typename weight_t>
void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
void exact_fa2(raft::handle_t const &handle,
GraphCOOView<vertex_t, edge_t, weight_t> &graph,
float *pos,
const int max_iter = 500,
float *x_start = nullptr,
Expand All @@ -48,7 +50,7 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
bool verbose = false,
internals::GraphBasedDimRedCallback *callback = nullptr)
{
cudaStream_t stream = {nullptr};
cudaStream_t stream = handle.get_stream();
const edge_t e = graph.number_of_edges;
const vertex_t n = graph.number_of_vertices;

Expand All @@ -59,27 +61,28 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
float *d_swinging{nullptr};
float *d_traction{nullptr};

rmm::device_vector<float> repel(n * 2, 0);
rmm::device_vector<float> attract(n * 2, 0);
rmm::device_vector<float> old_forces(n * 2, 0);
rmm::device_uvector<float> repel(n * 2, stream);
rmm::device_uvector<float> attract(n * 2, stream);
rmm::device_uvector<float> old_forces(n * 2, stream);
// FA2 requires degree + 1.
rmm::device_vector<int> mass(n, 1);
rmm::device_vector<float> swinging(n, 0);
rmm::device_vector<float> traction(n, 0);

d_repel = repel.data().get();
d_attract = attract.data().get();
d_old_forces = old_forces.data().get();
d_mass = mass.data().get();
d_swinging = swinging.data().get();
d_traction = traction.data().get();
rmm::device_uvector<int> mass(n, stream);
thrust::fill(rmm::exec_policy(stream)->on(stream), mass.begin(), mass.end(), 1.f);
rmm::device_uvector<float> swinging(n, stream);
rmm::device_uvector<float> traction(n, stream);

d_repel = repel.data();
d_attract = attract.data();
d_old_forces = old_forces.data();
d_mass = mass.data();
d_swinging = swinging.data();
d_traction = traction.data();

int random_state = 0;
random_vector(pos, n * 2, random_state, stream);

if (x_start && y_start) {
copy(n, x_start, pos);
copy(n, y_start, pos + n);
raft::copy(pos, x_start, n, stream);
raft::copy(pos + n, y_start, n, stream);
}

// Sort COO for coalesced memory access.
Expand Down Expand Up @@ -110,10 +113,10 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,

for (int iter = 0; iter < max_iter; ++iter) {
// Reset force arrays
fill(n * 2, d_repel, 0.f);
fill(n * 2, d_attract, 0.f);
fill(n, d_swinging, 0.f);
fill(n, d_traction, 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), repel.begin(), repel.end(), 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), attract.begin(), attract.end(), 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), swinging.begin(), swinging.end(), 0.f);
thrust::fill(rmm::exec_policy(stream)->on(stream), traction.begin(), traction.end(), 0.f);

// Exact repulsion
apply_repulsion<vertex_t>(pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream);
Expand Down Expand Up @@ -180,9 +183,9 @@ void exact_fa2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
if (callback) callback->on_epoch_end(pos);

if (verbose) {
printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency);
printf("jt: %f, ", jt);
printf("swinging: %f, traction: %f\n", s, t);
std::cout << "iteration: " << iter + 1 << ", speed: " << speed
<< ", speed_efficiency: " << speed_efficiency << ", jt: " << jt
<< ", swinging: " << s << ", traction: " << t << "\n";
}
}

Expand Down
15 changes: 10 additions & 5 deletions cpp/src/layout/force_atlas2.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
namespace cugraph {

template <typename vertex_t, typename edge_t, typename weight_t>
void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
void force_atlas2(raft::handle_t const &handle,
GraphCOOView<vertex_t, edge_t, weight_t> &graph,
float *pos,
const int max_iter,
float *x_start,
Expand All @@ -42,7 +43,8 @@ void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty");

if (!barnes_hut_optimize) {
cugraph::detail::exact_fa2<vertex_t, edge_t, weight_t>(graph,
cugraph::detail::exact_fa2<vertex_t, edge_t, weight_t>(handle,
graph,
pos,
max_iter,
x_start,
Expand All @@ -58,7 +60,8 @@ void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
verbose,
callback);
} else {
cugraph::detail::barnes_hut<vertex_t, edge_t, weight_t>(graph,
cugraph::detail::barnes_hut<vertex_t, edge_t, weight_t>(handle,
graph,
pos,
max_iter,
x_start,
Expand All @@ -77,7 +80,8 @@ void force_atlas2(GraphCOOView<vertex_t, edge_t, weight_t> &graph,
}
}

template void force_atlas2<int, int, float>(GraphCOOView<int, int, float> &graph,
template void force_atlas2<int, int, float>(raft::handle_t const &handle,
GraphCOOView<int, int, float> &graph,
float *pos,
const int max_iter,
float *x_start,
Expand All @@ -95,7 +99,8 @@ template void force_atlas2<int, int, float>(GraphCOOView<int, int, float> &graph
bool verbose,
internals::GraphBasedDimRedCallback *callback);

template void force_atlas2<int, int, double>(GraphCOOView<int, int, double> &graph,
template void force_atlas2<int, int, double>(raft::handle_t const &handle,
GraphCOOView<int, int, double> &graph,
float *pos,
const int max_iter,
float *x_start,
Expand Down
Loading