Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Replace UMAP functionality dependency on nvgraph with RAFT Spectral Clustering #2500

Merged
merged 12 commits into from
Jul 17, 2020
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- PR #2394: Adding cosine & correlation distance for KNN
- PR #2392: PCA can accept sparse inputs, and sparse prim for computing covariance
- PR #2465: Support pandas 1.0+
- PR #2500: Replace UMAP functionality dependency on nvgraph with RAFT Spectral Clustering

## Improvements
- PR #2336: Eliminate `rmm.device_array` usage
Expand Down
7 changes: 4 additions & 3 deletions cpp/cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@ else(DEFINED ENV{RAFT_PATH})
set(RAFT_DIR ${CMAKE_CURRENT_BINARY_DIR}/raft CACHE STRING "Path to RAFT repo")

ExternalProject_Add(raft
GIT_REPOSITORY https://github.com/rapidsai/raft.git
GIT_TAG b58f97f2b5382a633e43daec31b26adf52e19a3b
GIT_REPOSITORY https://github.com/aschaffer/raft.git
aschaffer marked this conversation as resolved.
Show resolved Hide resolved
GIT_TAG 28e9d4adfe604bac89c825d27cc5a545417d535f
PREFIX ${RAFT_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND "")

# Redefining RAFT_DIR so it coincides with the one inferred by env variable.
set(RAFT_DIR ${RAFT_DIR}/src/raft/ CACHE STRING "Path to RAFT repo")
#set(RAFT_DIR ${RAFT_DIR}/src/raft/ CACHE STRING "Path to RAFT repo")
aschaffer marked this conversation as resolved.
Show resolved Hide resolved
set(RAFT_DIR ${RAFT_DIR}/src/raft/)
aschaffer marked this conversation as resolved.
Show resolved Hide resolved
endif(DEFINED ENV{RAFT_PATH})


Expand Down
66 changes: 34 additions & 32 deletions cpp/src_prims/sparse/spectral.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include "coo.cuh"
#include "cusparse_wrappers.h"

#include <raft/spectral/partition.hpp>

namespace MLCommon {
namespace Spectral {

Expand Down Expand Up @@ -147,53 +149,53 @@ void fit_embedding(cusparseHandle_t handle, int *rows, int *cols, T *vals,
int nnz, int n, int n_components, T *out,
std::shared_ptr<deviceAllocator> d_alloc,
cudaStream_t stream) {
nvgraphHandle_t grapHandle;
cudaDataType_t edge_dimT = CUDA_R_32F;
NVGRAPH_CHECK(nvgraphCreate(&grapHandle));

device_buffer<int> src_offsets(d_alloc, stream, n + 1);
device_buffer<int> dst_cols(d_alloc, stream, nnz);
device_buffer<T> dst_vals(d_alloc, stream, nnz);
coo2csr(handle, rows, cols, vals, nnz, n, src_offsets.data(), dst_cols.data(),
dst_vals.data(), d_alloc, stream);

nvgraphCSRTopology32I_st CSR_input;
CSR_input.destination_indices = dst_cols.data();
CSR_input.nedges = nnz;
CSR_input.nvertices = n;
CSR_input.source_offsets = src_offsets.data();

int weight_index = 0;

device_buffer<T> eigVals(d_alloc, stream, n_components + 1);
device_buffer<T> eigVecs(d_alloc, stream, n * (n_components + 1));
device_buffer<int> labels(d_alloc, stream, n);

CUDA_CHECK(cudaStreamSynchronize(stream));
//raft spectral clustering:
//
using index_type = int;
using value_type = T;

// Spectral clustering parameters
struct SpectralClusteringParameter clustering_params;
clustering_params.n_clusters = n_components + 1;
clustering_params.n_eig_vects = n_components + 1;
clustering_params.algorithm = NVGRAPH_BALANCED_CUT_LANCZOS;
clustering_params.evs_tolerance = 0.01f;
clustering_params.evs_max_iter = 0;
clustering_params.kmean_tolerance = 0.0f;
clustering_params.kmean_max_iter = 1;
raft::handle_t r_handle;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@divyegala JFYI. Assuming this PR is merged before yours, we'll have to update this logic to use the raft handle that comes inside our cumlHandle.

r_handle.set_stream(stream);

nvgraphGraphDescr_t graph;
NVGRAPH_CHECK(nvgraphCreateGraphDescr(grapHandle, &graph));
NVGRAPH_CHECK(nvgraphSetGraphStructure(grapHandle, graph, (void *)&CSR_input,
NVGRAPH_CSR_32));
NVGRAPH_CHECK(nvgraphAllocateEdgeData(grapHandle, graph, 1, &edge_dimT));
NVGRAPH_CHECK(nvgraphSetEdgeData(grapHandle, graph, (void *)vals, 0));
//TODO: r_handle to be passed as argument;
aschaffer marked this conversation as resolved.
Show resolved Hide resolved
//this will be fixed in a separate refactoring PR;

index_type *ro = src_offsets.data();
index_type *ci = dst_cols.data();
value_type *vs = dst_vals.data();

raft::matrix::sparse_matrix_t<index_type, value_type> const r_csr_m{
r_handle, ro, ci, vs, n, nnz};

index_type neigvs = n_components + 1;
index_type maxiter = 4000; //default reset value (when set to 0);
value_type tol = 0.01;
index_type restart_iter = 15 + neigvs; //what cugraph is using
auto t_exe_p = thrust::cuda::par.on(stream);

raft::eigen_solver_config_t<index_type, value_type> cfg{neigvs, maxiter,
restart_iter, tol};

raft::lanczos_solver_t<index_type, value_type> eig_solver{cfg};

NVGRAPH_CHECK(nvgraphSpectralClustering(grapHandle, graph, weight_index,
&clustering_params, labels.data(),
eigVals.data(), eigVecs.data()));
raft::cluster_solver_config_t<index_type, value_type> clust_cfg{
aschaffer marked this conversation as resolved.
Show resolved Hide resolved
n_components + 1, 1, 0.1}; // kmeans is not really meant to be run, here
raft::kmeans_solver_t<index_type, value_type> cluster_solver{clust_cfg};

NVGRAPH_CHECK(nvgraphDestroyGraphDescr(grapHandle, graph));
NVGRAPH_CHECK(nvgraphDestroy(grapHandle));
raft::spectral::partition(r_handle, t_exe_p, r_csr_m, eig_solver,
cluster_solver, labels.data(), eigVals.data(),
eigVecs.data());

MLCommon::copy<T>(out, eigVecs.data() + n, n * n_components, stream);

Expand Down