[WIP] Sparse semiring cleanup + hash table and batching strategies #207

Closed

41 commits (the file changes below are from 26 of these commits):
5b28699  moving sparse dist optim to raft (divyegala, Mar 18, 2021)
8e841ac  bucketing bloom filter (divyegala, Mar 24, 2021)
efcf972  trying stuff (divyegala, Mar 26, 2021)
8f5cc26  dropping tpb as template in strategies (divyegala, Mar 29, 2021)
78aff5a  New distances (cjnolet, Apr 1, 2021)
9aa58f2  Uncommenting hash strategy (cjnolet, Apr 1, 2021)
cf57c9f  Updating hash strategy (cjnolet, Apr 1, 2021)
530dd87  Updating to add hash table and bloom strategies (cjnolet, Apr 1, 2021)
62b3df4  Updates (cjnolet, Apr 1, 2021)
28d6e2d  More updates (cjnolet, Apr 2, 2021)
c79ee7a  Updating hash strategy (cjnolet, Apr 2, 2021)
e5d93dd  Merge branch 'branch-0.19' into HEAD (cjnolet, Apr 2, 2021)
628b169  Updates (cjnolet, Apr 7, 2021)
fc708a9  Merge branch 'branch-0.19' into fea-020-sparse_spmv_optim (cjnolet, Apr 7, 2021)
d9ecc40  trying to merge chunking bug fixes (divyegala, Apr 7, 2021)
d299cb8  correcting int to value_idx (divyegala, Apr 7, 2021)
900ad8c  correction to expansion and start index (divyegala, Apr 7, 2021)
634bc99  Adding correlation distance (cjnolet, Apr 7, 2021)
82cdbe1  Baseline (cjnolet, Apr 9, 2021)
4e819c0  Enabling baseline (cjnolet, Apr 9, 2021)
6bd450f  Enabling optimized primitive (cjnolet, Apr 9, 2021)
f1c1d06  Fixing style (cjnolet, Apr 19, 2021)
219a813  Cleanup (cjnolet, Apr 19, 2021)
0fee984  Style (cjnolet, Apr 19, 2021)
b7105d7  Removing unnecessary deltas (cjnolet, Apr 20, 2021)
e264514  Updating distances_config_t to use handle directly (cjnolet, Apr 20, 2021)
65f82bd  Merge branch 'branch-0.20' into semiring_primitives_optim_final (cjnolet, May 4, 2021)
2975fe3  Adding tests for newer distances (cjnolet, May 4, 2021)
0d064cc  Normalizing (cjnolet, May 20, 2021)
2057985  Separating new distances from optimizations (cjnolet, May 21, 2021)
3a669f5  Fixing style (cjnolet, May 21, 2021)
a415ebf  Merge branch 'branch-21.06' into semiring_primitives_optim_final (cjnolet, May 21, 2021)
99b4e14  Trying to get cuco working (cjnolet, May 21, 2021)
46c7d10  Removing dependencies.cmake (cjnolet, May 21, 2021)
ef9efa7  Raft is building all gpu archs. Checking this in the meantime (cjnolet, May 21, 2021)
a9d3608  changing cuco to dev branch (divyegala, May 26, 2021)
d613a77  working through build (divyegala, Jun 7, 2021)
2394084  fixing build (divyegala, Jun 7, 2021)
69ff625  tests passing for all strategies (divyegala, Jun 8, 2021)
7739d1f  integrating cuco changes and some small refactors (divyegala, Jun 9, 2021)
1098d89  merging upstream (divyegala, Jun 9, 2021)

5 changes: 3 additions & 2 deletions cpp/CMakeLists.txt
@@ -211,7 +211,9 @@ set(RAFT_INCLUDE_DIRECTORIES
   ${RAFT_INCLUDE_DIR}
   ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
   ${FAISS_INCLUDE_DIRS}
-  ${RMM_INCLUDE_DIRS})
+  ${RMM_INCLUDE_DIRS}
+  ${CUDA_CXX_DIR}/include
+  ${CUCO_DIR}/include)

 if(NOT CUB_IS_PART_OF_CTK)
   list(APPEND RAFT_INCLUDE_DIRECTORIES ${CUB_DIR}/src/cub)
@@ -290,7 +292,6 @@ if(BUILD_RAFT_TESTS)
     test/sparse/csr_transpose.cu
     test/sparse/degree.cu
     test/sparse/dist_coo_spmv.cu
-    test/sparse/dist_csr_spmv.cu
     test/sparse/distance.cu
     test/sparse/filter.cu
     test/sparse/knn.cu

32 changes: 32 additions & 0 deletions cpp/cmake/Dependencies.cmake
@@ -16,6 +16,36 @@

 include(ExternalProject)

+##############################################################################
+# - libcudacxx
+
+set(CUDA_CXX_DIR ${CMAKE_CURRENT_BINARY_DIR}/libcudacxx CACHE STRING "Path to libcudacxx repo")
+
+ExternalProject_Add(libcudacxx
+  GIT_REPOSITORY    https://github.com/NVIDIA/libcudacxx.git
+  GIT_TAG           main
+  PREFIX            ${CUDA_CXX_DIR}
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND     ""
+  INSTALL_COMMAND   "")
+
+set(CUDA_CXX_DIR ${CUDA_CXX_DIR}/src/libcudacxx/)
+
+##############################################################################
+# - cucollections - (header only) -----------------------------------------------------
+
+set(CUCO_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuCollections CACHE STRING "Path to cuCollections repo")
+
+ExternalProject_Add(cuCollections
+  GIT_REPOSITORY    https://github.com/divyegala/cuCollections.git
+  GIT_TAG           view-initialization-ctor
+  PREFIX            ${CUCO_DIR}
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND     ""
+  INSTALL_COMMAND   "")
+
+set(CUCO_DIR ${CUCO_DIR}/src/cuCollections/)
+
 ##############################################################################
 # - cub - (header only) ------------------------------------------------------

@@ -108,4 +138,6 @@ endif(BUILD_GTEST)
 if(NOT CUB_IS_PART_OF_CTK)
   add_dependencies(GTest::GTest cub)
 endif(NOT CUB_IS_PART_OF_CTK)
+add_dependencies(cuCollections libcudacxx)
+add_dependencies(GTest::GTest cuCollections)
 add_dependencies(FAISS::FAISS faiss)

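Both checkouts are header-only, so the ExternalProject steps above only fetch sources; the headers become visible through the include directories added to cpp/CMakeLists.txt. As a rough, hypothetical illustration (this file is not part of the PR), a compile-only check that the vendored headers are reachable could look like:

// check_vendored_headers.cu (hypothetical): verifies that the two header-only
// dependencies wired up in Dependencies.cmake are on the include path.
#include <cuda/std/atomic>      // provided by libcudacxx
#include <cuco/static_map.cuh>  // provided by cuCollections

int main() { return 0; }
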
8 changes: 4 additions & 4 deletions cpp/include/raft/cudart_utils.h
@@ -267,12 +267,12 @@ void allocate(Type*& ptr, size_t len, bool setZero = false) {
 }

 /** helper method to get max usable shared mem per block parameter */
-inline int getSharedMemPerBlock() {
+__host__ __device__ inline int getSharedMemPerBlock() {
   int devId;
-  CUDA_CHECK(cudaGetDevice(&devId));
+  cudaGetDevice(&devId);
   int smemPerBlk;
-  CUDA_CHECK(cudaDeviceGetAttribute(&smemPerBlk,
-                                    cudaDevAttrMaxSharedMemoryPerBlock, devId));
+  cudaDeviceGetAttribute(&smemPerBlk, cudaDevAttrMaxSharedMemoryPerBlock,
+                         devId);
   return smemPerBlk;
 }

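The change above drops the CUDA_CHECK wrappers so the helper can be marked __host__ __device__. A minimal host-side sketch of how such a helper is typically consumed when sizing dynamic shared memory; the kernel and launch parameters are illustrative assumptions, not code from this PR, and it assumes the helper lives in the raft namespace like the rest of cudart_utils.h:

// usage_sketch.cu (hypothetical)
#include <raft/cudart_utils.h>

// Illustrative kernel: stage one value per thread in dynamic shared memory.
__global__ void scale_kernel(float *x, int n) {
  extern __shared__ float tile[];
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    tile[threadIdx.x] = 2.0f * x[i];
    x[i] = tile[threadIdx.x];
  }
}

void scale(float *x, int n, cudaStream_t stream) {
  int tpb = 256;
  size_t smem = tpb * sizeof(float);
  // Guard the launch against the per-block shared memory limit reported by
  // the helper shown in the diff above.
  if (smem <= static_cast<size_t>(raft::getSharedMemPerBlock())) {
    scale_kernel<<<(n + tpb - 1) / tpb, tpb, smem, stream>>>(x, n);
  }
}
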
10 changes: 8 additions & 2 deletions cpp/include/raft/linalg/distance_type.h
@@ -52,10 +52,16 @@ enum DistanceType : unsigned short {
   Haversine = 13,
   /** Bray-Curtis distance **/
   BrayCurtis = 14,
-  /** Jensen-Shannon distance **/
+  /** Jensen-Shannon distance**/
   JensenShannon = 15,
+  /** Hamming distance **/
+  HammingUnexpanded = 16,
+  /** KLDivergence **/
+  KLDivergence = 17,
+  /** RusselRao **/
+  RusselRaoExpanded = 18,
   /** Dice-Sorensen distance **/
-  DiceExpanded = 16,
+  DiceExpanded = 19,
   /** Precomputed (special value) **/
   Precomputed = 100
 };

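Callers select behavior by switching on these enum values. A small hedged sketch follows; the parse_metric helper and its string names are illustrative rather than a RAFT API, and it assumes the enum sits in raft::distance as elsewhere in RAFT:

#include <raft/linalg/distance_type.h>

#include <stdexcept>
#include <string>

// Hypothetical helper mapping metric names onto the enum values added above.
inline raft::distance::DistanceType parse_metric(const std::string &name) {
  using raft::distance::DistanceType;
  if (name == "jensenshannon") return DistanceType::JensenShannon;
  if (name == "hamming") return DistanceType::HammingUnexpanded;
  if (name == "kldivergence") return DistanceType::KLDivergence;
  if (name == "russellrao") return DistanceType::RusselRaoExpanded;
  if (name == "dice") return DistanceType::DiceExpanded;
  throw std::invalid_argument("unsupported metric: " + name);
}
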
21 changes: 12 additions & 9 deletions cpp/include/raft/sparse/distance/bin_distance.cuh
@@ -85,7 +85,6 @@ void compute_bin_distance(value_t *out, const value_idx *Q_coo_rows,
                           const value_t *Q_data, value_idx Q_nnz,
                           const value_idx *R_coo_rows, const value_t *R_data,
                           value_idx R_nnz, value_idx m, value_idx n,
-                          cusparseHandle_t handle,
                           std::shared_ptr<raft::mr::device::allocator> alloc,
                           cudaStream_t stream, expansion_f expansion_func) {
   raft::mr::device::buffer<value_t> Q_norms(alloc, stream, m);
@@ -114,7 +113,8 @@ class jaccard_expanded_distances_t : public distances_t<value_t> {
   explicit jaccard_expanded_distances_t(
     const distances_config_t<value_idx, value_t> &config)
     : config_(&config),
-      workspace(config.allocator, config.stream, 0),
+      workspace(config.handle.get_device_allocator(),
+                config.handle.get_stream(), 0),
       ip_dists(config) {}

   void compute(value_t *out_dists) {
@@ -124,15 +124,16 @@ class jaccard_expanded_distances_t : public distances_t<value_t> {
     value_t *b_data = ip_dists.b_data_coo();

     raft::mr::device::buffer<value_idx> search_coo_rows(
-      config_->allocator, config_->stream, config_->a_nnz);
+      config_->handle.get_device_allocator(), config_->handle.get_stream(),
+      config_->a_nnz);
     raft::sparse::convert::csr_to_coo(config_->a_indptr, config_->a_nrows,
                                       search_coo_rows.data(), config_->a_nnz,
-                                      config_->stream);
+                                      config_->handle.get_stream());

     compute_bin_distance(
       out_dists, search_coo_rows.data(), config_->a_data, config_->a_nnz,
       b_indices, b_data, config_->b_nnz, config_->a_nrows, config_->b_nrows,
-      config_->handle, config_->allocator, config_->stream,
+      config_->handle.get_device_allocator(), config_->handle.get_stream(),
       [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) {
         value_t q_r_union = q_norm + r_norm;
         value_t denom = q_r_union - dot;
@@ -163,7 +164,8 @@ class dice_expanded_distances_t : public distances_t<value_t> {
   explicit dice_expanded_distances_t(
     const distances_config_t<value_idx, value_t> &config)
     : config_(&config),
-      workspace(config.allocator, config.stream, 0),
+      workspace(config.handle.get_device_allocator(),
+                config.handle.get_stream(), 0),
       ip_dists(config) {}

   void compute(value_t *out_dists) {
@@ -173,15 +175,16 @@ class dice_expanded_distances_t : public distances_t<value_t> {
     value_t *b_data = ip_dists.b_data_coo();

     raft::mr::device::buffer<value_idx> search_coo_rows(
-      config_->allocator, config_->stream, config_->a_nnz);
+      config_->handle.get_device_allocator(), config_->handle.get_stream(),
+      config_->a_nnz);
     raft::sparse::convert::csr_to_coo(config_->a_indptr, config_->a_nrows,
                                       search_coo_rows.data(), config_->a_nnz,
-                                      config_->stream);
+                                      config_->handle.get_stream());

     compute_bin_distance(
       out_dists, search_coo_rows.data(), config_->a_data, config_->a_nnz,
       b_indices, b_data, config_->b_nnz, config_->a_nrows, config_->b_nrows,
-      config_->handle, config_->allocator, config_->stream,
+      config_->handle.get_device_allocator(), config_->handle.get_stream(),
       [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) {
         value_t q_r_union = q_norm + r_norm;
         value_t dice = (2 * dot) / q_r_union;

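The expansion lambdas are truncated by the diff view, but the quantities they build are the standard expanded binary forms: with dot as the intersection size and q_norm, r_norm as the per-row sums (an assumption here), Jaccard distance is 1 - dot / (q_norm + r_norm - dot) and Dice distance is 1 - 2*dot / (q_norm + r_norm). A scalar restatement as a hedged sketch (helper names are illustrative, and the actual return statements are cut off above):

#include <cstdio>

// Illustrative helpers, not RAFT API. A production version would also guard
// against a zero denominator for empty rows.
float jaccard_distance(float dot, float q_norm, float r_norm) {
  float q_r_union = q_norm + r_norm;
  float denom = q_r_union - dot;           // size of the union for binary data
  return 1.0f - dot / denom;               // 1 - intersection / union
}

float dice_distance(float dot, float q_norm, float r_norm) {
  float q_r_union = q_norm + r_norm;
  return 1.0f - (2.0f * dot) / q_r_union;  // 1 - 2*intersection / (|A| + |B|)
}

int main() {
  // A = {1,2,3}, B = {2,3,4}: dot = 2, |A| = |B| = 3
  std::printf("jaccard = %f, dice = %f\n",
              jaccard_distance(2, 3, 3), dice_distance(2, 3, 3));
  return 0;
}
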
10 changes: 4 additions & 6 deletions cpp/include/raft/sparse/distance/common.h
@@ -16,15 +16,16 @@

 #pragma once

-#include <cusparse_v2.h>
-#include <raft/mr/device/allocator.hpp>
+#include <raft/handle.hpp>

 namespace raft {
 namespace sparse {
 namespace distance {

 template <typename value_idx, typename value_t>
 struct distances_config_t {
+  distances_config_t(raft::handle_t &handle_) : handle(handle_) {}
+
   // left side
   value_idx a_nrows;
   value_idx a_ncols;
@@ -41,10 +42,7 @@ struct distances_config_t {
   value_idx *b_indices;
   value_t *b_data;

-  cusparseHandle_t handle;
-
-  std::shared_ptr<raft::mr::device::allocator> allocator;
-  cudaStream_t stream;
+  raft::handle_t &handle;
 };

 template <typename value_t>

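A short sketch of how the refactored config is populated after this change. The calling function and argument values are placeholders; only fields that appear in this PR's diffs are set, and downstream code now reads the allocator and stream off the handle as shown in bin_distance.cuh:

#include <raft/handle.hpp>
#include <raft/sparse/distance/common.h>

// Hypothetical setup code: the config carries the raft::handle_t directly
// instead of a raw cusparse handle, allocator, and stream.
void make_config(raft::handle_t &handle, int *a_indptr, float *a_data,
                 int a_nrows, int a_ncols, int a_nnz) {
  raft::sparse::distance::distances_config_t<int, float> config(handle);
  config.a_nrows = a_nrows;
  config.a_ncols = a_ncols;
  config.a_nnz = a_nnz;
  config.a_indptr = a_indptr;
  config.a_data = a_data;
  // Remaining CSR fields follow the same pattern as above.
  auto stream = config.handle.get_stream();  // e.g. later passed to kernels
  (void)stream;
}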