[REVIEW] Fixing RAFT CI & a few small updates for SLHC Python wrapper #178

Merged: 10 commits, Mar 24, 2021
cpp/include/raft/sparse/hierarchy/common.h (4 additions, 3 deletions)
@@ -29,7 +29,8 @@ enum LinkageDistance { PAIRWISE = 0, KNN_GRAPH = 1 };
  * @tparam value_t
  */
 template <typename value_idx, typename value_t>
-struct linkage_output {
+class linkage_output {
+ public:
   value_idx m;
   value_idx n_clusters;

@@ -41,8 +42,8 @@ struct linkage_output {
   value_idx *children;  // size: (m-1, 2)
 };

-struct linkage_output_int_float : public linkage_output<int, float> {};
-struct linkage_output__int64_float : public linkage_output<int64_t, float> {};
+class linkage_output_int_float : public linkage_output<int, float> {};
+class linkage_output__int64_float : public linkage_output<int64_t, float> {};

 };  // namespace hierarchy
 };  // namespace raft
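A note on the struct-to-class change above: because the rewritten class places its members under an explicit public:, it is access-equivalent to the original struct (struct members are simply public by default). A minimal sketch, illustrative only and not part of the PR:

// Both types expose m identically; only the default access differs.
struct s_output { int m; };         // members public by default
class c_output { public: int m; };  // members public via explicit label

int main() {
  s_output a{3};
  c_output b;
  b.m = 3;
  return (a.m == b.m) ? 0 : 1;  // both reads compile and agree
}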
cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh (0 additions, 3 deletions)
@@ -319,9 +319,6 @@ void extract_flattened_clusters(const raft::handle_t &handle, value_idx *labels,
   raft::copy_async(label_roots.data(), children + children_cpy_start,
                    child_size, stream);

-  // thrust::device_ptr<value_idx> t_label_roots =
-  //   thrust::device_pointer_cast(label_roots.data());
-  //
   thrust::sort(thrust_policy, label_roots.data(),
                label_roots.data() + (child_size), thrust::greater<value_idx>());

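The thrust::sort call kept in this hunk orders the label roots in descending order via thrust::greater. A self-contained sketch of the same pattern with plain Thrust (illustrative, not the RAFT code):

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/sort.h>
#include <vector>

int main() {
  std::vector<int> h = {4, 1, 3, 2};
  thrust::device_vector<int> roots(h.begin(), h.end());
  // thrust::greater<T>() flips the comparator, so the sort is descending
  // and the largest label roots come first.
  thrust::sort(roots.begin(), roots.end(), thrust::greater<int>());
  return (roots[0] == 4 && roots[3] == 1) ? 0 : 1;
}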
cpp/include/raft/sparse/hierarchy/detail/mst.cuh (0 additions, 2 deletions)
@@ -166,8 +166,6 @@ void build_sorted_mst(const raft::handle_t &handle, const value_t *X,
     mst_coo = connect_knn_graph<value_idx, value_t>(handle, X, mst_coo, m, n,
                                                     color.data());

-    printf("Edges: %d\n", mst_coo.n_edges);
-
     RAFT_EXPECTS(
       mst_coo.n_edges == m - 1,
       "MST was not able to connect knn graph in a single iteration.");
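The RAFT_EXPECTS kept in this hunk encodes the spanning-tree invariant behind the check: a tree spanning m vertices has exactly m - 1 edges, so any smaller count means the k-NN graph was left disconnected. A trivial host-side restatement of the invariant (a sketch, not RAFT's API):

#include <cassert>
#include <cstddef>

// A spanning tree over m vertices contains exactly m - 1 edges;
// fewer edges mean the graph still has multiple components.
void check_spanning_tree(std::size_t m, std::size_t n_edges) {
  assert(n_edges == m - 1 && "MST did not span all vertices");
}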
cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh (0 additions, 2 deletions)
@@ -190,8 +190,6 @@ MST_solver<vertex_t, edge_t, weight_t>::solve() {
   mst_result.dst.resize(mst_result.n_edges, stream);
   mst_result.weights.resize(mst_result.n_edges, stream);

-  // raft::print_device_vector("Colors before sending: ", color_index, 7, std::cout);
-
   return mst_result;
 }

cpp/include/raft/sparse/selection/connect_components.cuh (0 additions, 2 deletions)
@@ -209,8 +209,6 @@ __global__ void min_components_by_color_kernel(

   __syncthreads();

-  // printf("block %d thread %d did final sync\n", blockIdx.x, threadIdx.x);
-
   value_idx out_offset = out_indptr[blockIdx.x];

   // TODO: Do this across threads, using an atomic counter for each color
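The TODO in this hunk proposes spreading the output writes across threads with an atomic counter per color. A hedged sketch of that general pattern (kernel name and signature are hypothetical, not the PR's code):

// Each thread claims its own output slot via atomicAdd instead of a
// single thread writing results serially.
__global__ void scatter_with_counter(const int* vals, int n, int* out,
                                     int* counter) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    int slot = atomicAdd(counter, 1);  // claim the next free slot
    out[slot] = vals[i];
  }
}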
cpp/test/sparse/connect_components.cu (0 additions, 2 deletions)
@@ -104,8 +104,6 @@ class ConnectComponentsTest : public ::testing::TestWithParam<
       handle, indptr.data(), knn_graph_coo.cols(), knn_graph_coo.vals(),
       params.n_row, knn_graph_coo.nnz, colors.data(), stream, false);

-    CUDA_CHECK(cudaStreamSynchronize(stream));
-
     printf("Got here.\n");

     raft::print_device_vector("colors", colors.data(), params.n_row, std::cout);
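The removed cudaStreamSynchronize was evidently redundant at this point in the test; the general rule it relates to is that a host read of asynchronously produced device data needs a stream sync first. A standalone sketch of that rule (not the test code):

#include <cstdio>
#include <cuda_runtime.h>

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  int *d_val, h_val = 0;
  cudaMalloc(&d_val, sizeof(int));
  cudaMemsetAsync(d_val, 0, sizeof(int), stream);
  cudaMemcpyAsync(&h_val, d_val, sizeof(int), cudaMemcpyDeviceToHost, stream);
  // Synchronize before the host consumes results produced on the stream.
  cudaStreamSynchronize(stream);
  printf("h_val = %d\n", h_val);
  cudaFree(d_val);
  cudaStreamDestroy(stream);
  return 0;
}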
cpp/test/sparse/linkage.cu (109 additions, 7 deletions)
@@ -46,9 +46,110 @@ struct LinkageInputs {
   int c;
 };

+/**
+ * @brief Kernel to compute the Rand index pair counts a and b
+ * @param firstClusterArray: the array of cluster labels, of type T
+ * @param secondClusterArray: the array of cluster labels, of type T
+ * @param size: the number of data points
+ * @param a: number of pairs of points that both clusterings placed in the same cluster
+ * @param b: number of pairs of points that both clusterings placed in different clusters
+ */
+template <typename T, int BLOCK_DIM_X, int BLOCK_DIM_Y>
+__global__ void computeTheNumerator(const T* firstClusterArray,
+                                    const T* secondClusterArray, uint64_t size,
+                                    uint64_t* a, uint64_t* b) {
+  // compute the indices of the pair of data points compared by this thread
+  uint64_t j = threadIdx.x + blockIdx.x * blockDim.x;
+  uint64_t i = threadIdx.y + blockIdx.y * blockDim.y;
+
+  // thread-local counts for a and b
+  uint64_t myA = 0, myB = 0;
+
+  if (i < size && j < size && j < i) {
+    // check whether both clusterings placed the pair in the same cluster
+    if (firstClusterArray[i] == firstClusterArray[j] &&
+        secondClusterArray[i] == secondClusterArray[j]) {
+      ++myA;
+    }
+
+    // check whether both clusterings placed the pair in different clusters
+    else if (firstClusterArray[i] != firstClusterArray[j] &&
+             secondClusterArray[i] != secondClusterArray[j]) {
+      ++myB;
+    }
+  }
+
+  // specialize BlockReduce for a 2D block of 1024 threads of type uint64_t
+  typedef cub::BlockReduce<uint64_t, BLOCK_DIM_X,
+                           cub::BLOCK_REDUCE_WARP_REDUCTIONS, BLOCK_DIM_Y>
+    BlockReduce;
+
+  // allocate shared memory for BlockReduce
+  __shared__ typename BlockReduce::TempStorage temp_storage;
+
+  // sum the thread-local counts across the block
+  myA = BlockReduce(temp_storage).Sum(myA);
+  __syncthreads();
+  myB = BlockReduce(temp_storage).Sum(myB);
+  __syncthreads();
+
+  // executed once per block
+  if (threadIdx.x == 0 && threadIdx.y == 0) {
+    raft::myAtomicAdd<unsigned long long int>((unsigned long long int*)a, myA);
+    raft::myAtomicAdd<unsigned long long int>((unsigned long long int*)b, myB);
+  }
+}
+
+/**
+ * @brief Function to calculate the Rand index
+ * <a href="https://en.wikipedia.org/wiki/Rand_index">more info on the Rand index</a>
+ * @param firstClusterArray: the array of cluster labels of type T
+ * @param secondClusterArray: the array of cluster labels of type T
+ * @param size: the number of data points, of type uint64_t
+ * @param allocator: object that handles temporary device memory allocation, of type std::shared_ptr<raft::mr::device::allocator>
+ * @param stream: the cudaStream object
+ */
+template <typename T>
+double compute_rand_index(
+  T* firstClusterArray, T* secondClusterArray, uint64_t size,
+  std::shared_ptr<raft::mr::device::allocator> allocator, cudaStream_t stream) {
+  // the Rand index is undefined for fewer than two points
+  ASSERT(size >= 2, "Rand Index for size less than 2 not defined!");
+
+  // allocate and zero-initialize device memory for a and b
+  raft::mr::device::buffer<uint64_t> arr_buf(allocator, stream, 2);
+  CUDA_CHECK(cudaMemsetAsync(arr_buf.data(), 0, 2 * sizeof(uint64_t), stream));
+
+  // kernel configuration
+  static const int BLOCK_DIM_Y = 16, BLOCK_DIM_X = 16;
+  dim3 numThreadsPerBlock(BLOCK_DIM_X, BLOCK_DIM_Y);
+  dim3 numBlocks(raft::ceildiv<int>(size, numThreadsPerBlock.x),
+                 raft::ceildiv<int>(size, numThreadsPerBlock.y));
+
+  // launch the kernel
+  computeTheNumerator<T, BLOCK_DIM_X, BLOCK_DIM_Y>
+    <<<numBlocks, numThreadsPerBlock, 0, stream>>>(
+      firstClusterArray, secondClusterArray, size, arr_buf.data(),
+      arr_buf.data() + 1);
+
+  // synchronize and copy the computed a and b from device to host
+  uint64_t ab_host[2] = {0};
+  raft::update_host(ab_host, arr_buf.data(), 2, stream);
+  CUDA_CHECK(cudaStreamSynchronize(stream));
+
+  // error handling
+  CUDA_CHECK(cudaGetLastError());
+
+  // denominator: the number of unordered pairs, n choose 2
+  uint64_t nChooseTwo = size * (size - 1) / 2;
+
+  // compute the Rand index
+  return (double)(ab_host[0] + ab_host[1]) / (double)nChooseTwo;
+}

 template <typename T, typename IdxT>
-::std::ostream &operator<<(::std::ostream &os,
-                           const LinkageInputs<T, IdxT> &dims) {
+::std::ostream& operator<<(::std::ostream& os,
+                           const LinkageInputs<T, IdxT>& dims) {
   return os;
 }

@@ -83,10 +184,14 @@ class LinkageTest : public ::testing::TestWithParam<LinkageInputs<T, IdxT>> {
     raft::hierarchy::single_linkage<
       IdxT, T, raft::hierarchy::LinkageDistance::KNN_GRAPH>(
       handle, data.data(), params.n_row, params.n_col,
-      raft::distance::DistanceType::L2Unexpanded, &out_arrs, params.c,
+      raft::distance::DistanceType::L2SqrtExpanded, &out_arrs, params.c,
       params.n_clusters);

+    CUDA_CHECK(cudaStreamSynchronize(handle.get_stream()));
+
+    score =
+      compute_rand_index(labels, labels_ref, params.n_row,
+                         handle.get_device_allocator(), handle.get_stream());
   }

   void SetUp() override { basicTest(); }
@@ -491,10 +596,7 @@ const std::vector<LinkageInputs<float, int>> linkage_inputsf2 = {
     -4}};

 typedef LinkageTest<float, int> LinkageTestF_Int;
-TEST_P(LinkageTestF_Int, Result) {
-  EXPECT_TRUE(
-    raft::devArrMatch(labels, labels_ref, params.n_row, raft::Compare<int>()));
-}
+TEST_P(LinkageTestF_Int, Result) { EXPECT_TRUE(score == 1.0); }

 INSTANTIATE_TEST_CASE_P(LinkageTest, LinkageTestF_Int,
                         ::testing::ValuesIn(linkage_inputsf2));
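For reference, the score computed by compute_rand_index above is the Rand index of the two labelings. With a the number of point pairs grouped together by both clusterings and b the number grouped apart by both, out of n points:

\[
  RI = \frac{a + b}{\binom{n}{2}}, \qquad \binom{n}{2} = \frac{n(n-1)}{2}.
\]

RI equals 1.0 exactly when the two labelings agree on every pair, so the new EXPECT_TRUE(score == 1.0) assertion is invariant to label permutations, unlike the element-wise devArrMatch comparison it replaces.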