diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake index e1e21a9cd3..080efb5b1f 100644 --- a/cpp/cmake/Dependencies.cmake +++ b/cpp/cmake/Dependencies.cmake @@ -23,7 +23,7 @@ if(NOT CUB_IS_PART_OF_CTK) set(CUB_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub CACHE STRING "Path to cub repo") ExternalProject_Add(cub GIT_REPOSITORY https://github.com/thrust/cub.git - GIT_TAG 1.12.0 + GIT_TAG 1.8.0 PREFIX ${CUB_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/cpp/include/raft/sparse/distance/bin_distance.cuh b/cpp/include/raft/sparse/distance/bin_distance.cuh index a0467b9566..ef605e1fe0 100644 --- a/cpp/include/raft/sparse/distance/bin_distance.cuh +++ b/cpp/include/raft/sparse/distance/bin_distance.cuh @@ -135,7 +135,13 @@ class jaccard_expanded_distances_t : public distances_t { config_->handle, config_->allocator, config_->stream, [] __device__ __host__(value_t dot, value_t q_norm, value_t r_norm) { value_t q_r_union = q_norm + r_norm; - return 1 - (dot / (q_r_union - dot)); + value_t denom = q_r_union - dot; + + value_t jacc = ((denom != 0) * dot) / ((denom == 0) + denom); + + // flip the similarity when both rows are 0 + bool both_empty = q_r_union == 0; + return 1 - ((!both_empty * jacc) + both_empty); }); } diff --git a/cpp/include/raft/sparse/distance/l2_distance.cuh b/cpp/include/raft/sparse/distance/l2_distance.cuh index 98187576fa..2432a9cc1c 100644 --- a/cpp/include/raft/sparse/distance/l2_distance.cuh +++ b/cpp/include/raft/sparse/distance/l2_distance.cuh @@ -210,7 +210,10 @@ class cosine_expanded_distances_t : public distances_t { value_t norms = sqrt(q_norm) * sqrt(r_norm); // deal with potential for 0 in denominator by forcing 0/1 instead value_t cos = ((norms != 0) * dot) / ((norms == 0) + norms); - return 1 - cos; + + // flip the similarity when both rows are 0 + bool both_empty = (q_norm == 0) && (r_norm == 0); + return 1 - ((!both_empty * cos) + both_empty); }); } diff --git a/cpp/include/raft/sparse/selection/knn_graph.cuh b/cpp/include/raft/sparse/selection/knn_graph.cuh index a78fb8d0f6..1cf225087a 100644 --- a/cpp/include/raft/sparse/selection/knn_graph.cuh +++ b/cpp/include/raft/sparse/selection/knn_graph.cuh @@ -96,8 +96,6 @@ void knn_graph(const handle_t &handle, const value_t *X, size_t m, size_t n, raft::sparse::COO &out, int c = 15) { int k = build_k(m, c); - printf("K=%d\n", k); - auto d_alloc = handle.get_device_allocator(); auto stream = handle.get_stream(); diff --git a/cpp/include/raft/spatial/knn/detail/brute_force_knn.hpp b/cpp/include/raft/spatial/knn/detail/brute_force_knn.hpp index ff0df5f6e5..e686fff587 100644 --- a/cpp/include/raft/spatial/knn/detail/brute_force_knn.hpp +++ b/cpp/include/raft/spatial/knn/detail/brute_force_knn.hpp @@ -330,8 +330,6 @@ void brute_force_knn_impl( userStream, trans.data()); } - raft::print_device_vector("before sqrt", res_D, n * k, std::cout); - // Perform necessary post-processing if ((m == faiss::MetricType::METRIC_L2 || m == faiss::MetricType::METRIC_Lp) && @@ -346,10 +344,6 @@ void brute_force_knn_impl( [p] __device__(float input) { return powf(input, p); }, userStream); } - CUDA_CHECK(cudaStreamSynchronize(userStream)); - - raft::print_device_vector("after sqrt", res_D, n * k, std::cout); - query_metric_processor->revert(search_items); query_metric_processor->postprocess(out_D); for (size_t i = 0; i < input.size(); i++) { diff --git a/cpp/test/sparse/connect_components.cu b/cpp/test/sparse/connect_components.cu index 8d264252e0..965dc20330 100644 --- a/cpp/test/sparse/connect_components.cu +++ b/cpp/test/sparse/connect_components.cu @@ -47,10 +47,6 @@ struct ConnectComponentsInputs { value_idx n_col; std::vector data; - std::vector expected_labels; - - int n_clusters; - int c; }; @@ -67,20 +63,14 @@ class ConnectComponentsTest : public ::testing::TestWithParam< params = ::testing::TestWithParam< ConnectComponentsInputs>::GetParam(); - out_edges = new raft::sparse::COO( + raft::sparse::COO out_edges( handle.get_device_allocator(), handle.get_stream()); rmm::device_uvector data(params.n_row * params.n_col, handle.get_stream()); - // Allocate result labels and expected labels on device - raft::allocate(labels, params.n_row); - raft::allocate(labels_ref, params.n_row); - raft::copy(data.data(), params.data.data(), data.size(), handle.get_stream()); - raft::copy(labels_ref, params.expected_labels.data(), params.n_row, - handle.get_stream()); rmm::device_uvector indptr(params.n_row + 1, stream); @@ -114,33 +104,34 @@ class ConnectComponentsTest : public ::testing::TestWithParam< handle, indptr.data(), knn_graph_coo.cols(), knn_graph_coo.vals(), params.n_row, knn_graph_coo.nnz, colors.data(), stream, false); - raft::print_device_vector("colors", colors.data(), colors.size(), - std::cout); + CUDA_CHECK(cudaStreamSynchronize(stream)); + + printf("Got here.\n"); + + raft::print_device_vector("colors", colors.data(), params.n_row, std::cout); /** * 3. connect_components to fix connectivities */ raft::linkage::connect_components( - handle, *out_edges, data.data(), colors.data(), params.n_row, + handle, out_edges, data.data(), colors.data(), params.n_row, params.n_col); - int final_nnz = out_edges->nnz + mst_coo.n_edges; + int final_nnz = out_edges.nnz + mst_coo.n_edges; mst_coo.src.resize(final_nnz, stream); mst_coo.dst.resize(final_nnz, stream); mst_coo.weights.resize(final_nnz, stream); - printf("New nnz: %d\n", final_nnz); - /** * Construct final edge list */ - raft::copy_async(mst_coo.src.data() + mst_coo.n_edges, out_edges->rows(), - out_edges->nnz, stream); - raft::copy_async(mst_coo.dst.data() + mst_coo.n_edges, out_edges->cols(), - out_edges->nnz, stream); - raft::copy_async(mst_coo.weights.data() + mst_coo.n_edges, - out_edges->vals(), out_edges->nnz, stream); + raft::copy_async(mst_coo.src.data() + mst_coo.n_edges, out_edges.rows(), + out_edges.nnz, stream); + raft::copy_async(mst_coo.dst.data() + mst_coo.n_edges, out_edges.cols(), + out_edges.nnz, stream); + raft::copy_async(mst_coo.weights.data() + mst_coo.n_edges, out_edges.vals(), + out_edges.nnz, stream); raft::sparse::COO final_coo(d_alloc, stream); raft::sparse::linalg::symmetrize( @@ -168,22 +159,15 @@ class ConnectComponentsTest : public ::testing::TestWithParam< CUDA_CHECK(cudaStreamSynchronize(stream)); - printf("output edges: %d\n", output_mst.n_edges); - final_edges = output_mst.n_edges; } void SetUp() override { basicTest(); } - void TearDown() override { - // CUDA_CHECK(cudaFree(labels)); - // CUDA_CHECK(cudaFree(labels_ref)); - } + void TearDown() override {} protected: ConnectComponentsInputs params; - value_idx *labels, *labels_ref; - raft::sparse::COO *out_edges; value_idx final_edges; }; @@ -201,8 +185,6 @@ const std::vector> fix_conn_inputsf2 = { 0.27864171, 0.70911132, 0.21338564, 0.32035554, 0.73788331, 0.46926692, 0.57570162, 0.42559178, 0.87120209, 0.22734951, 0.01847905, 0.75549396, 0.76166195, 0.66613745}, - {9, 8, 7, 6, 5, 4, 3, 2, 1, 0}, - 10, -1}, // Test n_points == 100 {100, @@ -543,11 +525,6 @@ const std::vector> fix_conn_inputsf2 = { 8.66342445e-01 }, - {0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - 10, -4}}; typedef ConnectComponentsTest ConnectComponentsTestF_Int; diff --git a/cpp/test/sparse/distance.cu b/cpp/test/sparse/distance.cu index b103486b96..4247e374d6 100644 --- a/cpp/test/sparse/distance.cu +++ b/cpp/test/sparse/distance.cu @@ -148,6 +148,22 @@ class SparseDistanceTest }; const std::vector> inputs_i32_f = { + {5, + {0, 0, 1, 2}, + + {1, 2}, + {0.5, 0.5}, + {0, 1, 1, 1, 0, 1, 1, 1, 0}, + raft::distance::DistanceType::CosineExpanded, + 0.0}, + {5, + {0, 0, 1, 2}, + + {1, 2}, + {1.0, 1.0}, + {0, 1, 1, 1, 0, 1, 1, 1, 0}, + raft::distance::DistanceType::JaccardExpanded, + 0.0}, {2, {0, 2, 4, 6, 8}, {0, 1, 0, 1, 0, 1, 0, 1}, // indices