From 7db75635d647b72e77a0adb212062e1923f60d80 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 3 Dec 2021 15:12:41 -0500 Subject: [PATCH 1/3] Disabling fused l2 knn from bfknn --- .../knn/detail/knn_brute_force_faiss.cuh | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh index 0b89377630..065c9ee782 100644 --- a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh @@ -293,26 +293,26 @@ void brute_force_knn_impl( cudaStream_t stream = raft::select_stream(userStream, internalStreams, n_int_streams, i); - // TODO: Enable this once we figure out why it's causing pytest failures in cuml. - if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true && - (metric == raft::distance::DistanceType::L2Unexpanded || - metric == raft::distance::DistanceType::L2SqrtUnexpanded //|| - // metric == raft::distance::DistanceType::L2Expanded || - // metric == raft::distance::DistanceType::L2SqrtExpanded) - )) { - fusedL2Knn(D, - out_i_ptr, - out_d_ptr, - input[i], - search_items, - sizes[i], - n, - k, - rowMajorIndex, - rowMajorQuery, - stream, - metric); - } else { +// // TODO: Enable this once we figure out why it's causing pytest failures in cuml. +// if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true && +// (metric == raft::distance::DistanceType::L2Unexpanded || +// metric == raft::distance::DistanceType::L2SqrtUnexpanded //|| +// // metric == raft::distance::DistanceType::L2Expanded || +// // metric == raft::distance::DistanceType::L2SqrtExpanded) +// )) { +// fusedL2Knn(D, +// out_i_ptr, +// out_d_ptr, +// input[i], +// search_items, +// sizes[i], +// n, +// k, +// rowMajorIndex, +// rowMajorQuery, +// stream, +// metric); +// } else { switch (metric) { case raft::distance::DistanceType::Haversine: @@ -354,7 +354,7 @@ void brute_force_knn_impl( } CUDA_CHECK(cudaPeekAtLastError()); - } +// } // Sync internal streams if used. We don't need to // sync the user stream because we'll already have From d9abe17b02a7d42f650b8084cc4245a449ae87f0 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 3 Dec 2021 15:45:34 -0500 Subject: [PATCH 2/3] Fixing style --- .../knn/detail/knn_brute_force_faiss.cuh | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh index 065c9ee782..d154e5f92a 100644 --- a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh @@ -293,68 +293,68 @@ void brute_force_knn_impl( cudaStream_t stream = raft::select_stream(userStream, internalStreams, n_int_streams, i); -// // TODO: Enable this once we figure out why it's causing pytest failures in cuml. -// if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true && -// (metric == raft::distance::DistanceType::L2Unexpanded || -// metric == raft::distance::DistanceType::L2SqrtUnexpanded //|| -// // metric == raft::distance::DistanceType::L2Expanded || -// // metric == raft::distance::DistanceType::L2SqrtExpanded) -// )) { -// fusedL2Knn(D, -// out_i_ptr, -// out_d_ptr, -// input[i], -// search_items, -// sizes[i], -// n, -// k, -// rowMajorIndex, -// rowMajorQuery, -// stream, -// metric); -// } else { - switch (metric) { - case raft::distance::DistanceType::Haversine: - - ASSERT(D == 2, - "Haversine distance requires 2 dimensions " - "(latitude / longitude)."); - - haversine_knn(out_i_ptr, out_d_ptr, input[i], search_items, sizes[i], n, k, stream); - break; - default: - faiss::MetricType m = build_faiss_metric(metric); - - faiss::gpu::StandardGpuResources gpu_res; - - gpu_res.noTempMemory(); - gpu_res.setDefaultStream(device, stream); - - faiss::gpu::GpuDistanceParams args; - args.metric = m; - args.metricArg = metricArg; - args.k = k; - args.dims = D; - args.vectors = input[i]; - args.vectorsRowMajor = rowMajorIndex; - args.numVectors = sizes[i]; - args.queries = search_items; - args.queriesRowMajor = rowMajorQuery; - args.numQueries = n; - args.outDistances = out_d_ptr; - args.outIndices = out_i_ptr; - - /** - * @todo: Until FAISS supports pluggable allocation strategies, - * we will not reap the benefits of the pool allocator for - * avoiding device-wide synchronizations from cudaMalloc/cudaFree - */ - bfKnn(&gpu_res, args); - } + // // TODO: Enable this once we figure out why it's causing pytest failures in cuml. + // if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true && + // (metric == raft::distance::DistanceType::L2Unexpanded || + // metric == raft::distance::DistanceType::L2SqrtUnexpanded //|| + // // metric == raft::distance::DistanceType::L2Expanded || + // // metric == raft::distance::DistanceType::L2SqrtExpanded) + // )) { + // fusedL2Knn(D, + // out_i_ptr, + // out_d_ptr, + // input[i], + // search_items, + // sizes[i], + // n, + // k, + // rowMajorIndex, + // rowMajorQuery, + // stream, + // metric); + // } else { + switch (metric) { + case raft::distance::DistanceType::Haversine: + + ASSERT(D == 2, + "Haversine distance requires 2 dimensions " + "(latitude / longitude)."); + + haversine_knn(out_i_ptr, out_d_ptr, input[i], search_items, sizes[i], n, k, stream); + break; + default: + faiss::MetricType m = build_faiss_metric(metric); + + faiss::gpu::StandardGpuResources gpu_res; + + gpu_res.noTempMemory(); + gpu_res.setDefaultStream(device, stream); + + faiss::gpu::GpuDistanceParams args; + args.metric = m; + args.metricArg = metricArg; + args.k = k; + args.dims = D; + args.vectors = input[i]; + args.vectorsRowMajor = rowMajorIndex; + args.numVectors = sizes[i]; + args.queries = search_items; + args.queriesRowMajor = rowMajorQuery; + args.numQueries = n; + args.outDistances = out_d_ptr; + args.outIndices = out_i_ptr; + + /** + * @todo: Until FAISS supports pluggable allocation strategies, + * we will not reap the benefits of the pool allocator for + * avoiding device-wide synchronizations from cudaMalloc/cudaFree + */ + bfKnn(&gpu_res, args); } + } - CUDA_CHECK(cudaPeekAtLastError()); -// } + CUDA_CHECK(cudaPeekAtLastError()); + // } // Sync internal streams if used. We don't need to // sync the user stream because we'll already have From c3b68959ce1092567c2caeb51fa122e758f355fa Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 3 Dec 2021 17:55:12 -0500 Subject: [PATCH 3/3] Fixing style --- cpp/test/spatial/ball_cover.cu | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/test/spatial/ball_cover.cu b/cpp/test/spatial/ball_cover.cu index ab85e7fe8f..0a1680badc 100644 --- a/cpp/test/spatial/ball_cover.cu +++ b/cpp/test/spatial/ball_cover.cu @@ -52,12 +52,14 @@ __global__ void count_discrepancies_kernel(value_idx* actual_idx, if (row < m) { for (uint32_t i = 0; i < n; i++) { value_t d = actual[row * n + i] - expected[row * n + i]; - bool matches = fabsf(d) <= thres; - if (!matches) { - // printf("row=%d, actual_idx=%ld, actual=%f, expected_id=%ld, expected=%f\n", - // row, actual_idx[row*n+i], actual[row*n+i], expected_idx[row*n+i], - // expected[row*n+i]); - } + bool matches = (fabsf(d) <= thres) || (actual_idx[row * n + i] == expected_idx[row * n + i] && + actual_idx[row * n + i] == row); + // if (!matches) { + // printf("row=%d, actual_idx=%ld, actual=%f, expected_id=%ld, + // expected=%f\n", + // row, actual_idx[row*n+i], actual[row*n+i], expected_idx[row*n+i], + // expected[row*n+i]); + // } n_diffs += !matches; out[row] = n_diffs;