Skip to content

Commit

Permalink
Disabling fused l2 knn from bfknn (#407)
Browse files Browse the repository at this point in the history
It appears the recent changes to the fused l2 knn have somehow broken a few things in cuml, such as rbc, trustworthines, and UMAP.

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: #407
  • Loading branch information
cjnolet authored Dec 3, 2021
1 parent 80507ee commit 50f43c7
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 66 deletions.
120 changes: 60 additions & 60 deletions cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -293,69 +293,69 @@ void brute_force_knn_impl(

cudaStream_t stream = raft::select_stream(userStream, internalStreams, n_int_streams, i);

// TODO: Enable this once we figure out why it's causing pytest failures in cuml.
if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true &&
(metric == raft::distance::DistanceType::L2Unexpanded ||
metric == raft::distance::DistanceType::L2SqrtUnexpanded //||
// metric == raft::distance::DistanceType::L2Expanded ||
// metric == raft::distance::DistanceType::L2SqrtExpanded)
)) {
fusedL2Knn(D,
out_i_ptr,
out_d_ptr,
input[i],
search_items,
sizes[i],
n,
k,
rowMajorIndex,
rowMajorQuery,
stream,
metric);
} else {
switch (metric) {
case raft::distance::DistanceType::Haversine:

ASSERT(D == 2,
"Haversine distance requires 2 dimensions "
"(latitude / longitude).");

haversine_knn(out_i_ptr, out_d_ptr, input[i], search_items, sizes[i], n, k, stream);
break;
default:
faiss::MetricType m = build_faiss_metric(metric);

faiss::gpu::StandardGpuResources gpu_res;

gpu_res.noTempMemory();
gpu_res.setDefaultStream(device, stream);

faiss::gpu::GpuDistanceParams args;
args.metric = m;
args.metricArg = metricArg;
args.k = k;
args.dims = D;
args.vectors = input[i];
args.vectorsRowMajor = rowMajorIndex;
args.numVectors = sizes[i];
args.queries = search_items;
args.queriesRowMajor = rowMajorQuery;
args.numQueries = n;
args.outDistances = out_d_ptr;
args.outIndices = out_i_ptr;

/**
* @todo: Until FAISS supports pluggable allocation strategies,
* we will not reap the benefits of the pool allocator for
* avoiding device-wide synchronizations from cudaMalloc/cudaFree
*/
bfKnn(&gpu_res, args);
}
// // TODO: Enable this once we figure out why it's causing pytest failures in cuml.
// if (k <= 64 && rowMajorQuery == rowMajorIndex && rowMajorQuery == true &&
// (metric == raft::distance::DistanceType::L2Unexpanded ||
// metric == raft::distance::DistanceType::L2SqrtUnexpanded //||
// // metric == raft::distance::DistanceType::L2Expanded ||
// // metric == raft::distance::DistanceType::L2SqrtExpanded)
// )) {
// fusedL2Knn(D,
// out_i_ptr,
// out_d_ptr,
// input[i],
// search_items,
// sizes[i],
// n,
// k,
// rowMajorIndex,
// rowMajorQuery,
// stream,
// metric);
// } else {
switch (metric) {
case raft::distance::DistanceType::Haversine:

ASSERT(D == 2,
"Haversine distance requires 2 dimensions "
"(latitude / longitude).");

haversine_knn(out_i_ptr, out_d_ptr, input[i], search_items, sizes[i], n, k, stream);
break;
default:
faiss::MetricType m = build_faiss_metric(metric);

faiss::gpu::StandardGpuResources gpu_res;

gpu_res.noTempMemory();
gpu_res.setDefaultStream(device, stream);

faiss::gpu::GpuDistanceParams args;
args.metric = m;
args.metricArg = metricArg;
args.k = k;
args.dims = D;
args.vectors = input[i];
args.vectorsRowMajor = rowMajorIndex;
args.numVectors = sizes[i];
args.queries = search_items;
args.queriesRowMajor = rowMajorQuery;
args.numQueries = n;
args.outDistances = out_d_ptr;
args.outIndices = out_i_ptr;

/**
* @todo: Until FAISS supports pluggable allocation strategies,
* we will not reap the benefits of the pool allocator for
* avoiding device-wide synchronizations from cudaMalloc/cudaFree
*/
bfKnn(&gpu_res, args);
}

CUDA_CHECK(cudaPeekAtLastError());
}

CUDA_CHECK(cudaPeekAtLastError());
// }

// Sync internal streams if used. We don't need to
// sync the user stream because we'll already have
// fully serial execution.
Expand Down
14 changes: 8 additions & 6 deletions cpp/test/spatial/ball_cover.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,14 @@ __global__ void count_discrepancies_kernel(value_idx* actual_idx,
if (row < m) {
for (uint32_t i = 0; i < n; i++) {
value_t d = actual[row * n + i] - expected[row * n + i];
bool matches = fabsf(d) <= thres;
if (!matches) {
// printf("row=%d, actual_idx=%ld, actual=%f, expected_id=%ld, expected=%f\n",
// row, actual_idx[row*n+i], actual[row*n+i], expected_idx[row*n+i],
// expected[row*n+i]);
}
bool matches = (fabsf(d) <= thres) || (actual_idx[row * n + i] == expected_idx[row * n + i] &&
actual_idx[row * n + i] == row);
// if (!matches) {
// printf("row=%d, actual_idx=%ld, actual=%f, expected_id=%ld,
// expected=%f\n",
// row, actual_idx[row*n+i], actual[row*n+i], expected_idx[row*n+i],
// expected[row*n+i]);
// }

n_diffs += !matches;
out[row] = n_diffs;
Expand Down

0 comments on commit 50f43c7

Please sign in to comment.