From 36e5865b4f84b2a322e2a5df8ec6fd88496fff30 Mon Sep 17 00:00:00 2001 From: Allard Hendriksen Date: Mon, 5 Sep 2022 11:08:25 +0200 Subject: [PATCH] Fix doxygen-related CI failure There was a problem with defgroup syntax. --- cpp/bench/spatial/fused_l2_nn.cu | 8 ++++++-- cpp/include/raft/distance/detail/fused_l2_nn.cuh | 2 ++ cpp/include/raft/linalg/contractions.cuh | 4 ---- cpp/test/distance/fused_l2_nn.cu | 7 +++++++ 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/cpp/bench/spatial/fused_l2_nn.cu b/cpp/bench/spatial/fused_l2_nn.cu index 9e88f23e73..aa36483145 100644 --- a/cpp/bench/spatial/fused_l2_nn.cu +++ b/cpp/bench/spatial/fused_l2_nn.cu @@ -22,8 +22,12 @@ #include #include -// Note: do not include raft/spatial/knn/specializations.hpp based on -// RAFT_NN_COMPILED, as fusedL2NN is not specialized and not defined there. +// TODO: Once fusedL2NN is specialized in the raft_distance shared library, add +// back +// +// #if defined RAFT_NN_COMPILED +// #include +// #endif namespace raft::bench::spatial { diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index 6a51bdcf1a..308f8a096a 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -202,6 +202,8 @@ __global__ __launch_bounds__(P::Nthreads, 2) void fusedL2NNkernel(OutT* min, for (int i = 0; i < P::AccRowsPerTh; ++i) { #pragma unroll for (int j = P::AccThCols / 2; j > 0; j >>= 1) { + // Actually, the srcLane (lid + j) should be (lid + j) % P::AccThCols, + // but the shfl op applies the modulo internally.
auto tmpkey = raft::shfl(val[i].key, lid + j, P::AccThCols); auto tmpvalue = raft::shfl(val[i].value, lid + j, P::AccThCols); KVPair tmp = {tmpkey, tmpvalue}; diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh index 800632ada5..8aed0cb4be 100644 --- a/cpp/include/raft/linalg/contractions.cuh +++ b/cpp/include/raft/linalg/contractions.cuh @@ -168,13 +168,10 @@ struct Policy4x4 { /** @} */ /** - * @defgroup Policy4x4Skinny - * * A smaller k-block (8 instead of 32) with fewer threads per block (8x8 instead * of 16x16), which is faster for raft::distance::fusedL2NN on skinny matrices, * i.e., matrices with a small k dimension. * - * @{ */ template struct Policy4x4Skinny { @@ -191,7 +188,6 @@ struct Policy4x4Skinny { typedef KernelPolicy Policy; typedef ColKernelPolicy ColPolicy; }; -/** @} */ /** * @defgroup Policy2x8 16 elements per thread Policy with k-block = 16 diff --git a/cpp/test/distance/fused_l2_nn.cu b/cpp/test/distance/fused_l2_nn.cu index c01e8c74fc..2a5b30e01f 100644 --- a/cpp/test/distance/fused_l2_nn.cu +++ b/cpp/test/distance/fused_l2_nn.cu @@ -23,6 +23,13 @@ #include #include +// TODO: Once fusedL2NN is specialized in the raft_distance shared library, add +// the following: +// +// #if defined RAFT_NN_COMPILED +// #include +// #endif + namespace raft { namespace distance {