From 2da978412ff8811bbee9fa9bd10fcd1698d689f6 Mon Sep 17 00:00:00 2001 From: William Hicks Date: Wed, 7 Jul 2021 18:14:12 -0400 Subject: [PATCH] Temporarily revert #276 This reverts commit 82061e00200ee0f53c2242cbbdd4d8cd636ed310. This PR appears to have resulted in a substantial increase in compilation time, causing timeouts in CI for cuML PR 4029. These changes should be reinstated once the cause of the increased compilation time has been determined. --- cpp/include/raft/distance/canberra.cuh | 161 ------------------ cpp/include/raft/distance/chebyshev.cuh | 158 ------------------ cpp/include/raft/distance/distance.cuh | 108 ++---------- cpp/include/raft/distance/hellinger.cuh | 204 ----------------------- cpp/include/raft/distance/minkowski.cuh | 172 ------------------- cpp/include/raft/linalg/contractions.cuh | 4 - cpp/test/CMakeLists.txt | 4 - cpp/test/distance/dist_canberra.cu | 68 -------- cpp/test/distance/dist_chebyshev.cu | 68 -------- cpp/test/distance/dist_hellinger.cu | 69 -------- cpp/test/distance/dist_minkowski.cu | 69 -------- cpp/test/distance/distance_base.cuh | 107 ++---------- 12 files changed, 28 insertions(+), 1164 deletions(-) delete mode 100644 cpp/include/raft/distance/canberra.cuh delete mode 100644 cpp/include/raft/distance/chebyshev.cuh delete mode 100644 cpp/include/raft/distance/hellinger.cuh delete mode 100644 cpp/include/raft/distance/minkowski.cuh delete mode 100644 cpp/test/distance/dist_canberra.cu delete mode 100644 cpp/test/distance/dist_chebyshev.cu delete mode 100644 cpp/test/distance/dist_hellinger.cu delete mode 100644 cpp/test/distance/dist_minkowski.cu diff --git a/cpp/include/raft/distance/canberra.cuh b/cpp/include/raft/distance/canberra.cuh deleted file mode 100644 index b87c295eb0..0000000000 --- a/cpp/include/raft/distance/canberra.cuh +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include - -namespace raft { -namespace distance { - -/** - * @brief the canberra distance matrix calculation implementer - * It computes the following equation: cij = max(cij, op(ai-bj)) - * @tparam DataT input data-type (for A and B matrices) - * @tparam AccT accumulation data-type - * @tparam OutT output data-type (for C and D matrices) - * @tparam IdxT index data-type - * @tparam Veclen number of k-elements loaded by each thread - for every LDG call. details in contractions.cuh - * @tparam FinalLambda final lambda called on final distance value - * @tparam isRowMajor true if input/output is row major, - false for column major - * @param[in] x input matrix - * @param[in] y input matrix - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and cols of C/D - * @param[in] k number of cols of A and B - * @param[in] lda leading dimension of A - * @param[in] ldb leading dimension of B - * @param[in] ldd leading dimension of C/D - * @param[output] dOutput output matrix - * @param fin_op the final gemm epilogue lambda - * @param stream cuda stream to launch work - */ -template -static void canberraImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, IdxT k, - IdxT lda, IdxT ldb, IdxT ldd, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream) { - typedef typename raft::linalg::Policy4x4::Policy RowPolicy; - typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; - - typedef - typename std::conditional::type KPolicy; - - dim3 blk(KPolicy::Nthreads); - - // Accumulation operation lambda - auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); - const auto add = raft::myAbs(x) + raft::myAbs(y); - // deal with potential for 0 in denominator by - // forcing 1/0 instead - acc += ((add != 0) * diff / (add + (add == 0))); - }; - - // epilogue operation lambda for final value calculation - auto epilog_lambda = [] __device__( - AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh], - DataT * regxn, DataT * regyn, IdxT gridStrideX, - IdxT gridStrideY) { return; }; - - if (isRowMajor) { - auto canberraRowMajor = - pairwiseDistanceMatKernel; - dim3 grid = - launchConfigGenerator(m, n, KPolicy::SmemSize, canberraRowMajor); - - canberraRowMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } else { - auto canberraColMajor = - pairwiseDistanceMatKernel; - dim3 grid = - launchConfigGenerator(m, n, KPolicy::SmemSize, canberraColMajor); - canberraColMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } - - CUDA_CHECK(cudaGetLastError()); -} - -template -void canberra(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd, - const DataT *x, const DataT *y, OutT *dOutput, FinalLambda fin_op, - cudaStream_t stream) { - size_t bytesA = sizeof(DataT) * lda; - size_t bytesB = sizeof(DataT) * ldb; - if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) { - canberraImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, - stream); - } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) { - canberraImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, - stream); - } else { - canberraImpl( - x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream); - } -} - -/** - * @brief the canberra distance matrix calculation - * It computes the following equation: cij = max(cij, op(ai-bj)) - * @tparam InType input data-type (for A and B matrices) - * @tparam AccType accumulation data-type - * @tparam OutType output data-type (for C and D matrices) - * @tparam FinalLambda user-defined epilogue lamba - * @tparam Index_ Index type - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and cols of C/D - * @param[in] k number of cols of A and B - * @param[in] pA input matrix - * @param[in] pB input matrix - * @param[out] pD output matrix - * @param[in] fin_op the final element-wise epilogue lambda - * @param[in] stream cuda stream to launch work - * @param[in] isRowMajor whether the input and output matrices are row major - */ -template -void canberraImpl(int m, int n, int k, const InType *pA, const InType *pB, - OutType *pD, FinalLambda fin_op, cudaStream_t stream, - bool isRowMajor) { - typedef std::is_same is_bool; - typedef typename std::conditional::type - canberraOutType; - Index_ lda, ldb, ldd; - canberraOutType *pDcast = reinterpret_cast(pD); - if (isRowMajor) { - lda = k, ldb = k, ldd = n; - canberra( - m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream); - } else { - lda = n, ldb = m, ldd = m; - canberra( - n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream); - } -} -} // namespace distance -} // namespace raft diff --git a/cpp/include/raft/distance/chebyshev.cuh b/cpp/include/raft/distance/chebyshev.cuh deleted file mode 100644 index 8d53408cf8..0000000000 --- a/cpp/include/raft/distance/chebyshev.cuh +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include - -namespace raft { -namespace distance { - -/** - * @brief the Chebyshev distance matrix calculation implementer - * It computes the following equation: cij = max(cij, op(ai-bj)) - * @tparam DataT input data-type (for A and B matrices) - * @tparam AccT accumulation data-type - * @tparam OutT output data-type (for C and D matrices) - * @tparam IdxT index data-type - * @tparam Veclen number of k-elements loaded by each thread - for every LDG call. details in contractions.cuh - * @tparam FinalLambda final lambda called on final distance value - * @tparam isRowMajor true if input/output is row major, - false for column major - * @param[in] x input matrix - * @param[in] y input matrix - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and cols of C/D - * @param[in] k number of cols of A and B - * @param[in] lda leading dimension of A - * @param[in] ldb leading dimension of B - * @param[in] ldd leading dimension of C/D - * @param[out] dOutput output matrix - * @param[in] fin_op the final gemm epilogue lambda - * @param[in] stream cuda stream to launch work - */ -template -static void chebyshevImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, - IdxT k, IdxT lda, IdxT ldb, IdxT ldd, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream) { - typedef typename raft::linalg::Policy4x4::Policy RowPolicy; - typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; - - typedef - typename std::conditional::type KPolicy; - - dim3 blk(KPolicy::Nthreads); - - // Accumulation operation lambda - auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); - acc = raft::myMax(acc, diff); - }; - - // epilogue operation lambda for final value calculation - auto epilog_lambda = [] __device__( - AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh], - DataT * regxn, DataT * regyn, IdxT gridStrideX, - IdxT gridStrideY) { return; }; - - if (isRowMajor) { - auto chebyshevRowMajor = - pairwiseDistanceMatKernel; - dim3 grid = launchConfigGenerator(m, n, KPolicy::SmemSize, - chebyshevRowMajor); - - chebyshevRowMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } else { - auto chebyshevColMajor = - pairwiseDistanceMatKernel; - dim3 grid = launchConfigGenerator(m, n, KPolicy::SmemSize, - chebyshevColMajor); - chebyshevColMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } - - CUDA_CHECK(cudaGetLastError()); -} - -template -void chebyshev(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd, - const DataT *x, const DataT *y, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream) { - size_t bytesA = sizeof(DataT) * lda; - size_t bytesB = sizeof(DataT) * ldb; - if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) { - chebyshevImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, - stream); - } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) { - chebyshevImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, - stream); - } else { - chebyshevImpl( - x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream); - } -} - -/** - * @brief the chebyshev distance matrix calculation - * It computes the following equation: cij = max(cij, op(ai-bj)) - * @tparam InType input data-type (for A and B matrices) - * @tparam AccType accumulation data-type - * @tparam OutType output data-type (for C and D matrices) - * @tparam FinalLambda user-defined epilogue lamba - * @tparam Index_ Index type - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and cols of C/D - * @param[in] k number of cols of A and B - * @param[in] pA input matrix - * @param[in] pB input matrix - * @param[out] pD output matrix - * @param[in] fin_op the final element-wise epilogue lambda - * @param[in] stream cuda stream to launch work - * @param[in] isRowMajor whether the input and output matrices are row major - */ -template -void chebyshevImpl(int m, int n, int k, const InType *pA, const InType *pB, - OutType *pD, FinalLambda fin_op, cudaStream_t stream, - bool isRowMajor) { - typedef std::is_same is_bool; - typedef typename std::conditional::type - chebyshevOutType; - Index_ lda, ldb, ldd; - chebyshevOutType *pDcast = reinterpret_cast(pD); - if (isRowMajor) { - lda = k, ldb = k, ldd = n; - chebyshev( - m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream); - } else { - lda = n, ldb = m, ldd = m; - chebyshev( - n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream); - } -} -} // namespace distance -} // namespace raft diff --git a/cpp/include/raft/distance/distance.cuh b/cpp/include/raft/distance/distance.cuh index 1b39a6ec18..579b3bb446 100644 --- a/cpp/include/raft/distance/distance.cuh +++ b/cpp/include/raft/distance/distance.cuh @@ -19,13 +19,9 @@ #include #include #include -#include -#include #include #include -#include #include -#include #include namespace raft { @@ -38,7 +34,7 @@ template { void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { + cudaStream_t stream, bool isRowMajor) { raft::distance::euclideanAlgo1(m, n, k, x, y, dist, false, (AccType *)workspace, worksize, @@ -61,7 +57,7 @@ struct DistanceImpl { void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { + cudaStream_t stream, bool isRowMajor) { raft::distance::euclideanAlgo1(m, n, k, x, y, dist, true, (AccType *)workspace, worksize, @@ -75,7 +71,7 @@ struct DistanceImpl { void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { + cudaStream_t stream, bool isRowMajor) { raft::distance::cosineAlgo1( m, n, k, x, y, dist, (AccType *)workspace, worksize, fin_op, stream, isRowMajor); @@ -88,7 +84,7 @@ struct DistanceImpl { void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { + cudaStream_t stream, bool isRowMajor) { raft::distance::euclideanAlgo2(m, n, k, x, y, dist, false, fin_op, stream, isRowMajor); @@ -101,7 +97,7 @@ struct DistanceImpl { void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { + cudaStream_t stream, bool isRowMajor) { raft::distance::euclideanAlgo2(m, n, k, x, y, dist, true, fin_op, stream, isRowMajor); @@ -114,63 +110,12 @@ struct DistanceImpl { void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { + cudaStream_t stream, bool isRowMajor) { raft::distance::l1Impl( m, n, k, x, y, dist, fin_op, stream, isRowMajor); } }; -template -struct DistanceImpl { - void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, - Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { - raft::distance::chebyshevImpl(m, n, k, x, y, dist, fin_op, stream, - isRowMajor); - } -}; - -template -struct DistanceImpl { - void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, - Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { - raft::distance::hellingerImpl(m, n, k, x, y, dist, fin_op, stream, - isRowMajor); - } -}; - -template -struct DistanceImpl { - void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, - Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { - raft::distance::minkowskiImpl(m, n, k, x, y, dist, fin_op, stream, - isRowMajor, metric_arg); - } -}; - -template -struct DistanceImpl { - void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, - Index_ k, void *workspace, size_t worksize, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { - raft::distance::canberraImpl( - m, n, k, x, y, dist, fin_op, stream, isRowMajor); - } -}; - } // anonymous namespace /** @@ -233,12 +178,11 @@ template void distance(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, - FinalLambda fin_op, cudaStream_t stream, bool isRowMajor = true, - InType metric_arg = 2.0f) { + FinalLambda fin_op, cudaStream_t stream, bool isRowMajor = true) { DistanceImpl distImpl; distImpl.run(x, y, dist, m, n, k, workspace, worksize, fin_op, stream, - isRowMajor, metric_arg); + isRowMajor); CUDA_CHECK(cudaPeekAtLastError()); } @@ -267,14 +211,13 @@ template void distance(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n, Index_ k, void *workspace, size_t worksize, - cudaStream_t stream, bool isRowMajor = true, - InType metric_arg = 2.0f) { + cudaStream_t stream, bool isRowMajor = true) { auto default_fin_op = [] __device__(AccType d_val, Index_ g_d_idx) { return d_val; }; distance(x, y, dist, m, n, k, workspace, worksize, default_fin_op, - stream, isRowMajor, metric_arg); + stream, isRowMajor); CUDA_CHECK(cudaPeekAtLastError()); } @@ -301,14 +244,12 @@ template void pairwise_distance_impl(const Type *x, const Type *y, Type *dist, Index_ m, Index_ n, Index_ k, raft::mr::device::buffer &workspace, - cudaStream_t stream, bool isRowMajor, - Type metric_arg = 2.0f) { + cudaStream_t stream, bool isRowMajor) { auto worksize = getWorkspaceSize(x, y, m, n, k); workspace.resize(worksize, stream); - distance(x, y, dist, m, n, k, - workspace.data(), worksize, - stream, isRowMajor, metric_arg); + distance( + x, y, dist, m, n, k, workspace.data(), worksize, stream, isRowMajor); } template @@ -316,7 +257,7 @@ void pairwise_distance(const Type *x, const Type *y, Type *dist, Index_ m, Index_ n, Index_ k, raft::mr::device::buffer &workspace, raft::distance::DistanceType metric, cudaStream_t stream, - bool isRowMajor = true, Type metric_arg = 2.0f) { + bool isRowMajor = true) { switch (metric) { case raft::distance::DistanceType::L2Expanded: pairwise_distance_impl( x, y, dist, m, n, k, workspace, stream, isRowMajor); break; - case raft::distance::DistanceType::Linf: - pairwise_distance_impl( - x, y, dist, m, n, k, workspace, stream, isRowMajor); - break; - case raft::distance::DistanceType::HellingerExpanded: - pairwise_distance_impl( - x, y, dist, m, n, k, workspace, stream, isRowMajor); - break; - case raft::distance::DistanceType::LpUnexpanded: - pairwise_distance_impl( - x, y, dist, m, n, k, workspace, stream, isRowMajor, metric_arg); - break; - case raft::distance::DistanceType::Canberra: - pairwise_distance_impl( - x, y, dist, m, n, k, workspace, stream, isRowMajor); - break; default: THROW("Unknown or unsupported distance metric '%d'!", (int)metric); }; diff --git a/cpp/include/raft/distance/hellinger.cuh b/cpp/include/raft/distance/hellinger.cuh deleted file mode 100644 index f7ad3ed1ba..0000000000 --- a/cpp/include/raft/distance/hellinger.cuh +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include - -namespace raft { -namespace distance { - -/** - * @brief the Hellinger distance matrix using the expanded form: - * It computes the following equation: - cij = sqrt(1 - sum(sqrt(x_k * y_k))) - * This distance computation modifies A and B by computing a sqrt - * and then performing a `pow(x, 2)` to convert it back. Because of this, - * it is possible that the values in A and B might differ slightly - * after this is invoked. - * - * @tparam DataT input data-type (for A and B matrices) - * @tparam AccT accumulation data-type - * @tparam OutT output data-type (for C and D matrices) - * @tparam IdxT index data-type - * @tparam Veclen number of k-elements loaded by each thread - for every LDG call. details in contractions.cuh - * @tparam FinalLambda final lambda called on final distance value - * @tparam isRowMajor true if input/output is row major, - false for column major - * @param[in] x input matrix - * @param[in] y input matrix - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and C/D - * @param[in] k number of cols of A and B - * @param[in] lda leading dimension of A - * @param[in] ldb leading dimension of B - * @param[in] ldd leading dimension of C/D - * @param[output] dOutput output matrix - * @param[in] fin_op the final gemm epilogue lambda - * @param[in] stream cuda stream to launch work - */ -template -static void hellingerImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, - IdxT k, IdxT lda, IdxT ldb, IdxT ldd, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream) { - typedef typename raft::linalg::Policy4x4::Policy RowPolicy; - typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; - - typedef - typename std::conditional::type KPolicy; - - dim3 blk(KPolicy::Nthreads); - - auto unaryOp_lambda = [] __device__(DataT input) { - return raft::mySqrt(input); - }; - // First sqrt x and y - raft::linalg::unaryOp( - (DataT *)x, x, m * k, unaryOp_lambda, stream); - - if (x != y) { - raft::linalg::unaryOp( - (DataT *)y, y, n * k, unaryOp_lambda, stream); - } - - // Accumulation operation lambda - auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { - // This is sqrt(x) * sqrt(y). - const auto product = x * y; - acc += product; - }; - - // epilogue operation lambda for final value calculation - auto epilog_lambda = [] __device__( - AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh], - DataT * regxn, DataT * regyn, IdxT gridStrideX, - IdxT gridStrideY) { -#pragma unroll - for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) { -#pragma unroll - for (int j = 0; j < KPolicy::AccColsPerTh; ++j) { - // Adjust to replace NaN in sqrt with 0 if input to sqrt is negative - const auto finalVal = (1 - acc[i][j]); - const auto rectifier = (!signbit(finalVal)); - acc[i][j] = raft::mySqrt(rectifier * finalVal); - } - } - }; - - if (isRowMajor) { - auto hellingerRowMajor = - pairwiseDistanceMatKernel; - dim3 grid = launchConfigGenerator(m, n, KPolicy::SmemSize, - hellingerRowMajor); - - hellingerRowMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } else { - auto hellingerColMajor = - pairwiseDistanceMatKernel; - dim3 grid = launchConfigGenerator(m, n, KPolicy::SmemSize, - hellingerColMajor); - hellingerColMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } - - // Revert sqrt of x and y - raft::linalg::unaryOp( - (DataT *)x, x, m * k, unaryOp_lambda, stream); - if (x != y) { - raft::linalg::unaryOp( - (DataT *)y, y, n * k, unaryOp_lambda, stream); - } - - CUDA_CHECK(cudaGetLastError()); -} - -template -void hellinger(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd, - const DataT *x, const DataT *y, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream) { - size_t bytesA = sizeof(DataT) * lda; - size_t bytesB = sizeof(DataT) * ldb; - if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) { - hellingerImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, - stream); - } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) { - hellingerImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, - stream); - } else { - hellingerImpl( - x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream); - } -} - -/** - * @brief the Hellinger distance matrix calculation - * It computes the following equation: - sqrt(1 - sum(sqrt(x_k * y_k)) - * This distance computation modifies A and B by computing a sqrt - * and then performing a `pow(x, 2)` to convert it back. Because of this, - * it is possible that the values in A and B might differ slightly - * after this is invoked. - * - * @tparam InType input data-type (for A and B matrices) - * @tparam AccType accumulation data-type - * @tparam OutType output data-type (for C and D matrices) - * @tparam FinalLambda user-defined epilogue lamba - * @tparam Index_ Index type - * @param m number of rows of A and C/D - * @param n number of columns of B and C/D - * @param k number of cols of A and rows of B - * @param pA input matrix - * @param pB input matrix - * @param pD output matrix - * @param fin_op the final element-wise epilogue lambda - * @param stream cuda stream where to launch work - * @param isRowMajor whether the input and output matrices are row major - */ -template -void hellingerImpl(int m, int n, int k, const InType *pA, const InType *pB, - OutType *pD, FinalLambda fin_op, cudaStream_t stream, - bool isRowMajor) { - typedef std::is_same is_bool; - typedef typename std::conditional::type - hellingerOutType; - Index_ lda, ldb, ldd; - hellingerOutType *pDcast = reinterpret_cast(pD); - if (isRowMajor) { - lda = k, ldb = k, ldd = n; - hellinger( - m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream); - - } else { - lda = n, ldb = m, ldd = m; - hellinger( - n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream); - } -} -} // namespace distance -} // namespace raft diff --git a/cpp/include/raft/distance/minkowski.cuh b/cpp/include/raft/distance/minkowski.cuh deleted file mode 100644 index 803f5fc78a..0000000000 --- a/cpp/include/raft/distance/minkowski.cuh +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include - -namespace raft { -namespace distance { - -/** - * @brief the unexpanded Minkowski distance matrix calculation - * It computes the following equation: cij = sum(|x - y|^p)^(1/p) - * @tparam DataT input data-type (for A and B matrices) - * @tparam AccT accumulation data-type - * @tparam OutT output data-type (for C and D matrices) - * @tparam IdxT index data-type - * @tparam Veclen number of k-elements loaded by each thread - for every LDG call. details in contractions.cuh - * @tparam FinalLambda final lambda called on final distance value - * - * @param[in] x input matrix - * @param[in] y input matrix - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and cols of C/D - * @param[in] k number of cols of A and B - * @param[in] lda leading dimension of A - * @param[in] ldb leading dimension of B - * @param[in] ldd leading dimension of C/D - * @param[output] pD output matrix - * @param[in] fin_op the final gemm epilogue lambda - * @param[in] stream cuda stream to launch work - * @param[in] the value of `p` for Minkowski (l-p) distances. - */ -template -void minkowskiUnExpImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, IdxT k, - IdxT lda, IdxT ldb, IdxT ldd, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream, DataT p) { - typedef typename raft::linalg::Policy4x4::Policy RowPolicy; - typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy; - - typedef - typename std::conditional::type KPolicy; - - dim3 blk(KPolicy::Nthreads); - - // Accumulation operation lambda - auto core_lambda = [p] __device__(AccT & acc, DataT & x, DataT & y) { - const auto diff = raft::L1Op()(x - y); - acc += raft::myPow(diff, p); - }; - - // epilogue operation lambda for final value calculation - auto epilog_lambda = [p] __device__( - AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh], - DataT * regxn, DataT * regyn, IdxT gridStrideX, - IdxT gridStrideY) { - const auto one_over_p = 1.0f / p; -#pragma unroll - for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) { -#pragma unroll - for (int j = 0; j < KPolicy::AccColsPerTh; ++j) { - acc[i][j] = raft::myPow(acc[i][j], one_over_p); - } - } - }; - - if (isRowMajor) { - auto minkowskiUnExpRowMajor = - pairwiseDistanceMatKernel; - dim3 grid = launchConfigGenerator(m, n, KPolicy::SmemSize, - minkowskiUnExpRowMajor); - - minkowskiUnExpRowMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - - } else { - auto minkowskiUnExpColMajor = - pairwiseDistanceMatKernel; - dim3 grid = launchConfigGenerator(m, n, KPolicy::SmemSize, - minkowskiUnExpColMajor); - - minkowskiUnExpColMajor<<>>( - x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda, - epilog_lambda, fin_op); - } - - CUDA_CHECK(cudaGetLastError()); -} - -template -void minkowskiUnExp(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd, - const DataT *x, const DataT *y, OutT *dOutput, - FinalLambda fin_op, cudaStream_t stream, DataT metric_arg) { - size_t bytesA = sizeof(DataT) * lda; - size_t bytesB = sizeof(DataT) * ldb; - if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) { - minkowskiUnExpImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, - fin_op, stream, metric_arg); - } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) { - minkowskiUnExpImpl(x, y, m, n, k, lda, ldb, ldd, dOutput, - fin_op, stream, metric_arg); - } else { - minkowskiUnExpImpl( - x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream, metric_arg); - } -} - -/** - * @brief the unexpanded minkowski distance matrix calculation - * It computes the following equation: cij = sum(|x - y|^p)^(1/p) - * @tparam InType input data-type (for A and B matrices) - * @tparam AccType accumulation data-type - * @tparam OutType output data-type (for C and D matrices) - * @tparam FinalLambda user-defined epilogue lamba - * @tparam Index_ index type - * @param[in] m number of rows of A and C/D - * @param[in] n number of rows of B and cols of C/D - * @param[in] k number of cols of A and B - * @param[in] pA input matrix - * @param[in] pB input matrix - * @param[out] pD output matrix - * @param[in] fin_op the final gemm epilogue lambda - * @param[in] stream cuda stream to launch work - * @param[in] isRowMajor whether the input and output matrices are row major - * @param[in] metric_arg the value of `p` for Minkowski (l-p) distances. - */ -template -void minkowskiImpl(Index_ m, Index_ n, Index_ k, const InType *pA, - const InType *pB, OutType *pD, FinalLambda fin_op, - cudaStream_t stream, bool isRowMajor, InType metric_arg) { - typedef std::is_same is_bool; - typedef typename std::conditional::type - LpUnexpOutType; - LpUnexpOutType *pDcast = reinterpret_cast(pD); - Index_ lda, ldb, ldd; - - if (isRowMajor) { - lda = k, ldb = k, ldd = n; - minkowskiUnExp( - m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream, metric_arg); - } else { - lda = n, ldb = m, ldd = m; - minkowskiUnExp( - n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream, metric_arg); - } -} - -}; // end namespace distance -}; // end namespace raft diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh index aa711a9140..c590abb142 100644 --- a/cpp/include/raft/linalg/contractions.cuh +++ b/cpp/include/raft/linalg/contractions.cuh @@ -338,7 +338,6 @@ struct Contractions_NT { if (isRowMajor) { auto numRows = m; auto koffset = kidx + scolid; -#pragma unroll for (int i = 0; i < P::LdgPerThX; ++i) { if (koffset < lda && (xrowid + i * P::LdgRowsX) < numRows) { ldg(ldgDataX[i], x + i * P::LdgRowsX * lda + koffset); @@ -352,7 +351,6 @@ struct Contractions_NT { } else { const auto numRows = k; auto koffset = scolid; -#pragma unroll for (int i = 0; i < P::LdgPerThX; ++i) { if ((koffset + xrowid) < lda && (srowid + kidx + i * P::LdgRowsX) < numRows) { @@ -371,7 +369,6 @@ struct Contractions_NT { if (isRowMajor) { auto numRows = n; auto koffset = kidx + scolid; -#pragma unroll for (int i = 0; i < P::LdgPerThY; ++i) { if (koffset < ldb && (yrowid + i * P::LdgRowsY) < numRows) { ldg(ldgDataY[i], y + i * P::LdgRowsY * ldb + koffset); @@ -385,7 +382,6 @@ struct Contractions_NT { } else { auto numRows = k; auto koffset = scolid; -#pragma unroll for (int i = 0; i < P::LdgPerThY; ++i) { if ((koffset + yrowid) < ldb && (srowid + kidx + i * P::LdgRowsY) < numRows) { diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index f94a8d9525..0dec4be833 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -19,14 +19,10 @@ add_executable(test_raft test/cudart_utils.cpp test/cluster_solvers.cu test/distance/dist_adj.cu - test/distance/dist_canberra.cu - test/distance/dist_chebyshev.cu test/distance/dist_cos.cu test/distance/dist_euc_exp.cu test/distance/dist_euc_unexp.cu - test/distance/dist_hellinger.cu test/distance/dist_l1.cu - test/distance/dist_minkowski.cu test/distance/fused_l2_nn.cu test/eigen_solvers.cu test/handle.cpp diff --git a/cpp/test/distance/dist_canberra.cu b/cpp/test/distance/dist_canberra.cu deleted file mode 100644 index 10bc4d1899..0000000000 --- a/cpp/test/distance/dist_canberra.cu +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../test_utils.h" -#include "distance_base.cuh" - -namespace raft { -namespace distance { - -template -class DistanceCanberra - : public DistanceTest {}; - -const std::vector> inputsf = { - {0.001f, 1024, 1024, 32, true, 1234ULL}, - {0.001f, 1024, 32, 1024, true, 1234ULL}, - {0.001f, 32, 1024, 1024, true, 1234ULL}, - {0.003f, 1024, 1024, 1024, true, 1234ULL}, - {0.001f, 1024, 1024, 32, false, 1234ULL}, - {0.001f, 1024, 32, 1024, false, 1234ULL}, - {0.001f, 32, 1024, 1024, false, 1234ULL}, - {0.003f, 1024, 1024, 1024, false, 1234ULL}, -}; -typedef DistanceCanberra DistanceCanberraF; -TEST_P(DistanceCanberraF, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCanberraF, - ::testing::ValuesIn(inputsf)); - -const std::vector> inputsd = { - {0.001, 1024, 1024, 32, true, 1234ULL}, - {0.001, 1024, 32, 1024, true, 1234ULL}, - {0.001, 32, 1024, 1024, true, 1234ULL}, - {0.003, 1024, 1024, 1024, true, 1234ULL}, - {0.001, 1024, 1024, 32, false, 1234ULL}, - {0.001, 1024, 32, 1024, false, 1234ULL}, - {0.001, 32, 1024, 1024, false, 1234ULL}, - {0.003, 1024, 1024, 1024, false, 1234ULL}, -}; -typedef DistanceCanberra DistanceCanberraD; -TEST_P(DistanceCanberraD, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCanberraD, - ::testing::ValuesIn(inputsd)); - -} // end namespace distance -} // end namespace raft diff --git a/cpp/test/distance/dist_chebyshev.cu b/cpp/test/distance/dist_chebyshev.cu deleted file mode 100644 index 6a2b02863a..0000000000 --- a/cpp/test/distance/dist_chebyshev.cu +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../test_utils.h" -#include "distance_base.cuh" - -namespace raft { -namespace distance { - -template -class DistanceLinf - : public DistanceTest {}; - -const std::vector> inputsf = { - {0.001f, 1024, 1024, 32, true, 1234ULL}, - {0.001f, 1024, 32, 1024, true, 1234ULL}, - {0.001f, 32, 1024, 1024, true, 1234ULL}, - {0.003f, 1024, 1024, 1024, true, 1234ULL}, - {0.001f, 1024, 1024, 32, false, 1234ULL}, - {0.001f, 1024, 32, 1024, false, 1234ULL}, - {0.001f, 32, 1024, 1024, false, 1234ULL}, - {0.003f, 1024, 1024, 1024, false, 1234ULL}, -}; -typedef DistanceLinf DistanceLinfF; -TEST_P(DistanceLinfF, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLinfF, - ::testing::ValuesIn(inputsf)); - -const std::vector> inputsd = { - {0.001, 1024, 1024, 32, true, 1234ULL}, - {0.001, 1024, 32, 1024, true, 1234ULL}, - {0.001, 32, 1024, 1024, true, 1234ULL}, - {0.003, 1024, 1024, 1024, true, 1234ULL}, - {0.001, 1024, 1024, 32, false, 1234ULL}, - {0.001, 1024, 32, 1024, false, 1234ULL}, - {0.001, 32, 1024, 1024, false, 1234ULL}, - {0.003, 1024, 1024, 1024, false, 1234ULL}, -}; -typedef DistanceLinf DistanceLinfD; -TEST_P(DistanceLinfD, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLinfD, - ::testing::ValuesIn(inputsd)); - -} // end namespace distance -} // end namespace raft diff --git a/cpp/test/distance/dist_hellinger.cu b/cpp/test/distance/dist_hellinger.cu deleted file mode 100644 index 39dc7aaeff..0000000000 --- a/cpp/test/distance/dist_hellinger.cu +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../test_utils.h" -#include "distance_base.cuh" - -namespace raft { -namespace distance { - -template -class DistanceHellingerExp - : public DistanceTest {}; - -const std::vector> inputsf = { - {0.001f, 1024, 1024, 32, true, 1234ULL}, - {0.001f, 1024, 32, 1024, true, 1234ULL}, - {0.001f, 32, 1024, 1024, true, 1234ULL}, - {0.003f, 1024, 1024, 1024, true, 1234ULL}, - {0.001f, 1024, 1024, 32, false, 1234ULL}, - {0.001f, 1024, 32, 1024, false, 1234ULL}, - {0.001f, 32, 1024, 1024, false, 1234ULL}, - {0.003f, 1024, 1024, 1024, false, 1234ULL}, -}; -typedef DistanceHellingerExp DistanceHellingerExpF; -TEST_P(DistanceHellingerExpF, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHellingerExpF, - ::testing::ValuesIn(inputsf)); - -const std::vector> inputsd = { - {0.001, 1024, 1024, 32, true, 1234ULL}, - {0.001, 1024, 32, 1024, true, 1234ULL}, - {0.001, 32, 1024, 1024, true, 1234ULL}, - {0.003, 1024, 1024, 1024, true, 1234ULL}, - {0.001, 1024, 1024, 32, false, 1234ULL}, - {0.001, 1024, 32, 1024, false, 1234ULL}, - {0.001, 32, 1024, 1024, false, 1234ULL}, - {0.003, 1024, 1024, 1024, false, 1234ULL}, -}; -typedef DistanceHellingerExp DistanceHellingerExpD; -TEST_P(DistanceHellingerExpD, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHellingerExpD, - ::testing::ValuesIn(inputsd)); - -} // end namespace distance -} // end namespace raft diff --git a/cpp/test/distance/dist_minkowski.cu b/cpp/test/distance/dist_minkowski.cu deleted file mode 100644 index 42b8e294ac..0000000000 --- a/cpp/test/distance/dist_minkowski.cu +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../test_utils.h" -#include "distance_base.cuh" - -namespace raft { -namespace distance { - -template -class DistanceLpUnexp - : public DistanceTest { -}; - -const std::vector> inputsf = { - {0.001f, 1024, 1024, 32, true, 1234ULL, 4.0f}, - {0.001f, 1024, 32, 1024, true, 1234ULL, 3.0f}, - {0.001f, 32, 1024, 1024, true, 1234ULL, 4.0f}, - {0.003f, 1024, 1024, 1024, true, 1234ULL, 3.0f}, - {0.001f, 1024, 1024, 32, false, 1234ULL, 4.0f}, - {0.001f, 1024, 32, 1024, false, 1234ULL, 3.0f}, - {0.001f, 32, 1024, 1024, false, 1234ULL, 4.0f}, - {0.003f, 1024, 1024, 1024, false, 1234ULL, 3.0f}, -}; -typedef DistanceLpUnexp DistanceLpUnexpF; -TEST_P(DistanceLpUnexpF, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLpUnexpF, - ::testing::ValuesIn(inputsf)); - -const std::vector> inputsd = { - {0.001, 1024, 1024, 32, true, 1234ULL, 4.0}, - {0.001, 1024, 32, 1024, true, 1234ULL, 3.0}, - {0.001, 32, 1024, 1024, true, 1234ULL, 4.0}, - {0.003, 1024, 1024, 1024, true, 1234ULL, 3.0}, - {0.001, 1024, 1024, 32, false, 1234ULL, 4.0}, - {0.001, 1024, 32, 1024, false, 1234ULL, 3.0}, - {0.001, 32, 1024, 1024, false, 1234ULL, 4.0}, - {0.003, 1024, 1024, 1024, false, 1234ULL, 3.0}, -}; -typedef DistanceLpUnexp DistanceLpUnexpD; -TEST_P(DistanceLpUnexpD, Result) { - int m = params.isRowMajor ? params.m : params.n; - int n = params.isRowMajor ? params.n : params.m; - ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n, - raft::CompareApprox(params.tolerance))); -} -INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceLpUnexpD, - ::testing::ValuesIn(inputsd)); - -} // end namespace distance -} // end namespace raft diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index fc7b064205..d6f06c186a 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -47,9 +47,9 @@ __global__ void naiveDistanceKernel(DataType *dist, const DataType *x, } template -__global__ void naiveL1_Linf_CanberraDistanceKernel( - DataType *dist, const DataType *x, const DataType *y, int m, int n, int k, - raft::distance::DistanceType type, bool isRowMajor) { +__global__ void naiveL1DistanceKernel(DataType *dist, const DataType *x, + const DataType *y, int m, int n, int k, + bool isRowMajor) { int midx = threadIdx.x + blockIdx.x * blockDim.x; int nidx = threadIdx.y + blockIdx.y * blockDim.y; if (midx >= m || nidx >= n) { @@ -63,16 +63,7 @@ __global__ void naiveL1_Linf_CanberraDistanceKernel( auto a = x[xidx]; auto b = y[yidx]; auto diff = (a > b) ? (a - b) : (b - a); - if (type == raft::distance::DistanceType::Linf) { - acc = raft::myMax(acc, diff); - } else if (type == raft::distance::DistanceType::Canberra) { - const auto add = raft::myAbs(a) + raft::myAbs(b); - // deal with potential for 0 in denominator by - // forcing 1/0 instead - acc += ((add != 0) * diff / (add + (add == 0))); - } else { - acc += diff; - } + acc += diff; } int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; @@ -110,69 +101,17 @@ __global__ void naiveCosineDistanceKernel(DataType *dist, const DataType *x, (DataType)1.0 - acc_ab / (raft::mySqrt(acc_a) * raft::mySqrt(acc_b)); } -template -__global__ void naiveHellingerDistanceKernel(DataType *dist, const DataType *x, - const DataType *y, int m, int n, - int k, bool isRowMajor) { - int midx = threadIdx.x + blockIdx.x * blockDim.x; - int nidx = threadIdx.y + blockIdx.y * blockDim.y; - if (midx >= m || nidx >= n) { - return; - } - - DataType acc_ab = DataType(0); - - for (int i = 0; i < k; ++i) { - int xidx = isRowMajor ? i + midx * k : i * m + midx; - int yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; - acc_ab += raft::mySqrt(a) * raft::mySqrt(b); - } - - int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; - - // Adjust to replace NaN in sqrt with 0 if input to sqrt is negative - acc_ab = 1 - acc_ab; - auto rectifier = (!signbit(acc_ab)); - dist[outidx] = raft::mySqrt(rectifier * acc_ab); -} - -template -__global__ void naiveLpUnexpDistanceKernel(DataType *dist, const DataType *x, - const DataType *y, int m, int n, - int k, bool isRowMajor, DataType p) { - int midx = threadIdx.x + blockIdx.x * blockDim.x; - int nidx = threadIdx.y + blockIdx.y * blockDim.y; - if (midx >= m || nidx >= n) return; - DataType acc = DataType(0); - for (int i = 0; i < k; ++i) { - int xidx = isRowMajor ? i + midx * k : i * m + midx; - int yidx = isRowMajor ? i + nidx * k : i * n + nidx; - auto a = x[xidx]; - auto b = y[yidx]; - auto diff = raft::L1Op()(a - b); - acc += raft::myPow(diff, p); - } - auto one_over_p = 1 / p; - acc = raft::myPow(acc, one_over_p); - int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; - dist[outidx] = acc; -} - template void naiveDistance(DataType *dist, const DataType *x, const DataType *y, int m, int n, int k, raft::distance::DistanceType type, - bool isRowMajor, DataType metric_arg = 2.0f) { + bool isRowMajor) { static const dim3 TPB(16, 32, 1); dim3 nblks(raft::ceildiv(m, (int)TPB.x), raft::ceildiv(n, (int)TPB.y), 1); switch (type) { - case raft::distance::DistanceType::Canberra: - case raft::distance::DistanceType::Linf: case raft::distance::DistanceType::L1: - naiveL1_Linf_CanberraDistanceKernel - <<>>(dist, x, y, m, n, k, type, isRowMajor); + naiveL1DistanceKernel + <<>>(dist, x, y, m, n, k, isRowMajor); break; case raft::distance::DistanceType::L2SqrtUnexpanded: case raft::distance::DistanceType::L2Unexpanded: @@ -185,14 +124,6 @@ void naiveDistance(DataType *dist, const DataType *x, const DataType *y, int m, naiveCosineDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; - case raft::distance::DistanceType::HellingerExpanded: - naiveHellingerDistanceKernel - <<>>(dist, x, y, m, n, k, isRowMajor); - break; - case raft::distance::DistanceType::LpUnexpanded: - naiveLpUnexpDistanceKernel - <<>>(dist, x, y, m, n, k, isRowMajor, metric_arg); - break; default: FAIL() << "should be here\n"; } @@ -205,7 +136,6 @@ struct DistanceInputs { int m, n, k; bool isRowMajor; unsigned long long int seed; - DataType metric_arg = 2.0f; }; template @@ -218,15 +148,13 @@ template void distanceLauncher(DataType *x, DataType *y, DataType *dist, DataType *dist2, int m, int n, int k, DistanceInputs ¶ms, DataType threshold, char *workspace, size_t worksize, - cudaStream_t stream, bool isRowMajor, - DataType metric_arg = 2.0f) { + cudaStream_t stream, bool isRowMajor) { auto fin_op = [dist2, threshold] __device__(DataType d_val, int g_d_idx) { dist2[g_d_idx] = (d_val < threshold) ? 0.f : d_val; return d_val; }; raft::distance::distance( - x, y, dist, m, n, k, workspace, worksize, fin_op, stream, isRowMajor, - metric_arg); + x, y, dist, m, n, k, workspace, worksize, fin_op, stream, isRowMajor); } template @@ -238,7 +166,6 @@ class DistanceTest : public ::testing::TestWithParam> { int m = params.m; int n = params.n; int k = params.k; - DataType metric_arg = params.metric_arg; bool isRowMajor = params.isRowMajor; cudaStream_t stream; CUDA_CHECK(cudaStreamCreate(&stream)); @@ -247,17 +174,9 @@ class DistanceTest : public ::testing::TestWithParam> { raft::allocate(dist_ref, m * n); raft::allocate(dist, m * n); raft::allocate(dist2, m * n); - if (distanceType == raft::distance::DistanceType::HellingerExpanded) { - // Hellinger works only on positive numbers - r.uniform(x, m * k, DataType(0.0), DataType(1.0), stream); - r.uniform(y, n * k, DataType(0.0), DataType(1.0), stream); - } else { - r.uniform(x, m * k, DataType(-1.0), DataType(1.0), stream); - r.uniform(y, n * k, DataType(-1.0), DataType(1.0), stream); - } - - naiveDistance(dist_ref, x, y, m, n, k, distanceType, isRowMajor, - metric_arg); + r.uniform(x, m * k, DataType(-1.0), DataType(1.0), stream); + r.uniform(y, n * k, DataType(-1.0), DataType(1.0), stream); + naiveDistance(dist_ref, x, y, m, n, k, distanceType, isRowMajor); char *workspace = nullptr; size_t worksize = raft::distance::getWorkspaceSize> { DataType threshold = -10000.f; distanceLauncher(x, y, dist, dist2, m, n, k, params, threshold, workspace, worksize, - stream, isRowMajor, metric_arg); + stream, isRowMajor); CUDA_CHECK(cudaStreamDestroy(stream)); CUDA_CHECK(cudaFree(workspace)); }