diff --git a/cpp/include/raft/distance/correlation.cuh b/cpp/include/raft/distance/correlation.cuh
new file mode 100644
index 0000000000..ed3b7a5464
--- /dev/null
+++ b/cpp/include/raft/distance/correlation.cuh
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <raft/distance/pairwise_distance_base.cuh>
+#include <raft/linalg/norm.cuh>
+#include <raft/linalg/reduce.cuh>
+
+namespace raft {
+namespace distance {
+
+/**
+ * @brief the Correlation distance matrix:
+ * It computes the following equation:
+   Cij = 1 - (k * sum(x*y) - sum(x) * sum(y)) /
+             (sqrt(k * sum(x^2) - sum(x)^2) * sqrt(k * sum(y^2) - sum(y)^2))
+ *
+ * @tparam DataT input data-type (for A and B matrices)
+ * @tparam AccT accumulation data-type
+ * @tparam OutT output data-type (for C and D matrices)
+ * @tparam IdxT index data-type
+ * @tparam Veclen number of k-elements loaded by each thread
+                  for every LDG call. details in contractions.cuh
+ * @tparam FinalLambda final lambda called on final distance value
+ * @tparam isRowMajor true if input/output is row major,
+                      false for column major
+ * @param[in] x input matrix
+ * @param[in] y input matrix
+ * @param[in] m number of rows of A and C/D
+ * @param[in] n number of rows of B and cols of C/D
+ * @param[in] k number of cols of A and B
+ * @param[in] lda leading dimension of A
+ * @param[in] ldb leading dimension of B
+ * @param[in] ldd leading dimension of C/D
+ * @param[out] dOutput output matrix
+ * @param[in] fin_op the final gemm epilogue lambda
+ * @param[in] stream cuda stream to launch work
+ */
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          int Veclen, typename FinalLambda, bool isRowMajor>
+static void correlationImpl(const DataT *x, const DataT *y, const DataT *xn,
+                            const DataT *yn, const DataT *x2n, const DataT *y2n,
+                            IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb,
+                            IdxT ldd, OutT *dOutput, FinalLambda fin_op,
+                            cudaStream_t stream) {
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::ColPolicy ColPolicy;
+
+  typedef
+    typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;
+
+  dim3 blk(KPolicy::Nthreads);
+
+  // Accumulation operation lambda
+  auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) {
+    acc += x * y;
+  };
+
+  // epilogue operation lambda for final value calculation
+  auto epilog_lambda = [x2n, y2n, m, n, k] __device__(
+                         AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+                         DataT * regxn, DataT * regyn, IdxT gridStrideX,
+                         IdxT gridStrideY) {
+    DataT regx2n[KPolicy::AccRowsPerTh], regy2n[KPolicy::AccColsPerTh];
+
+    extern __shared__ char smem[];
+    DataT *sx2Norm =
+      (DataT *)(&smem[KPolicy::SmemSize +
+                      (KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT)]);
+    DataT *sy2Norm = (&sx2Norm[KPolicy::Mblk]);
+
+    // Load x & y norms required by this threadblock in shmem buffer
+    if (gridStrideX == blockIdx.x * KPolicy::Nblk) {
+      for (int i = threadIdx.x; i < KPolicy::Mblk; i += KPolicy::Nthreads) {
+        auto idx = gridStrideY + i;
+        sx2Norm[i] = idx < m ? x2n[idx] : 0;
+      }
+    }
+
+    for (int i = threadIdx.x; i < KPolicy::Nblk; i += KPolicy::Nthreads) {
+      auto idx = gridStrideX + i;
+      sy2Norm[i] = idx < n ? y2n[idx] : 0;
+    }
+    __syncthreads();
+
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+      regx2n[i] =
+        sx2Norm[i * KPolicy::AccThRows + (threadIdx.x / KPolicy::AccThCols)];
+    }
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccColsPerTh; ++i) {
+      regy2n[i] =
+        sy2Norm[i * KPolicy::AccThCols + (threadIdx.x % KPolicy::AccThCols)];
+    }
+
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+#pragma unroll
+      for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+        auto numer = k * acc[i][j] - (regxn[i] * regyn[j]);
+        auto Q_denom = k * regx2n[i] - (regxn[i] * regxn[i]);
+        auto R_denom = k * regy2n[j] - (regyn[j] * regyn[j]);
+
+        acc[i][j] = 1 - (numer / raft::mySqrt(Q_denom * R_denom));
+      }
+    }
+  };
+
+  constexpr size_t shmemSize =
+    KPolicy::SmemSize + (2 * (KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT));
+  if (isRowMajor) {
+    constexpr auto correlationRowMajor =
+      pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, true>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               correlationRowMajor);
+    correlationRowMajor<<<grid, blk, shmemSize, stream>>>(
+      x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda,
+      fin_op);
+  } else {
+    constexpr auto correlationColMajor =
+      pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, false>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               correlationColMajor);
+    correlationColMajor<<<grid, blk, shmemSize, stream>>>(
+      x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda,
+      fin_op);
+  }
+
+  CUDA_CHECK(cudaGetLastError());
+}
+
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          typename FinalLambda, bool isRowMajor>
+void correlation(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                 const DataT *x, const DataT *y, const DataT *xn,
+                 const DataT *yn, const DataT *x2n, const DataT *y2n,
+                 OutT *dOutput, FinalLambda fin_op, cudaStream_t stream) {
+  size_t bytesA = sizeof(DataT) * lda;
+  size_t bytesB = sizeof(DataT) * ldb;
+  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
+    correlationImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), FinalLambda,
+                    isRowMajor>(x, y, xn, yn, x2n, y2n, m, n, k, lda, ldb, ldd,
+                                dOutput, fin_op, stream);
+  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
+    correlationImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), FinalLambda,
+                    isRowMajor>(x, y, xn, yn, x2n, y2n, m, n, k, lda, ldb, ldd,
+                                dOutput, fin_op, stream);
+  } else {
+    correlationImpl<DataT, AccT, OutT, IdxT, 1, FinalLambda, isRowMajor>(
+      x, y, xn, yn, x2n, y2n, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream);
+  }
+}
+
+/**
+ * @brief the Correlation distance matrix calculation
+ *
+ * @tparam InType input data-type (for A and B matrices)
+ * @tparam AccType accumulation data-type
+ * @tparam OutType output data-type (for C and D matrices)
+ * @tparam FinalLambda user-defined epilogue lambda
+ * @tparam Index_ Index type
+ * @param m number of rows of A and C/D
+ * @param n number of columns of B and C/D
+ * @param k number of cols of A and B
+ * @param pA input matrix
+ * @param pB input matrix
+ * @param pD output matrix
+ * @param workspace temporary buffer holding row sums and squared row norms
+ * @param worksize size of workspace in bytes
+ * @param fin_op the final element-wise epilogue lambda
+ * @param stream cuda stream where to launch work
+ * @param isRowMajor whether the input and output matrices are row major
+ */
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_ = int>
+void correlationImpl(int m, int n, int k, const InType *pA, const InType *pB,
+                     OutType *pD, AccType *workspace, size_t &worksize,
+                     FinalLambda fin_op, cudaStream_t stream, bool isRowMajor) {
+  typedef std::is_same<OutType, bool> is_bool;
+  typedef typename std::conditional<is_bool::value, OutType, AccType>::type
+    correlationOutType;
+  Index_ lda, ldb, ldd;
+  correlationOutType *pDcast = reinterpret_cast<correlationOutType *>(pD);
+
+  ASSERT(!(((pA != pB) && (worksize < 2 * (m + n) * sizeof(AccType))) ||
+           (worksize < 2 * m * sizeof(AccType))),
+         "workspace size error");
+  ASSERT(workspace != nullptr, "workspace is null");
+
+  AccType *norm_col_vec = workspace;
+  AccType *norm_row_vec = workspace;
+  AccType *sq_norm_col_vec = workspace;
+  AccType *sq_norm_row_vec = workspace;
+  if (pA != pB) {
+    norm_row_vec += m;
+
+    raft::linalg::reduce(norm_col_vec, pA, k, m, (AccType)0, isRowMajor, true,
+                         stream, false, raft::Nop<AccType>(),
+                         raft::Sum<AccType>());
+    raft::linalg::reduce(norm_row_vec, pB, k, n, (AccType)0, isRowMajor, true,
+                         stream, false, raft::Nop<AccType>(),
+                         raft::Sum<AccType>());
+
+    sq_norm_col_vec += (m + n);
+    sq_norm_row_vec = sq_norm_col_vec + m;
+    raft::linalg::rowNorm(sq_norm_col_vec, pA, k, m, raft::linalg::L2Norm,
+                          isRowMajor, stream);
+    raft::linalg::rowNorm(sq_norm_row_vec, pB, k, n, raft::linalg::L2Norm,
+                          isRowMajor, stream);
+  } else {
+    raft::linalg::reduce(norm_col_vec, pA, k, m, (AccType)0, isRowMajor, true,
+                         stream, false, raft::Nop<AccType>(),
+                         raft::Sum<AccType>());
+    sq_norm_col_vec += m;
+    sq_norm_row_vec = sq_norm_col_vec;
+    raft::linalg::rowNorm(sq_norm_col_vec, pA, k, m, raft::linalg::L2Norm,
+                          isRowMajor, stream);
+  }
+
+  if (isRowMajor) {
+    lda = k, ldb = k, ldd = n;
+    correlation<InType, AccType, correlationOutType, Index_, FinalLambda, true>(
+      m, n, k, lda, ldb, ldd, pA, pB, norm_col_vec, norm_row_vec,
+      sq_norm_col_vec, sq_norm_row_vec, pDcast, fin_op, stream);
+  } else {
+    lda = n, ldb = m, ldd = m;
+    correlation<InType, AccType, correlationOutType, Index_, FinalLambda,
+                false>(n, m, k, lda, ldb, ldd, pB, pA, norm_row_vec,
+                       norm_col_vec, sq_norm_row_vec, sq_norm_col_vec, pDcast,
+                       fin_op, stream);
+  }
+}
+}  // namespace distance
+}  // namespace raft
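For reference, the epilogue above evaluates the expanded form of the Pearson-correlation distance entirely from five running sums, so no per-row means are ever materialized. A minimal host-side sketch of the same identity (illustrative only, not part of the patch; plain C++ with no RAFT dependencies):

#include <cmath>
#include <cstdio>

// Correlation distance between two k-length vectors, using the same
// single-pass accumulators the CUDA epilogue consumes: sum(x), sum(y),
// sum(x^2), sum(y^2) and sum(x*y).
double correlation_distance(const double* x, const double* y, int k) {
  double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
  for (int i = 0; i < k; ++i) {
    sx += x[i];
    sy += y[i];
    sxx += x[i] * x[i];
    syy += y[i] * y[i];
    sxy += x[i] * y[i];
  }
  double numer = k * sxy - sx * sy;    // k^2 * cov(x, y)
  double Q_denom = k * sxx - sx * sx;  // k^2 * var(x)
  double R_denom = k * syy - sy * sy;  // k^2 * var(y)
  return 1.0 - numer / std::sqrt(Q_denom * R_denom);
}

int main() {
  double x[] = {1, 2, 3, 4};
  double y[] = {2, 4, 6, 8};  // perfectly correlated -> distance 0
  double z[] = {4, 3, 2, 1};  // anti-correlated      -> distance 2
  std::printf("%f %f\n", correlation_distance(x, y, 4),
              correlation_distance(x, z, 4));
  return 0;
}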
diff --git a/cpp/include/raft/distance/distance.cuh b/cpp/include/raft/distance/distance.cuh
index fc0d07773f..02d8fb6d03 100644
--- a/cpp/include/raft/distance/distance.cuh
+++ b/cpp/include/raft/distance/distance.cuh
@@ -21,11 +21,16 @@
 #include <raft/distance/canberra.cuh>
 #include <raft/distance/chebyshev.cuh>
 #include <raft/distance/cosine.cuh>
+#include <raft/distance/correlation.cuh>
 #include <raft/distance/euc_expanded.cuh>
 #include <raft/distance/euc_unexpanded.cuh>
+#include <raft/distance/hamming.cuh>
 #include <raft/distance/hellinger.cuh>
+#include <raft/distance/jensen_shannon.cuh>
+#include <raft/distance/kl_divergence.cuh>
 #include <raft/distance/l1.cuh>
 #include <raft/distance/minkowski.cuh>
+#include <raft/distance/russell_rao.cuh>
 #include <raft/mr/device/buffer.hpp>
 
 namespace raft {
@@ -171,6 +176,72 @@ struct DistanceImpl<raft::distance::DistanceType::HellingerExpanded, InType,
   }
 };
 
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_>
+struct DistanceImpl<raft::distance::DistanceType::HammingUnexpanded, InType,
+                    AccType, OutType, FinalLambda, Index_> {
+  void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n,
+           Index_ k, void *, size_t, FinalLambda fin_op, cudaStream_t stream,
+           bool isRowMajor, InType) {
+    raft::distance::hammingUnexpandedImpl<InType, AccType, OutType,
+                                          FinalLambda, Index_>(
+      m, n, k, x, y, dist, fin_op, stream, isRowMajor);
+  }
+};
+
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_>
+struct DistanceImpl<raft::distance::DistanceType::JensenShannon, InType,
+                    AccType, OutType, FinalLambda, Index_> {
+  void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n,
+           Index_ k, void *, size_t, FinalLambda fin_op, cudaStream_t stream,
+           bool isRowMajor, InType) {
+    raft::distance::jensenShannonImpl<InType, AccType, OutType, FinalLambda,
+                                      Index_>(m, n, k, x, y, dist, fin_op,
+                                              stream, isRowMajor);
+  }
+};
+
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_>
+struct DistanceImpl<raft::distance::DistanceType::RusselRaoExpanded, InType,
+                    AccType, OutType, FinalLambda, Index_> {
+  void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n,
+           Index_ k, void *, size_t, FinalLambda fin_op, cudaStream_t stream,
+           bool isRowMajor, InType) {
+    raft::distance::russellRaoImpl<InType, AccType, OutType, FinalLambda,
+                                   Index_>(m, n, k, x, y, dist, fin_op, stream,
+                                           isRowMajor);
+  }
+};
+
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_>
+struct DistanceImpl<raft::distance::DistanceType::KLDivergence, InType,
+                    AccType, OutType, FinalLambda, Index_> {
+  void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n,
+           Index_ k, void *, size_t, FinalLambda fin_op, cudaStream_t stream,
+           bool isRowMajor, InType) {
+    raft::distance::klDivergenceImpl<InType, AccType, OutType, FinalLambda,
+                                     Index_>(m, n, k, x, y, dist, fin_op,
+                                             stream, isRowMajor);
+  }
+};
+
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_>
+struct DistanceImpl<raft::distance::DistanceType::CorrelationExpanded, InType,
+                    AccType, OutType, FinalLambda, Index_> {
+  void run(const InType *x, const InType *y, OutType *dist, Index_ m, Index_ n,
+           Index_ k, void *workspace, size_t worksize, FinalLambda fin_op,
+           cudaStream_t stream, bool isRowMajor, InType) {
+    raft::distance::correlationImpl<InType, AccType, OutType, FinalLambda,
+                                    Index_>(m, n, k, x, y, dist,
+                                            (AccType *)workspace, worksize,
+                                            fin_op, stream, isRowMajor);
+  }
+};
+
 }  // anonymous namespace
 
 /**
@@ -195,11 +266,16 @@ size_t getWorkspaceSize(const InType *x, const InType *y, Index_ m, Index_ n,
                         Index_ k) {
   size_t worksize = 0;
   constexpr bool is_allocated =
-    distanceType <= raft::distance::DistanceType::CosineExpanded;
+    (distanceType <= raft::distance::DistanceType::CosineExpanded) ||
+    (distanceType == raft::distance::DistanceType::CorrelationExpanded);
+  constexpr int numOfBuffers =
+    (distanceType == raft::distance::DistanceType::CorrelationExpanded) ? 2
+                                                                        : 1;
+
   if (is_allocated) {
-    worksize += m * sizeof(AccType);
-    if (x != y) worksize += n * sizeof(AccType);
+    worksize += numOfBuffers * m * sizeof(AccType);
+    if (x != y) worksize += numOfBuffers * n * sizeof(AccType);
   }
+
   return worksize;
 }
 
@@ -366,6 +442,31 @@ void pairwise_distance(const Type *x, const Type *y, Type *dist, Index_ m,
                              raft::distance::DistanceType::Canberra>(
         x, y, dist, m, n, k, workspace, stream, isRowMajor);
       break;
+    case raft::distance::DistanceType::HammingUnexpanded:
+      pairwise_distance_impl<Type, Index_,
+                             raft::distance::DistanceType::HammingUnexpanded>(
+        x, y, dist, m, n, k, workspace, stream, isRowMajor);
+      break;
+    case raft::distance::DistanceType::JensenShannon:
+      pairwise_distance_impl<Type, Index_,
+                             raft::distance::DistanceType::JensenShannon>(
+        x, y, dist, m, n, k, workspace, stream, isRowMajor);
+      break;
+    case raft::distance::DistanceType::RusselRaoExpanded:
+      pairwise_distance_impl<Type, Index_,
+                             raft::distance::DistanceType::RusselRaoExpanded>(
+        x, y, dist, m, n, k, workspace, stream, isRowMajor);
+      break;
+    case raft::distance::DistanceType::KLDivergence:
+      pairwise_distance_impl<Type, Index_,
+                             raft::distance::DistanceType::KLDivergence>(
+        x, y, dist, m, n, k, workspace, stream, isRowMajor);
+      break;
+    case raft::distance::DistanceType::CorrelationExpanded:
+      pairwise_distance_impl<Type, Index_,
+                             raft::distance::DistanceType::CorrelationExpanded>(
+        x, y, dist, m, n, k, workspace, stream, isRowMajor);
+      break;
     default:
       THROW("Unknown or unsupported distance metric '%d'!", (int)metric);
   };
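CorrelationExpanded is the only new metric that needs scratch space: per row-vector set it stores both the plain row sums and the squared row norms, hence the factor-of-two numOfBuffers above. A small host-side mirror of the sizing rule (a sketch for illustration; the real entry point is getWorkspaceSize):

#include <cassert>
#include <cstddef>
#include <cstdio>

// Mirrors the workspace rule in getWorkspaceSize: correlation keeps two
// AccType buffers per matrix (row sums + squared row norms), sized m for x
// and additionally n when y is a distinct matrix.
size_t correlation_workspace_bytes(int m, int n, bool x_equals_y,
                                   size_t sizeof_acc) {
  const int num_buffers = 2;  // sums and squared norms
  size_t bytes = num_buffers * m * sizeof_acc;
  if (!x_equals_y) bytes += num_buffers * n * sizeof_acc;
  return bytes;
}

int main() {
  // 1024 x 1024 float problem with distinct x and y:
  size_t ws = correlation_workspace_bytes(1024, 1024, false, sizeof(float));
  // Matches the ASSERT in correlationImpl:
  // worksize >= 2 * (m + n) * sizeof(AccType) when pA != pB.
  assert(ws >= 2 * (1024 + 1024) * sizeof(float));
  std::printf("workspace: %zu bytes\n", ws);
  return 0;
}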
diff --git a/cpp/include/raft/distance/hamming.cuh b/cpp/include/raft/distance/hamming.cuh
new file mode 100644
index 0000000000..08f1020b85
--- /dev/null
+++ b/cpp/include/raft/distance/hamming.cuh
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <raft/distance/pairwise_distance_base.cuh>
+
+namespace raft {
+namespace distance {
+
+/**
+ * @brief the Hamming distance matrix using the unexpanded form:
+ * It computes the following equation:
+   Cij = sum(x_i != y_i) / k
+ *
+ * @tparam DataT input data-type (for A and B matrices)
+ * @tparam AccT accumulation data-type
+ * @tparam OutT output data-type (for C and D matrices)
+ * @tparam IdxT index data-type
+ * @tparam Veclen number of k-elements loaded by each thread
+                  for every LDG call. details in contractions.cuh
+ * @tparam FinalLambda final lambda called on final distance value
+ * @tparam isRowMajor true if input/output is row major,
+                      false for column major
+ * @param[in] x input matrix
+ * @param[in] y input matrix
+ * @param[in] m number of rows of A and C/D
+ * @param[in] n number of rows of B and cols of C/D
+ * @param[in] k number of cols of A and B
+ * @param[in] lda leading dimension of A
+ * @param[in] ldb leading dimension of B
+ * @param[in] ldd leading dimension of C/D
+ * @param[out] dOutput output matrix
+ * @param[in] fin_op the final gemm epilogue lambda
+ * @param[in] stream cuda stream to launch work
+ */
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          int Veclen, typename FinalLambda, bool isRowMajor>
+static void hammingUnexpandedImpl(const DataT *x, const DataT *y, IdxT m,
+                                  IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                                  OutT *dOutput, FinalLambda fin_op,
+                                  cudaStream_t stream) {
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::ColPolicy ColPolicy;
+
+  typedef
+    typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;
+
+  dim3 blk(KPolicy::Nthreads);
+
+  // Accumulation operation lambda
+  auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) {
+    acc += (x != y);
+  };
+
+  // epilogue operation lambda for final value calculation
+  auto epilog_lambda = [k] __device__(
+                         AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+                         DataT * regxn, DataT * regyn, IdxT gridStrideX,
+                         IdxT gridStrideY) {
+    const DataT one_over_k = DataT(1.0) / k;
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+#pragma unroll
+      for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+        acc[i][j] *= one_over_k;
+      }
+    }
+  };
+
+  if (isRowMajor) {
+    auto hammingUnexpandedRowMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, true>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               hammingUnexpandedRowMajor);
+
+    hammingUnexpandedRowMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+      x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+      epilog_lambda, fin_op);
+  } else {
+    auto hammingUnexpandedColMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, false>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               hammingUnexpandedColMajor);
+    hammingUnexpandedColMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+      x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+      epilog_lambda, fin_op);
+  }
+
+  CUDA_CHECK(cudaGetLastError());
+}
+
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          typename FinalLambda, bool isRowMajor>
+void hammingUnexpanded(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                       const DataT *x, const DataT *y, OutT *dOutput,
+                       FinalLambda fin_op, cudaStream_t stream) {
+  size_t bytesA = sizeof(DataT) * lda;
+  size_t bytesB = sizeof(DataT) * ldb;
+  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
+    hammingUnexpandedImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT),
+                          FinalLambda, isRowMajor>(x, y, m, n, k, lda, ldb, ldd,
+                                                   dOutput, fin_op, stream);
+  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
+    hammingUnexpandedImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT),
+                          FinalLambda, isRowMajor>(x, y, m, n, k, lda, ldb, ldd,
+                                                   dOutput, fin_op, stream);
+  } else {
+    hammingUnexpandedImpl<DataT, AccT, OutT, IdxT, 1, FinalLambda, isRowMajor>(
+      x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream);
+  }
+}
+
+/**
+ * @brief the Hamming Unexpanded distance matrix calculation
+ * It computes the following equation:
+   Cij = sum(x_i != y_i) / k
+ *
+ * @tparam InType input data-type (for A and B matrices)
+ * @tparam AccType accumulation data-type
+ * @tparam OutType output data-type (for C and D matrices)
+ * @tparam FinalLambda user-defined epilogue lambda
+ * @tparam Index_ Index type
+ * @param m number of rows of A and C/D
+ * @param n number of columns of B and C/D
+ * @param k number of cols of A and B
+ * @param pA input matrix
+ * @param pB input matrix
+ * @param pD output matrix
+ * @param fin_op the final element-wise epilogue lambda
+ * @param stream cuda stream where to launch work
+ * @param isRowMajor whether the input and output matrices are row major
+ */
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_ = int>
+void hammingUnexpandedImpl(int m, int n, int k, const InType *pA,
+                           const InType *pB, OutType *pD, FinalLambda fin_op,
+                           cudaStream_t stream, bool isRowMajor) {
+  typedef std::is_same<OutType, bool> is_bool;
+  typedef typename std::conditional<is_bool::value, OutType, AccType>::type
+    hammingUnexpandedOutType;
+  Index_ lda, ldb, ldd;
+  hammingUnexpandedOutType *pDcast =
+    reinterpret_cast<hammingUnexpandedOutType *>(pD);
+  if (isRowMajor) {
+    lda = k, ldb = k, ldd = n;
+    hammingUnexpanded<InType, AccType, hammingUnexpandedOutType, Index_,
+                      FinalLambda, true>(m, n, k, lda, ldb, ldd, pA, pB, pDcast,
+                                         fin_op, stream);
+
+  } else {
+    lda = n, ldb = m, ldd = m;
+    hammingUnexpanded<InType, AccType, hammingUnexpandedOutType, Index_,
+                      FinalLambda, false>(n, m, k, lda, ldb, ldd, pB, pA,
+                                          pDcast, fin_op, stream);
+  }
+}
+}  // namespace distance
+}  // namespace raft
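The 16/8/1 dispatch in the wrapper above picks the widest vectorized load the leading dimensions allow; the same pattern recurs in every metric in this patch. A standalone sketch of that selection rule (illustrative; the real code resolves this at compile time via the Veclen template parameter):

#include <cstdio>

// Returns the number of elements each thread loads per LDG, following the
// alignment checks in hammingUnexpanded: try 16-byte loads, then 8-byte,
// else fall back to scalar loads.
int choose_veclen(size_t sizeof_data, size_t lda, size_t ldb) {
  size_t bytesA = sizeof_data * lda;
  size_t bytesB = sizeof_data * ldb;
  if (16 % sizeof_data == 0 && bytesA % 16 == 0 && bytesB % 16 == 0)
    return 16 / sizeof_data;
  if (8 % sizeof_data == 0 && bytesA % 8 == 0 && bytesB % 8 == 0)
    return 8 / sizeof_data;
  return 1;
}

int main() {
  std::printf("%d\n", choose_veclen(sizeof(float), 1024, 1024));   // 4
  std::printf("%d\n", choose_veclen(sizeof(double), 1024, 1024));  // 2
  std::printf("%d\n", choose_veclen(sizeof(float), 1022, 1022));   // 2
  std::printf("%d\n", choose_veclen(sizeof(float), 1021, 1021));   // 1
  return 0;
}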
diff --git a/cpp/include/raft/distance/jensen_shannon.cuh b/cpp/include/raft/distance/jensen_shannon.cuh
new file mode 100644
index 0000000000..2a94205853
--- /dev/null
+++ b/cpp/include/raft/distance/jensen_shannon.cuh
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <raft/distance/pairwise_distance_base.cuh>
+
+namespace raft {
+namespace distance {
+
+/**
+ * @brief the Jensen-Shannon distance matrix:
+ * It computes the following equation:
+   Cij = sqrt(0.5 * sum( -x_i * (log(0.5 * (x_i + y_i)) - log(x_i))
+                       + (-y_i * (log(0.5 * (x_i + y_i)) - log(y_i)))))
+ *
+ * @tparam DataT input data-type (for A and B matrices)
+ * @tparam AccT accumulation data-type
+ * @tparam OutT output data-type (for C and D matrices)
+ * @tparam IdxT index data-type
+ * @tparam Veclen number of k-elements loaded by each thread
+                  for every LDG call. details in contractions.cuh
+ * @tparam FinalLambda final lambda called on final distance value
+ * @tparam isRowMajor true if input/output is row major,
+                      false for column major
+ * @param[in] x input matrix
+ * @param[in] y input matrix
+ * @param[in] m number of rows of A and C/D
+ * @param[in] n number of rows of B and cols of C/D
+ * @param[in] k number of cols of A and B
+ * @param[in] lda leading dimension of A
+ * @param[in] ldb leading dimension of B
+ * @param[in] ldd leading dimension of C/D
+ * @param[out] dOutput output matrix
+ * @param[in] fin_op the final gemm epilogue lambda
+ * @param[in] stream cuda stream to launch work
+ */
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          int Veclen, typename FinalLambda, bool isRowMajor>
+static void jensenShannonImpl(const DataT *x, const DataT *y, IdxT m, IdxT n,
+                              IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                              OutT *dOutput, FinalLambda fin_op,
+                              cudaStream_t stream) {
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::ColPolicy ColPolicy;
+
+  typedef
+    typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;
+
+  dim3 blk(KPolicy::Nthreads);
+
+  // Accumulation operation lambda
+  auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) {
+    const DataT m = 0.5f * (x + y);
+    const bool m_zero = (m == 0);
+    const auto logM = (!m_zero) * raft::myLog(m + m_zero);
+
+    const bool x_zero = (x == 0);
+    const bool y_zero = (y == 0);
+    acc += (-x * (logM - raft::myLog(x + x_zero))) +
+           (-y * (logM - raft::myLog(y + y_zero)));
+  };
+
+  // epilogue operation lambda for final value calculation
+  auto epilog_lambda = [] __device__(
+                         AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+                         DataT * regxn, DataT * regyn, IdxT gridStrideX,
+                         IdxT gridStrideY) {
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+#pragma unroll
+      for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+        acc[i][j] = raft::mySqrt(0.5 * acc[i][j]);
+      }
+    }
+  };
+
+  if (isRowMajor) {
+    auto jensenShannonRowMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, true>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               jensenShannonRowMajor);
+
+    jensenShannonRowMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+      x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+      epilog_lambda, fin_op);
+  } else {
+    auto jensenShannonColMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, false>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               jensenShannonColMajor);
+    jensenShannonColMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+      x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+      epilog_lambda, fin_op);
+  }
+
+  CUDA_CHECK(cudaGetLastError());
+}
+
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          typename FinalLambda, bool isRowMajor>
+void jensenShannon(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                   const DataT *x, const DataT *y, OutT *dOutput,
+                   FinalLambda fin_op, cudaStream_t stream) {
+  size_t bytesA = sizeof(DataT) * lda;
+  size_t bytesB = sizeof(DataT) * ldb;
+  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
+    jensenShannonImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), FinalLambda,
+                      isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
+                                  stream);
+  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
+    jensenShannonImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), FinalLambda,
+                      isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
+                                  stream);
+  } else {
+    jensenShannonImpl<DataT, AccT, OutT, IdxT, 1, FinalLambda, isRowMajor>(
+      x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream);
+  }
+}
+
+/**
+ * @brief the Jensen-Shannon distance matrix calculation
+ * It computes the following equation:
+   Cij = sqrt(0.5 * sum( -x_i * (log(0.5 * (x_i + y_i)) - log(x_i))
+                       + (-y_i * (log(0.5 * (x_i + y_i)) - log(y_i)))))
+ *
+ * @tparam InType input data-type (for A and B matrices)
+ * @tparam AccType accumulation data-type
+ * @tparam OutType output data-type (for C and D matrices)
+ * @tparam FinalLambda user-defined epilogue lambda
+ * @tparam Index_ Index type
+ * @param m number of rows of A and C/D
+ * @param n number of columns of B and C/D
+ * @param k number of cols of A and B
+ * @param pA input matrix
+ * @param pB input matrix
+ * @param pD output matrix
+ * @param fin_op the final element-wise epilogue lambda
+ * @param stream cuda stream where to launch work
+ * @param isRowMajor whether the input and output matrices are row major
+ */
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_ = int>
+void jensenShannonImpl(int m, int n, int k, const InType *pA, const InType *pB,
+                       OutType *pD, FinalLambda fin_op, cudaStream_t stream,
+                       bool isRowMajor) {
+  typedef std::is_same<OutType, bool> is_bool;
+  typedef typename std::conditional<is_bool::value, OutType, AccType>::type
+    jensenShannonOutType;
+  Index_ lda, ldb, ldd;
+  jensenShannonOutType *pDcast = reinterpret_cast<jensenShannonOutType *>(pD);
+  if (isRowMajor) {
+    lda = k, ldb = k, ldd = n;
+    jensenShannon<InType, AccType, jensenShannonOutType, Index_, FinalLambda,
+                  true>(m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream);
+
+  } else {
+    lda = n, ldb = m, ldd = m;
+    jensenShannon<InType, AccType, jensenShannonOutType, Index_, FinalLambda,
+                  false>(n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op,
+                         stream);
+  }
+}
+}  // namespace distance
+}  // namespace raft
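The core lambda above never branches on zero inputs: the term `(!v_zero) * raft::myLog(v + v_zero)` bumps a zero argument to 1 (whose log is 0) so the log stays finite, and the mask makes the "zero probability contributes nothing" convention explicit. A tiny host check of that identity (plain C++, illustrative only):

#include <cmath>
#include <cstdio>

// Branch-free guarded log in the style of the Jensen-Shannon core lambda:
// for v == 0 the argument becomes v + 1 == 1, so log() is finite (and 0),
// and the !v_zero mask zeroes the whole term.
double guarded_log(double v) {
  const bool v_zero = (v == 0);
  return !v_zero * std::log(v + v_zero);
}

int main() {
  std::printf("%f %f %f\n",
              guarded_log(0.0),                  // 0, not -inf
              guarded_log(1.0),                  // 0
              guarded_log(2.718281828459045));   // ~1
  return 0;
}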
diff --git a/cpp/include/raft/distance/kl_divergence.cuh b/cpp/include/raft/distance/kl_divergence.cuh
new file mode 100644
index 0000000000..3197b73d10
--- /dev/null
+++ b/cpp/include/raft/distance/kl_divergence.cuh
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <raft/distance/pairwise_distance_base.cuh>
+
+namespace raft {
+namespace distance {
+
+/**
+ * @brief the KL Divergence distance matrix:
+ * It computes the following equation:
+   Cij = 0.5 * sum(x * log (x / y))
+ * This distance computation modifies B (row major) or A (column major) in
+ * place by taking log(x), and then undoes it with exp(log(x)). Because of
+ * this round trip, the values in A or B might differ slightly after this
+ * is invoked.
+ *
+ * @tparam DataT input data-type (for A and B matrices)
+ * @tparam AccT accumulation data-type
+ * @tparam OutT output data-type (for C and D matrices)
+ * @tparam IdxT index data-type
+ * @tparam Veclen number of k-elements loaded by each thread
+                  for every LDG call. details in contractions.cuh
+ * @tparam FinalLambda final lambda called on final distance value
+ * @tparam isRowMajor true if input/output is row major,
+                      false for column major
+ * @param[in] x input matrix
+ * @param[in] y input matrix
+ * @param[in] m number of rows of A and C/D
+ * @param[in] n number of rows of B and cols of C/D
+ * @param[in] k number of cols of A and B
+ * @param[in] lda leading dimension of A
+ * @param[in] ldb leading dimension of B
+ * @param[in] ldd leading dimension of C/D
+ * @param[out] dOutput output matrix
+ * @param[in] fin_op the final gemm epilogue lambda
+ * @param[in] stream cuda stream to launch work
+ */
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          int Veclen, typename FinalLambda, bool isRowMajor>
+static void klDivergenceImpl(const DataT *x, const DataT *y, IdxT m, IdxT n,
+                             IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                             OutT *dOutput, FinalLambda fin_op,
+                             cudaStream_t stream) {
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::ColPolicy ColPolicy;
+
+  typedef
+    typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;
+
+  dim3 blk(KPolicy::Nthreads);
+
+  // Accumulation operation lambda
+  auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) {
+    if (isRowMajor) {
+      const bool x_zero = (x == 0);
+      acc += x * (raft::myLog(x + x_zero) - y);
+    } else {
+      const bool y_zero = (y == 0);
+      acc += y * (raft::myLog(y + y_zero) - x);
+    }
+  };
+
+  auto core_lambda_x_equal_y = [] __device__(AccT & acc, DataT & x, DataT & y) {
+    if (isRowMajor) {
+      const bool x_zero = (x == 0);
+      const bool y_zero = (y == 0);
+      acc +=
+        x * (raft::myLog(x + x_zero) - (!y_zero) * raft::myLog(y + y_zero));
+    } else {
+      const bool y_zero = (y == 0);
+      const bool x_zero = (x == 0);
+      acc +=
+        y * (raft::myLog(y + y_zero) - (!x_zero) * raft::myLog(x + x_zero));
+    }
+  };
+
+  auto unaryOp_lambda = [] __device__(DataT input) {
+    const bool x_zero = (input == 0);
+    return (!x_zero) * raft::myLog(input + x_zero);
+  };
+
+  auto unaryOp_lambda_reverse = [] __device__(DataT input) {
+    // reverse previous log (x) back to x using (e ^ log(x))
+    const bool x_zero = (input == 0);
+    return (!x_zero) * raft::myExp(input);
+  };
+
+  // epilogue operation lambda for final value calculation
+  auto epilog_lambda = [] __device__(
+                         AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+                         DataT * regxn, DataT * regyn, IdxT gridStrideX,
+                         IdxT gridStrideY) {
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+#pragma unroll
+      for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+        acc[i][j] = (0.5f * acc[i][j]);
+      }
+    }
+  };
+
+  if (isRowMajor) {
+    constexpr auto klDivergenceRowMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, true>;
+    constexpr auto klDivergenceRowMajorXequalY =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda_x_equal_y),
+                                decltype(epilog_lambda), FinalLambda, true>;
+    if (x != y) {
+      raft::linalg::unaryOp<DataT, decltype(unaryOp_lambda), IdxT>(
+        (DataT *)y, y, n * k, unaryOp_lambda, stream);
+      dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                                 klDivergenceRowMajor);
+      klDivergenceRowMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+        x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+        epilog_lambda, fin_op);
+      // Now reverse previous log (x) back to x using (e ^ log(x))
+      raft::linalg::unaryOp<DataT, decltype(unaryOp_lambda_reverse), IdxT>(
+        (DataT *)y, y, n * k, unaryOp_lambda_reverse, stream);
+    } else {
+      dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                                 klDivergenceRowMajorXequalY);
+      klDivergenceRowMajorXequalY<<<grid, blk, KPolicy::SmemSize, stream>>>(
+        x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput,
+        core_lambda_x_equal_y, epilog_lambda, fin_op);
+    }
+  } else {
+    constexpr auto klDivergenceColMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, false>;
+    constexpr auto klDivergenceColMajorXequalY =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda_x_equal_y),
+                                decltype(epilog_lambda), FinalLambda, false>;
+    if (x != y) {
+      raft::linalg::unaryOp<DataT, decltype(unaryOp_lambda), IdxT>(
+        (DataT *)x, x, m * k, unaryOp_lambda, stream);
+      dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                                 klDivergenceColMajor);
+      klDivergenceColMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+        x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+        epilog_lambda, fin_op);
+      // Now reverse previous log (x) back to x using (e ^ log(x))
+      raft::linalg::unaryOp<DataT, decltype(unaryOp_lambda_reverse), IdxT>(
+        (DataT *)x, x, m * k, unaryOp_lambda_reverse, stream);
+    } else {
+      dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                                 klDivergenceColMajorXequalY);
+      klDivergenceColMajorXequalY<<<grid, blk, KPolicy::SmemSize, stream>>>(
+        x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput,
+        core_lambda_x_equal_y, epilog_lambda, fin_op);
+    }
+  }
+
+  CUDA_CHECK(cudaGetLastError());
+}
+
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          typename FinalLambda, bool isRowMajor>
+void klDivergence(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                  const DataT *x, const DataT *y, OutT *dOutput,
+                  FinalLambda fin_op, cudaStream_t stream) {
+  size_t bytesA = sizeof(DataT) * lda;
+  size_t bytesB = sizeof(DataT) * ldb;
+  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
+    klDivergenceImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), FinalLambda,
+                     isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
+                                 stream);
+  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
+    klDivergenceImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), FinalLambda,
+                     isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
+                                 stream);
+  } else {
+    klDivergenceImpl<DataT, AccT, OutT, IdxT, 1, FinalLambda, isRowMajor>(
+      x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream);
+  }
+}
+
+/**
+ * @brief the KL Divergence distance matrix calculation
+ * It computes the following equation:
+   Cij = 0.5 * sum(x * log (x / y))
+ * This distance computation modifies B (row major) or A (column major) in
+ * place by taking log(x), and then undoes it with exp(log(x)). Because of
+ * this round trip, the values in A or B might differ slightly after this
+ * is invoked.
+ * @tparam InType input data-type (for A and B matrices)
+ * @tparam AccType accumulation data-type
+ * @tparam OutType output data-type (for C and D matrices)
+ * @tparam FinalLambda user-defined epilogue lambda
+ * @tparam Index_ Index type
+ * @param m number of rows of A and C/D
+ * @param n number of columns of B and C/D
+ * @param k number of cols of A and B
+ * @param pA input matrix
+ * @param pB input matrix
+ * @param pD output matrix
+ * @param fin_op the final element-wise epilogue lambda
+ * @param stream cuda stream where to launch work
+ * @param isRowMajor whether the input and output matrices are row major
+ */
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_ = int>
+void klDivergenceImpl(int m, int n, int k, const InType *pA, const InType *pB,
+                      OutType *pD, FinalLambda fin_op, cudaStream_t stream,
+                      bool isRowMajor) {
+  typedef std::is_same<OutType, bool> is_bool;
+  typedef typename std::conditional<is_bool::value, OutType, AccType>::type
+    klDivergenceOutType;
+  Index_ lda, ldb, ldd;
+  klDivergenceOutType *pDcast = reinterpret_cast<klDivergenceOutType *>(pD);
+  if (isRowMajor) {
+    lda = k, ldb = k, ldd = n;
+    klDivergence<InType, AccType, klDivergenceOutType, Index_, FinalLambda,
+                 true>(m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream);
+
+  } else {
+    lda = n, ldb = m, ldd = m;
+    klDivergence<InType, AccType, klDivergenceOutType, Index_, FinalLambda,
+                 false>(n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream);
+  }
+}
+}  // namespace distance
+}  // namespace raft
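The header's warning about inputs changing slightly comes from the in-place log transform: the row-major path rewrites y to log(y) before the kernel, so the core lambda can accumulate x * (log x - log y) without a division or second log per element, and an exp pass afterwards restores y. A host-side sketch of that round trip and its small floating-point drift (illustrative only):

#include <cmath>
#include <cstdio>

int main() {
  // One KL term, computed directly and via the precomputed-log form the
  // kernel uses after y has been transformed in place.
  double x = 0.3, y = 0.2;
  double direct = x * std::log(x / y);
  double log_y = std::log(y);                // in-place transform of y
  double fused = x * (std::log(x) - log_y);  // what the core lambda sums
  double restored = std::exp(log_y);         // reverse transform: e^log(y)
  std::printf("direct=%.17f fused=%.17f\n", direct, fused);
  std::printf("y restored=%.17f (drift=%.3e)\n", restored, restored - y);
  return 0;
}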
diff --git a/cpp/include/raft/distance/russell_rao.cuh b/cpp/include/raft/distance/russell_rao.cuh
new file mode 100644
index 0000000000..417fb73b94
--- /dev/null
+++ b/cpp/include/raft/distance/russell_rao.cuh
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <raft/distance/pairwise_distance_base.cuh>
+
+namespace raft {
+namespace distance {
+
+/**
+ * @brief the Russell-Rao distance matrix:
+ * It computes the following equation:
+   Cij = (k - sum(x_i * y_i)) / k
+ *
+ * @tparam DataT input data-type (for A and B matrices)
+ * @tparam AccT accumulation data-type
+ * @tparam OutT output data-type (for C and D matrices)
+ * @tparam IdxT index data-type
+ * @tparam Veclen number of k-elements loaded by each thread
+                  for every LDG call. details in contractions.cuh
+ * @tparam FinalLambda final lambda called on final distance value
+ * @tparam isRowMajor true if input/output is row major,
+                      false for column major
+ * @param[in] x input matrix
+ * @param[in] y input matrix
+ * @param[in] m number of rows of A and C/D
+ * @param[in] n number of rows of B and cols of C/D
+ * @param[in] k number of cols of A and B
+ * @param[in] lda leading dimension of A
+ * @param[in] ldb leading dimension of B
+ * @param[in] ldd leading dimension of C/D
+ * @param[out] dOutput output matrix
+ * @param[in] fin_op the final gemm epilogue lambda
+ * @param[in] stream cuda stream to launch work
+ */
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          int Veclen, typename FinalLambda, bool isRowMajor>
+static void russellRaoImpl(const DataT *x, const DataT *y, IdxT m, IdxT n,
+                           IdxT k, IdxT lda, IdxT ldb, IdxT ldd, OutT *dOutput,
+                           FinalLambda fin_op, cudaStream_t stream) {
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::Policy RowPolicy;
+  typedef typename raft::linalg::Policy4x4<DataT, Veclen>::ColPolicy ColPolicy;
+
+  typedef
+    typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;
+
+  dim3 blk(KPolicy::Nthreads);
+
+  // Accumulation operation lambda
+  auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) {
+    acc += x * y;
+  };
+
+  const float one_over_k = 1.0 / k;
+  // epilogue operation lambda for final value calculation
+  auto epilog_lambda = [k, one_over_k] __device__(
+                         AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+                         DataT * regxn, DataT * regyn, IdxT gridStrideX,
+                         IdxT gridStrideY) {
+#pragma unroll
+    for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+#pragma unroll
+      for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+        acc[i][j] = (k - acc[i][j]) * one_over_k;
+      }
+    }
+  };
+
+  if (isRowMajor) {
+    constexpr auto russellRaoRowMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, true>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               russellRaoRowMajor);
+
+    russellRaoRowMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+      x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+      epilog_lambda, fin_op);
+  } else {
+    constexpr auto russellRaoColMajor =
+      pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
+                                decltype(core_lambda), decltype(epilog_lambda),
+                                FinalLambda, false>;
+    dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
+                                               russellRaoColMajor);
+    russellRaoColMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
+      x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
+      epilog_lambda, fin_op);
+  }
+
+  CUDA_CHECK(cudaGetLastError());
+}
+
+template <typename DataT, typename AccT, typename OutT, typename IdxT,
+          typename FinalLambda, bool isRowMajor>
+void russellRao(IdxT m, IdxT n, IdxT k, IdxT lda, IdxT ldb, IdxT ldd,
+                const DataT *x, const DataT *y, OutT *dOutput,
+                FinalLambda fin_op, cudaStream_t stream) {
+  size_t bytesA = sizeof(DataT) * lda;
+  size_t bytesB = sizeof(DataT) * ldb;
+  if (16 % sizeof(DataT) == 0 && bytesA % 16 == 0 && bytesB % 16 == 0) {
+    russellRaoImpl<DataT, AccT, OutT, IdxT, 16 / sizeof(DataT), FinalLambda,
+                   isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
+                               stream);
+  } else if (8 % sizeof(DataT) == 0 && bytesA % 8 == 0 && bytesB % 8 == 0) {
+    russellRaoImpl<DataT, AccT, OutT, IdxT, 8 / sizeof(DataT), FinalLambda,
+                   isRowMajor>(x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op,
+                               stream);
+  } else {
+    russellRaoImpl<DataT, AccT, OutT, IdxT, 1, FinalLambda, isRowMajor>(
+      x, y, m, n, k, lda, ldb, ldd, dOutput, fin_op, stream);
+  }
+}
+
+/**
+ * @brief the Russell-Rao distance matrix calculation
+ * It computes the following equation:
+   Cij = (k - sum(x_i * y_i)) / k
+ *
+ * @tparam InType input data-type (for A and B matrices)
+ * @tparam AccType accumulation data-type
+ * @tparam OutType output data-type (for C and D matrices)
+ * @tparam FinalLambda user-defined epilogue lambda
+ * @tparam Index_ Index type
+ * @param m number of rows of A and C/D
+ * @param n number of columns of B and C/D
+ * @param k number of cols of A and B
+ * @param pA input matrix
+ * @param pB input matrix
+ * @param pD output matrix
+ * @param fin_op the final element-wise epilogue lambda
+ * @param stream cuda stream where to launch work
+ * @param isRowMajor whether the input and output matrices are row major
+ */
+template <typename InType, typename AccType, typename OutType,
+          typename FinalLambda, typename Index_ = int>
+void russellRaoImpl(int m, int n, int k, const InType *pA, const InType *pB,
+                    OutType *pD, FinalLambda fin_op, cudaStream_t stream,
+                    bool isRowMajor) {
+  typedef std::is_same<OutType, bool> is_bool;
+  typedef typename std::conditional<is_bool::value, OutType, AccType>::type
+    russellRaoOutType;
+  Index_ lda, ldb, ldd;
+  russellRaoOutType *pDcast = reinterpret_cast<russellRaoOutType *>(pD);
+  if (isRowMajor) {
+    lda = k, ldb = k, ldd = n;
+    russellRao<InType, AccType, russellRaoOutType, Index_, FinalLambda, true>(
+      m, n, k, lda, ldb, ldd, pA, pB, pDcast, fin_op, stream);
+
+  } else {
+    lda = n, ldb = m, ldd = m;
+    russellRao<InType, AccType, russellRaoOutType, Index_, FinalLambda, false>(
+      n, m, k, lda, ldb, ldd, pB, pA, pDcast, fin_op, stream);
+  }
+}
+}  // namespace distance
+}  // namespace raft
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index f94a8d9525..0428e09142 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -21,12 +21,17 @@ add_executable(test_raft
   test/distance/dist_adj.cu
   test/distance/dist_canberra.cu
   test/distance/dist_chebyshev.cu
+  test/distance/dist_correlation.cu
   test/distance/dist_cos.cu
   test/distance/dist_euc_exp.cu
   test/distance/dist_euc_unexp.cu
+  test/distance/dist_hamming.cu
   test/distance/dist_hellinger.cu
+  test/distance/dist_jensen_shannon.cu
+  test/distance/dist_kl_divergence.cu
   test/distance/dist_l1.cu
   test/distance/dist_minkowski.cu
+  test/distance/dist_russell_rao.cu
   test/distance/fused_l2_nn.cu
   test/eigen_solvers.cu
   test/handle.cpp
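Each test below instantiates the shared DistanceTest harness over a grid of inputs. Judging by the initializers and how the fields are read back (params.tolerance, params.m, params.n, params.k, params.isRowMajor), the tuples appear to encode the following struct; this is a presumed layout for readability, not a definition from this patch:

// Presumed shape of DistanceInputs<DataType>, inferred from usage such as
// {0.001f, 1024, 1024, 32, true, 1234ULL} and params.isRowMajor below.
template <typename DataType>
struct DistanceInputs {
  DataType tolerance;       // comparison tolerance for devArrMatch
  int m, n, k;              // problem sizes: m x k and n x k inputs
  bool isRowMajor;          // layout under test
  unsigned long long seed;  // RNG seed for the random inputs
};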
diff --git a/cpp/test/distance/dist_correlation.cu b/cpp/test/distance/dist_correlation.cu
new file mode 100644
index 0000000000..5d84f18e52
--- /dev/null
+++ b/cpp/test/distance/dist_correlation.cu
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.h"
+#include "distance_base.cuh"
+
+namespace raft {
+namespace distance {
+
+template <typename DataType>
+class DistanceCorrelation
+  : public DistanceTest<raft::distance::DistanceType::CorrelationExpanded,
+                        DataType> {};
+
+const std::vector<DistanceInputs<float>> inputsf = {
+  {0.001f, 1024, 1024, 32, true, 1234ULL},
+  {0.001f, 1024, 32, 1024, true, 1234ULL},
+  {0.001f, 32, 1024, 1024, true, 1234ULL},
+  {0.003f, 1024, 1024, 1024, true, 1234ULL},
+  {0.001f, 1024, 1024, 32, false, 1234ULL},
+  {0.001f, 1024, 32, 1024, false, 1234ULL},
+  {0.001f, 32, 1024, 1024, false, 1234ULL},
+  {0.003f, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceCorrelation<float> DistanceCorrelationF;
+TEST_P(DistanceCorrelationF, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<float>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationF,
+                        ::testing::ValuesIn(inputsf));
+
+const std::vector<DistanceInputs<double>> inputsd = {
+  {0.001, 1024, 1024, 32, true, 1234ULL},
+  {0.001, 1024, 32, 1024, true, 1234ULL},
+  {0.001, 32, 1024, 1024, true, 1234ULL},
+  {0.003, 1024, 1024, 1024, true, 1234ULL},
+  {0.001, 1024, 1024, 32, false, 1234ULL},
+  {0.001, 1024, 32, 1024, false, 1234ULL},
+  {0.001, 32, 1024, 1024, false, 1234ULL},
+  {0.003, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceCorrelation<double> DistanceCorrelationD;
+TEST_P(DistanceCorrelationD, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<double>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceCorrelationD,
+                        ::testing::ValuesIn(inputsd));
+
+}  // end namespace distance
+}  // end namespace raft
diff --git a/cpp/test/distance/dist_hamming.cu b/cpp/test/distance/dist_hamming.cu
new file mode 100644
index 0000000000..47febd825b
--- /dev/null
+++ b/cpp/test/distance/dist_hamming.cu
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.h"
+#include "distance_base.cuh"
+
+namespace raft {
+namespace distance {
+
+template <typename DataType>
+class DistanceHamming
+  : public DistanceTest<raft::distance::DistanceType::HammingUnexpanded,
+                        DataType> {};
+
+const std::vector<DistanceInputs<float>> inputsf = {
+  {0.001f, 1024, 1024, 32, true, 1234ULL},
+  {0.001f, 1024, 32, 1024, true, 1234ULL},
+  {0.001f, 32, 1024, 1024, true, 1234ULL},
+  {0.003f, 1024, 1024, 1024, true, 1234ULL},
+  {0.001f, 1024, 1024, 32, false, 1234ULL},
+  {0.001f, 1024, 32, 1024, false, 1234ULL},
+  {0.001f, 32, 1024, 1024, false, 1234ULL},
+  {0.003f, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceHamming<float> DistanceHammingF;
+TEST_P(DistanceHammingF, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<float>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHammingF,
+                        ::testing::ValuesIn(inputsf));
+
+const std::vector<DistanceInputs<double>> inputsd = {
+  {0.001, 1024, 1024, 32, true, 1234ULL},
+  {0.001, 1024, 32, 1024, true, 1234ULL},
+  {0.001, 32, 1024, 1024, true, 1234ULL},
+  {0.003, 1024, 1024, 1024, true, 1234ULL},
+  {0.001, 1024, 1024, 32, false, 1234ULL},
+  {0.001, 1024, 32, 1024, false, 1234ULL},
+  {0.001, 32, 1024, 1024, false, 1234ULL},
+  {0.003, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceHamming<double> DistanceHammingD;
+TEST_P(DistanceHammingD, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<double>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceHammingD,
+                        ::testing::ValuesIn(inputsd));
+
+}  // end namespace distance
+}  // end namespace raft
diff --git a/cpp/test/distance/dist_jensen_shannon.cu b/cpp/test/distance/dist_jensen_shannon.cu
new file mode 100644
index 0000000000..bc0b56f506
--- /dev/null
+++ b/cpp/test/distance/dist_jensen_shannon.cu
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.h"
+#include "distance_base.cuh"
+
+namespace raft {
+namespace distance {
+
+template <typename DataType>
+class DistanceJensenShannon
+  : public DistanceTest<raft::distance::DistanceType::JensenShannon,
+                        DataType> {};
+
+const std::vector<DistanceInputs<float>> inputsf = {
+  {0.001f, 1024, 1024, 32, true, 1234ULL},
+  {0.001f, 1024, 32, 1024, true, 1234ULL},
+  {0.001f, 32, 1024, 1024, true, 1234ULL},
+  {0.003f, 1024, 1024, 1024, true, 1234ULL},
+  {0.001f, 1024, 1024, 32, false, 1234ULL},
+  {0.001f, 1024, 32, 1024, false, 1234ULL},
+  {0.001f, 32, 1024, 1024, false, 1234ULL},
+  {0.003f, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceJensenShannon<float> DistanceJensenShannonF;
+TEST_P(DistanceJensenShannonF, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<float>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceJensenShannonF,
+                        ::testing::ValuesIn(inputsf));
+
+const std::vector<DistanceInputs<double>> inputsd = {
+  {0.001, 1024, 1024, 32, true, 1234ULL},
+  {0.001, 1024, 32, 1024, true, 1234ULL},
+  {0.001, 32, 1024, 1024, true, 1234ULL},
+  {0.003, 1024, 1024, 1024, true, 1234ULL},
+  {0.001, 1024, 1024, 32, false, 1234ULL},
+  {0.001, 1024, 32, 1024, false, 1234ULL},
+  {0.001, 32, 1024, 1024, false, 1234ULL},
+  {0.003, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceJensenShannon<double> DistanceJensenShannonD;
+TEST_P(DistanceJensenShannonD, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<double>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceJensenShannonD,
+                        ::testing::ValuesIn(inputsd));
+
+}  // end namespace distance
+}  // end namespace raft
diff --git a/cpp/test/distance/dist_kl_divergence.cu b/cpp/test/distance/dist_kl_divergence.cu
new file mode 100644
index 0000000000..884ac4b948
--- /dev/null
+++ b/cpp/test/distance/dist_kl_divergence.cu
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.h"
+#include "distance_base.cuh"
+
+namespace raft {
+namespace distance {
+
+template <typename DataType>
+class DistanceKLDivergence
+  : public DistanceTest<raft::distance::DistanceType::KLDivergence,
+                        DataType> {};
+
+const std::vector<DistanceInputs<float>> inputsf = {
+  {0.001f, 1024, 1024, 32, true, 1234ULL},
+  {0.001f, 1024, 32, 1024, true, 1234ULL},
+  {0.001f, 32, 1024, 1024, true, 1234ULL},
+  {0.003f, 1024, 1024, 1024, true, 1234ULL},
+  {0.001f, 1024, 1024, 32, false, 1234ULL},
+  {0.001f, 1024, 32, 1024, false, 1234ULL},
+  {0.001f, 32, 1024, 1024, false, 1234ULL},
+  {0.003f, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceKLDivergence<float> DistanceKLDivergenceF;
+TEST_P(DistanceKLDivergenceF, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<float>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceKLDivergenceF,
+                        ::testing::ValuesIn(inputsf));
+
+const std::vector<DistanceInputs<double>> inputsd = {
+  {0.001, 1024, 1024, 32, true, 1234ULL},
+  {0.001, 1024, 32, 1024, true, 1234ULL},
+  {0.001, 32, 1024, 1024, true, 1234ULL},
+  {0.003, 1024, 1024, 1024, true, 1234ULL},
+  {0.001, 1024, 1024, 32, false, 1234ULL},
+  {0.001, 1024, 32, 1024, false, 1234ULL},
+  {0.001, 32, 1024, 1024, false, 1234ULL},
+  {0.003, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceKLDivergence<double> DistanceKLDivergenceD;
+TEST_P(DistanceKLDivergenceD, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<double>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceKLDivergenceD,
+                        ::testing::ValuesIn(inputsd));
+
+}  // end namespace distance
+}  // end namespace raft
diff --git a/cpp/test/distance/dist_russell_rao.cu b/cpp/test/distance/dist_russell_rao.cu
new file mode 100644
index 0000000000..74ccfb0c2e
--- /dev/null
+++ b/cpp/test/distance/dist_russell_rao.cu
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils.h"
+#include "distance_base.cuh"
+
+namespace raft {
+namespace distance {
+
+template <typename DataType>
+class DistanceRussellRao
+  : public DistanceTest<raft::distance::DistanceType::RusselRaoExpanded,
+                        DataType> {};
+
+const std::vector<DistanceInputs<float>> inputsf = {
+  {0.001f, 1024, 1024, 32, true, 1234ULL},
+  {0.001f, 1024, 32, 1024, true, 1234ULL},
+  {0.001f, 32, 1024, 1024, true, 1234ULL},
+  {0.003f, 1024, 1024, 1024, true, 1234ULL},
+  {0.001f, 1024, 1024, 32, false, 1234ULL},
+  {0.001f, 1024, 32, 1024, false, 1234ULL},
+  {0.001f, 32, 1024, 1024, false, 1234ULL},
+  {0.003f, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceRussellRao<float> DistanceRussellRaoF;
+TEST_P(DistanceRussellRaoF, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<float>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceRussellRaoF,
+                        ::testing::ValuesIn(inputsf));
+
+const std::vector<DistanceInputs<double>> inputsd = {
+  {0.001, 1024, 1024, 32, true, 1234ULL},
+  {0.001, 1024, 32, 1024, true, 1234ULL},
+  {0.001, 32, 1024, 1024, true, 1234ULL},
+  {0.003, 1024, 1024, 1024, true, 1234ULL},
+  {0.001, 1024, 1024, 32, false, 1234ULL},
+  {0.001, 1024, 32, 1024, false, 1234ULL},
+  {0.001, 32, 1024, 1024, false, 1234ULL},
+  {0.003, 1024, 1024, 1024, false, 1234ULL},
+};
+typedef DistanceRussellRao<double> DistanceRussellRaoD;
+TEST_P(DistanceRussellRaoD, Result) {
+  int m = params.isRowMajor ? params.m : params.n;
+  int n = params.isRowMajor ? params.n : params.m;
+  ASSERT_TRUE(raft::devArrMatch(dist_ref, dist, m, n,
+                                raft::CompareApprox<double>(params.tolerance)));
+}
+INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceRussellRaoD,
+                        ::testing::ValuesIn(inputsd));
+
+}  // end namespace distance
+}  // end namespace raft
diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh
index fc7b064205..9e3290593d 100644
--- a/cpp/test/distance/distance_base.cuh
+++ b/cpp/test/distance/distance_base.cuh
@@ -160,6 +160,138 @@ __global__ void naiveLpUnexpDistanceKernel(DataType *dist, const DataType *x,
   dist[outidx] = acc;
 }
 
+template <typename DataType>
+__global__ void naiveHammingDistanceKernel(DataType *dist, const DataType *x,
+                                           const DataType *y, int m, int n,
+                                           int k, bool isRowMajor) {
+  int midx = threadIdx.x + blockIdx.x * blockDim.x;
+  int nidx = threadIdx.y + blockIdx.y * blockDim.y;
+  if (midx >= m || nidx >= n) return;
+  DataType acc = DataType(0);
+  for (int i = 0; i < k; ++i) {
+    int xidx = isRowMajor ? i + midx * k : i * m + midx;
+    int yidx = isRowMajor ? i + nidx * k : i * n + nidx;
+    auto a = x[xidx];
+    auto b = y[yidx];
+    acc += (a != b);
+  }
+  acc = acc / k;
+  int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx;
+  dist[outidx] = acc;
+}
+
+template <typename DataType>
+__global__ void naiveJensenShannonDistanceKernel(DataType *dist,
+                                                 const DataType *x,
+                                                 const DataType *y, int m,
+                                                 int n, int k,
+                                                 bool isRowMajor) {
+  int midx = threadIdx.x + blockIdx.x * blockDim.x;
+  int nidx = threadIdx.y + blockIdx.y * blockDim.y;
+  if (midx >= m || nidx >= n) return;
+  DataType acc = DataType(0);
+  for (int i = 0; i < k; ++i) {
+    int xidx = isRowMajor ? i + midx * k : i * m + midx;
+    int yidx = isRowMajor ? i + nidx * k : i * n + nidx;
+    auto a = x[xidx];
+    auto b = y[yidx];
+
+    DataType m_ab = 0.5f * (a + b);  // midpoint (distinct from row count m)
+    bool a_zero = a == 0;
+    bool b_zero = b == 0;
+
+    DataType p = (!a_zero * m_ab) / (a_zero + a);
+    DataType q = (!b_zero * m_ab) / (b_zero + b);
+
+    bool p_zero = p == 0;
+    bool q_zero = q == 0;
+
+    acc +=
+      (-a * (!p_zero * log(p + p_zero))) + (-b * (!q_zero * log(q + q_zero)));
+  }
+  acc = raft::mySqrt(0.5f * acc);
+  int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx;
+  dist[outidx] = acc;
+}
+
+template <typename DataType, typename OutType>
+__global__ void naiveRussellRaoDistanceKernel(OutType *dist, const DataType *x,
+                                              const DataType *y, int m, int n,
+                                              int k, bool isRowMajor) {
+  int midx = threadIdx.x + blockIdx.x * blockDim.x;
+  int nidx = threadIdx.y + blockIdx.y * blockDim.y;
+  if (midx >= m || nidx >= n) return;
+  OutType acc = OutType(0);
+  for (int i = 0; i < k; ++i) {
+    int xidx = isRowMajor ? i + midx * k : i * m + midx;
+    int yidx = isRowMajor ? i + nidx * k : i * n + nidx;
+    auto a = x[xidx];
+    auto b = y[yidx];
+    acc += (a * b);
+  }
+  acc = (k - acc) / k;
+  int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx;
+  dist[outidx] = acc;
+}
+
+template <typename DataType, typename OutType>
+__global__ void naiveKLDivergenceDistanceKernel(OutType *dist,
+                                                const DataType *x,
+                                                const DataType *y, int m, int n,
+                                                int k, bool isRowMajor) {
+  int midx = threadIdx.x + blockIdx.x * blockDim.x;
+  int nidx = threadIdx.y + blockIdx.y * blockDim.y;
+  if (midx >= m || nidx >= n) return;
+  OutType acc = OutType(0);
+  for (int i = 0; i < k; ++i) {
+    int xidx = isRowMajor ? i + midx * k : i * m + midx;
+    int yidx = isRowMajor ? i + nidx * k : i * n + nidx;
+    auto a = x[xidx];
+    auto b = y[yidx];
+    bool b_zero = (b == 0);
+    const auto ratio = (!b_zero) * (a / b);
+    const bool ratio_zero = (ratio == 0);
+    acc += (a * (!ratio_zero) * log(ratio + ratio_zero));
+  }
+  acc = 0.5f * acc;
+  int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx;
+  dist[outidx] = acc;
+}
+
+template <typename DataType, typename OutType>
+__global__ void naiveCorrelationDistanceKernel(OutType *dist,
+                                               const DataType *x,
+                                               const DataType *y, int m, int n,
+                                               int k, bool isRowMajor) {
+  int midx = threadIdx.x + blockIdx.x * blockDim.x;
+  int nidx = threadIdx.y + blockIdx.y * blockDim.y;
+  if (midx >= m || nidx >= n) return;
+  OutType acc = OutType(0);
+  auto a_norm = DataType(0);
+  auto b_norm = DataType(0);
+  auto a_sq_norm = DataType(0);
+  auto b_sq_norm = DataType(0);
+  for (int i = 0; i < k; ++i) {
+    int xidx = isRowMajor ? i + midx * k : i * m + midx;
+    int yidx = isRowMajor ? i + nidx * k : i * n + nidx;
+    auto a = x[xidx];
+    auto b = y[yidx];
+    a_norm += a;
+    b_norm += b;
+    a_sq_norm += (a * a);
+    b_sq_norm += (b * b);
+    acc += (a * b);
+  }
+
+  auto numer = k * acc - (a_norm * b_norm);
+  auto Q_denom = k * a_sq_norm - (a_norm * a_norm);
+  auto R_denom = k * b_sq_norm - (b_norm * b_norm);
+
+  acc = 1 - (numer / raft::mySqrt(Q_denom * R_denom));
+
+  int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx;
+  dist[outidx] = acc;
+}
+
 template <typename DataType>
 void naiveDistance(DataType *dist, const DataType *x, const DataType *y, int m,
                    int n, int k, raft::distance::DistanceType type,
@@ -193,6 +325,26 @@ void naiveDistance(DataType *dist, const DataType *x, const DataType *y, int m,
       naiveLpUnexpDistanceKernel<DataType>
        <<<nblks, TPB>>>(dist, x, y, m, n, k, isRowMajor, metric_arg);
      break;
+    case raft::distance::DistanceType::HammingUnexpanded:
+      naiveHammingDistanceKernel<DataType>
+        <<<nblks, TPB>>>(dist, x, y, m, n, k, isRowMajor);
+      break;
+    case raft::distance::DistanceType::JensenShannon:
+      naiveJensenShannonDistanceKernel<DataType>
+        <<<nblks, TPB>>>(dist, x, y, m, n, k, isRowMajor);
+      break;
+    case raft::distance::DistanceType::RusselRaoExpanded:
+      naiveRussellRaoDistanceKernel<DataType>
+        <<<nblks, TPB>>>(dist, x, y, m, n, k, isRowMajor);
+      break;
+    case raft::distance::DistanceType::KLDivergence:
+      naiveKLDivergenceDistanceKernel<DataType>
+        <<<nblks, TPB>>>(dist, x, y, m, n, k, isRowMajor);
+      break;
+    case raft::distance::DistanceType::CorrelationExpanded:
+      naiveCorrelationDistanceKernel<DataType>
+        <<<nblks, TPB>>>(dist, x, y, m, n, k, isRowMajor);
+      break;
     default:
       FAIL() << "should be here\n";
   }
@@ -247,10 +399,17 @@ class DistanceTest : public ::testing::TestWithParam<DistanceInputs<DataType>> {
     raft::allocate(dist_ref, m * n);
     raft::allocate(dist, m * n);
    raft::allocate(dist2, m * n);
-    if (distanceType == raft::distance::DistanceType::HellingerExpanded) {
+    if (distanceType == raft::distance::DistanceType::HellingerExpanded ||
+        distanceType == raft::distance::DistanceType::JensenShannon ||
+        distanceType == raft::distance::DistanceType::KLDivergence) {
       // Hellinger works only on positive numbers
       r.uniform(x, m * k, DataType(0.0), DataType(1.0), stream);
       r.uniform(y, n * k, DataType(0.0), DataType(1.0), stream);
+    } else if (distanceType ==
+               raft::distance::DistanceType::RusselRaoExpanded) {
+      // Russell-Rao works on boolean values.
+      r.bernoulli(x, m * k, 0.5f, stream);
+      r.bernoulli(y, n * k, 0.5f, stream);
     } else {
       r.uniform(x, m * k, DataType(-1.0), DataType(1.0), stream);
       r.uniform(y, n * k, DataType(-1.0), DataType(1.0), stream);
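As a final sanity check on the documented equations, here is a small CPU program (illustrative only, independent of the test harness above) that evaluates each of the five new metrics on one pair of vectors, using exactly the formulas from the headers; terms with a zero probability contribute nothing, matching the kernels' zero-handling convention:

#include <cmath>
#include <cstdio>

int main() {
  const int k = 4;
  double x[] = {0.5, 0.0, 0.25, 1.0};
  double y[] = {0.5, 0.5, 0.75, 1.0};

  double ham = 0, js = 0, kl = 0, dot = 0;
  double sx = 0, sy = 0, sxx = 0, syy = 0, sxy = 0;
  for (int i = 0; i < k; ++i) {
    ham += (x[i] != y[i]);
    double m = 0.5 * (x[i] + y[i]);
    if (x[i] > 0) js += -x[i] * (std::log(m) - std::log(x[i]));
    if (y[i] > 0) js += -y[i] * (std::log(m) - std::log(y[i]));
    if (x[i] > 0 && y[i] > 0) kl += x[i] * std::log(x[i] / y[i]);
    dot += x[i] * y[i];
    sx += x[i]; sy += y[i];
    sxx += x[i] * x[i]; syy += y[i] * y[i]; sxy += x[i] * y[i];
  }
  std::printf("Hamming      : %f\n", ham / k);               // sum(x_i != y_i) / k
  std::printf("JensenShannon: %f\n", std::sqrt(0.5 * js));   // sqrt(0.5 * sum(...))
  std::printf("KLDivergence : %f\n", 0.5 * kl);              // 0.5 * sum(x * log(x / y))
  std::printf("RussellRao   : %f\n", (k - dot) / k);         // (k - sum(x_i * y_i)) / k
  double numer = k * sxy - sx * sy;
  std::printf("Correlation  : %f\n",
              1.0 - numer / std::sqrt((k * sxx - sx * sx) *
                                      (k * syy - sy * sy)));
  return 0;
}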