Skip to content

Commit

Permalink
Merge pull request rapidsai#247 from rapidsai/branch-21.06
Browse files Browse the repository at this point in the history
[gpuCI] Forward-merge branch-21.06 to branch-21.08 [skip ci]
  • Loading branch information
GPUtester authored Jun 2, 2021
2 parents 7f7a443 + 0be145c commit f9be523
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 359 deletions.
35 changes: 15 additions & 20 deletions cpp/include/raft/distance/cosine.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ void cosineImpl(const DataT *x, const DataT *y, const DataT *xn,
typedef
typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;

dim3 grid(raft::ceildiv<int>(m, KPolicy::Mblk),
raft::ceildiv<int>(n, KPolicy::Nblk));
dim3 blk(KPolicy::Nthreads);

// Accumulation operation lambda
Expand All @@ -71,8 +73,7 @@ void cosineImpl(const DataT *x, const DataT *y, const DataT *xn,
// epilogue operation lambda for final value calculation
auto epilog_lambda = [] __device__(
AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
DataT * regxn, DataT * regyn, IdxT gridStrideX,
IdxT gridStrideY) {
DataT * regxn, DataT * regyn) {
#pragma unroll
for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
#pragma unroll
Expand All @@ -82,26 +83,20 @@ void cosineImpl(const DataT *x, const DataT *y, const DataT *xn,
}
};

constexpr size_t shmemSize =
KPolicy::SmemSize + ((KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT));
if (isRowMajor) {
auto cosineRowMajor =
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, true>;
dim3 grid = launchConfigGenerator<KPolicy>(m, n, shmemSize, cosineRowMajor);
cosineRowMajor<<<grid, blk, shmemSize, stream>>>(
x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda,
fin_op);
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, true>
<<<grid, blk, KPolicy::SmemSize, stream>>>(x, y, xn, yn, m, n, k, lda,
ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
} else {
auto cosineColMajor =
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, false>;
dim3 grid = launchConfigGenerator<KPolicy>(m, n, shmemSize, cosineColMajor);
cosineColMajor<<<grid, blk, shmemSize, stream>>>(
x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda,
fin_op);
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, false>
<<<grid, blk, KPolicy::SmemSize, stream>>>(x, y, xn, yn, m, n, k, lda,
ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
}

CUDA_CHECK(cudaGetLastError());
Expand Down
77 changes: 30 additions & 47 deletions cpp/include/raft/distance/euclidean.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ void euclideanExpImpl(const DataT *x, const DataT *y, const DataT *xn,
typedef
typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;

dim3 grid(raft::ceildiv<int>(m, KPolicy::Mblk),
raft::ceildiv<int>(n, KPolicy::Nblk));
dim3 blk(KPolicy::Nthreads);

// Accumulation operation lambda
Expand All @@ -70,8 +72,7 @@ void euclideanExpImpl(const DataT *x, const DataT *y, const DataT *xn,
// epilogue operation lambda for final value calculation
auto epilog_lambda = [sqrt] __device__(
AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
DataT * regxn, DataT * regyn, IdxT gridStrideX,
IdxT gridStrideY) {
DataT * regxn, DataT * regyn) {
#pragma unroll
for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
#pragma unroll
Expand All @@ -90,29 +91,20 @@ void euclideanExpImpl(const DataT *x, const DataT *y, const DataT *xn,
}
};

constexpr size_t shmemSize =
KPolicy::SmemSize + ((KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT));
if (isRowMajor) {
auto euclideanExpRowMajor =
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, true>;
dim3 grid =
launchConfigGenerator<KPolicy>(m, n, shmemSize, euclideanExpRowMajor);

euclideanExpRowMajor<<<grid, blk, shmemSize, stream>>>(
x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda,
fin_op);
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, true>
<<<grid, blk, KPolicy::SmemSize, stream>>>(x, y, xn, yn, m, n, k, lda,
ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
} else {
auto euclideanExpColMajor =
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, false>;
dim3 grid =
launchConfigGenerator<KPolicy>(m, n, shmemSize, euclideanExpColMajor);
euclideanExpColMajor<<<grid, blk, shmemSize, stream>>>(
x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda,
fin_op);
pairwiseDistanceMatKernel<true, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, false>
<<<grid, blk, KPolicy::SmemSize, stream>>>(x, y, xn, yn, m, n, k, lda,
ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
}

CUDA_CHECK(cudaGetLastError());
Expand Down Expand Up @@ -237,7 +229,8 @@ void euclideanUnExpImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, IdxT k,

typedef
typename std::conditional<isRowMajor, RowPolicy, ColPolicy>::type KPolicy;

dim3 grid(raft::ceildiv<int>(m, KPolicy::Mblk),
raft::ceildiv<int>(n, KPolicy::Nblk));
dim3 blk(KPolicy::Nthreads);

// Accumulation operation lambda
Expand All @@ -249,8 +242,7 @@ void euclideanUnExpImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, IdxT k,
// epilogue operation lambda for final value calculation
auto epilog_lambda = [sqrt] __device__(
AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
DataT * regxn, DataT * regyn, IdxT gridStrideX,
IdxT gridStrideY) {
DataT * regxn, DataT * regyn) {
if (sqrt) {
#pragma unroll
for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
Expand All @@ -263,28 +255,19 @@ void euclideanUnExpImpl(const DataT *x, const DataT *y, IdxT m, IdxT n, IdxT k,
};

if (isRowMajor) {
auto euclideanUnExpRowMajor =
pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, true>;
dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
euclideanUnExpRowMajor);

euclideanUnExpRowMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);

pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda>
<<<grid, blk, KPolicy::SmemSize, stream>>>(
x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
} else {
auto euclideanUnExpColMajor =
pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, false>;
dim3 grid = launchConfigGenerator<KPolicy>(m, n, KPolicy::SmemSize,
euclideanUnExpColMajor);

euclideanUnExpColMajor<<<grid, blk, KPolicy::SmemSize, stream>>>(
x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
pairwiseDistanceMatKernel<false, DataT, AccT, OutT, IdxT, KPolicy,
decltype(core_lambda), decltype(epilog_lambda),
FinalLambda, isRowMajor>
<<<grid, blk, KPolicy::SmemSize, stream>>>(
x, y, nullptr, nullptr, m, n, k, lda, ldb, ldd, dOutput, core_lambda,
epilog_lambda, fin_op);
}

CUDA_CHECK(cudaGetLastError());
Expand Down
Loading

0 comments on commit f9be523

Please sign in to comment.