Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Map operation #168

Merged
merged 2 commits into from
Mar 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ if(BUILD_RAFT_TESTS)
test/linalg/eig.cu
test/linalg/eig_sel.cu
test/linalg/gemm_layout.cu
test/linalg/map.cu
test/linalg/map_then_reduce.cu
test/linalg/matrix_vector_op.cu
test/linalg/multiply.cu
Expand Down
52 changes: 26 additions & 26 deletions cpp/include/raft/linalg/eltwise.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION.
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,7 @@ namespace linalg {

/**
* @defgroup ScalarOps Scalar operations on the input buffer
* @tparam math_t data-type upon which the math operation will be performed
* @tparam InType data-type upon which the math operation will be performed
 * @tparam IdxType Integer type used for addressing
* @param out the output buffer
* @param in the input buffer
Expand All @@ -33,26 +33,26 @@ namespace linalg {
* @param stream cuda stream where to launch work
* @{
*/
template <typename InType, typename IdxType, typename OutType = InType>
void scalarAdd(OutType *out, const InType *in, InType scalar, IdxType len,
               cudaStream_t stream) {
  // out[i] = in[i] + scalar for all `len` elements; enqueued asynchronously
  // on `stream`. (The scraped diff interleaved the old math_t and new
  // InType/OutType lines; this is the post-change definition only.)
  raft::linalg::unaryOp(
    out, in, len, [scalar] __device__(InType in) { return in + scalar; },
    stream);
}

template <typename InType, typename IdxType, typename OutType = InType>
void scalarMultiply(OutType *out, const InType *in, InType scalar, IdxType len,
                    cudaStream_t stream) {
  // out[i] = in[i] * scalar for all `len` elements; enqueued asynchronously
  // on `stream`. (Reconstructed post-change definition; the scraped diff
  // interleaved old and new lines.)
  raft::linalg::unaryOp(
    out, in, len, [scalar] __device__(InType in) { return in * scalar; },
    stream);
}
/** @} */

/**
* @defgroup BinaryOps Element-wise binary operations on the input buffers
* @tparam math_t data-type upon which the math operation will be performed
* @tparam InType data-type upon which the math operation will be performed
 * @tparam IdxType Integer type used for addressing
* @param out the output buffer
* @param in1 the first input buffer
Expand All @@ -61,46 +61,46 @@ void scalarMultiply(math_t *out, const math_t *in, math_t scalar, IdxType len,
* @param stream cuda stream where to launch work
* @{
*/
template <typename InType, typename IdxType, typename OutType = InType>
void eltwiseAdd(OutType *out, const InType *in1, const InType *in2, IdxType len,
                cudaStream_t stream) {
  // out[i] = in1[i] + in2[i]; launched asynchronously on `stream`.
  // (Reconstructed post-change definition from the interleaved diff.)
  binaryOp(
    out, in1, in2, len, [] __device__(InType a, InType b) { return a + b; },
    stream);
}

template <typename InType, typename IdxType, typename OutType = InType>
void eltwiseSub(OutType *out, const InType *in1, const InType *in2, IdxType len,
                cudaStream_t stream) {
  // out[i] = in1[i] - in2[i]; launched asynchronously on `stream`.
  // (Reconstructed post-change definition from the interleaved diff.)
  binaryOp(
    out, in1, in2, len, [] __device__(InType a, InType b) { return a - b; },
    stream);
}

template <typename InType, typename IdxType, typename OutType = InType>
void eltwiseMultiply(OutType *out, const InType *in1, const InType *in2,
                     IdxType len, cudaStream_t stream) {
  // out[i] = in1[i] * in2[i]; launched asynchronously on `stream`.
  // (Reconstructed post-change definition from the interleaved diff.)
  binaryOp(
    out, in1, in2, len, [] __device__(InType a, InType b) { return a * b; },
    stream);
}

template <typename InType, typename IdxType, typename OutType = InType>
void eltwiseDivide(OutType *out, const InType *in1, const InType *in2,
                   IdxType len, cudaStream_t stream) {
  // out[i] = in1[i] / in2[i]; no zero-divisor guard (see
  // eltwiseDivideCheckZero for the guarded variant). Launched
  // asynchronously on `stream`. (Reconstructed post-change definition.)
  binaryOp(
    out, in1, in2, len, [] __device__(InType a, InType b) { return a / b; },
    stream);
}

template <typename math_t, typename IdxType = int>
void eltwiseDivideCheckZero(math_t *out, const math_t *in1, const math_t *in2,
template <typename InType, typename IdxType, typename OutType = InType>
void eltwiseDivideCheckZero(OutType *out, const InType *in1, const InType *in2,
IdxType len, cudaStream_t stream) {
binaryOp(
out, in1, in2, len,
[] __device__(math_t a, math_t b) {
if (b == math_t(0.0))
return math_t(0.0);
[] __device__(InType a, InType b) {
if (b == InType(0.0))
return InType(0.0);
else
return a / b;
},
Expand Down
72 changes: 72 additions & 0 deletions cpp/include/raft/linalg/map.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cub/cub.cuh>
#include <raft/cuda_utils.cuh>
#include <raft/handle.hpp>
#include <raft/vectorized.cuh>

namespace raft {
namespace linalg {

template <typename InType, typename OutType, typename MapOp, int TPB,
          typename... Args>
/**
 * @brief Element-wise map kernel: out[i] = map(in[i], args[i]...).
 * Expects a 1-D launch; the tail block is handled by the bounds check.
 */
__global__ void mapKernel(OutType *out, size_t len, MapOp map, const InType *in,
                          Args... args) {
  // Compute the global index in 64 bits: `threadIdx.x + blockIdx.x *
  // blockDim.x` is evaluated in unsigned int and silently wraps past 2^32
  // elements, while `len` is size_t.
  size_t idx = threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;

  if (idx < len) {
    // Each optional extra array is indexed in lock-step with the primary
    // input via pack expansion.
    out[idx] = map(in[idx], args[idx]...);
  }
}

// Host-side launcher for mapKernel: sizes a 1-D grid for `len` elements at
// TPB threads per block and surfaces launch-configuration errors.
template <typename InType, typename OutType, typename MapOp, int TPB,
          typename... Args>
void mapImpl(OutType *out, size_t len, MapOp map, cudaStream_t stream,
             const InType *in, Args... args) {
  // Ceiling-divide so a partially filled tail block covers the remainder.
  // NOTE(review): narrows size_t to int; safe while the block count fits the
  // grid x-dimension limit -- confirm for very large `len`.
  const int nblks = raft::ceildiv(len, (size_t)TPB);
  mapKernel<InType, OutType, MapOp, TPB, Args...>
    <<<nblks, TPB, 0, stream>>>(out, len, map, in, args...);
  // Peek (rather than get) so the sticky error state is not cleared here.
  CUDA_CHECK(cudaPeekAtLastError());
}

/**
 * @brief CUDA version of map: out[i] = map(in[i], args[i]...)
 * @tparam InType data-type upon which the math operation will be performed
 * @tparam MapOp the device-lambda performing the actual operation
 * @tparam TPB threads-per-block in the final kernel launched
 * @tparam Args additional parameters
 * @tparam OutType data-type in which the result will be stored
 * @param out the output of the map operation (assumed to be a device pointer)
 * @param len number of elements in the input array
 * @param map the device-lambda
 * @param stream cuda-stream where to launch this kernel
 * @param in the input array
 * @param args additional input arrays
 */
template <typename InType, typename MapOp, int TPB = 256, typename... Args,
          typename OutType = InType>
void map(OutType *out, size_t len, MapOp map, cudaStream_t stream,
         const InType *in, Args... args) {
  // Thin forwarding wrapper; all launch logic lives in mapImpl.
  mapImpl<InType, OutType, MapOp, TPB, Args...>(out, len, map, stream, in,
                                                args...);
}

} // namespace linalg
}; // namespace raft
149 changes: 149 additions & 0 deletions cpp/test/linalg/map.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>
#include <raft/cudart_utils.h>
#include <raft/linalg/eltwise.cuh>
#include <raft/linalg/map.cuh>
#include <raft/mr/device/buffer.hpp>
#include <raft/random/rng.cuh>
#include "../test_utils.h"

namespace raft {
namespace linalg {

// Drives raft::linalg::map with a three-input device lambda:
// out[i] = in1[i] + in2[i] + in3[i] + scalar.
template <typename InType, typename IdxType, typename OutType>
void mapLaunch(OutType *out, const InType *in1, const InType *in2,
               const InType *in3, InType scalar, IdxType len,
               cudaStream_t stream) {
  // Capture `scalar` by value; the lambda runs on the device.
  auto sum_with_scalar = [=] __device__(InType x, InType y, InType z) {
    return x + y + z + scalar;
  };
  map(out, len, sum_with_scalar, stream, in1, in2, in3);
}

// Parameter set for one MapTest case. OutType only selects the fixture's
// output buffer type; it is not stored here.
template <typename InType, typename IdxType = int, typename OutType = InType>
struct MapInputs {
  InType tolerance;             // approximate-comparison tolerance
  IdxType len;                  // number of elements in each input array
  unsigned long long int seed;  // RNG seed for reproducible inputs
  InType scalar;                // scalar added by the map operation under test
};

// Builds the reference result out_ref[i] = in1[i] + in2[i] + in3[i] + scalar
// using the independently-implemented eltwise/scalar primitives.
template <typename InType, typename IdxType, typename OutType = InType>
void create_ref(OutType *out_ref, const InType *in1, const InType *in2,
                const InType *in3, InType scalar, IdxType len,
                cudaStream_t stream) {
  InType *tmp;
  allocate(tmp, len);
  eltwiseAdd(tmp, in1, in2, len, stream);
  eltwiseAdd(out_ref, tmp, in3, len, stream);
  scalarAdd(out_ref, out_ref, (OutType)scalar, len, stream);
  // Make sure the kernels that read `tmp` have completed before releasing
  // it; relying on cudaFree's implicit device synchronization is fragile.
  CUDA_CHECK(cudaStreamSynchronize(stream));
  CUDA_CHECK(cudaFree(tmp));
}

// Typed gtest fixture: fills three random device arrays, then computes the
// reference result (create_ref) and the map() result (mapLaunch) so the
// TEST_P bodies can compare them.
template <typename InType, typename IdxType, typename OutType = InType>
class MapTest
  : public ::testing::TestWithParam<MapInputs<InType, IdxType, OutType>> {
 protected:
  void SetUp() override {
    params =
      ::testing::TestWithParam<MapInputs<InType, IdxType, OutType>>::GetParam();
    raft::random::Rng r(params.seed);

    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));
    IdxType len = params.len;
    // Device allocations; released in TearDown.
    allocate(in1, len);
    allocate(in2, len);
    allocate(in3, len);
    allocate(out_ref, len);
    allocate(out, len);
    // Random inputs in [-1, 1); all work is ordered on the same stream, so
    // create_ref/mapLaunch see fully-populated inputs.
    r.uniform(in1, len, InType(-1.0), InType(1.0), stream);
    r.uniform(in2, len, InType(-1.0), InType(1.0), stream);
    r.uniform(in3, len, InType(-1.0), InType(1.0), stream);

    create_ref(out_ref, in1, in2, in3, params.scalar, len, stream);
    mapLaunch(out, in1, in2, in3, params.scalar, len, stream);
    // NOTE(review): cudaStreamDestroy defers destruction until queued work
    // completes, so the pending kernels still run -- confirm no explicit
    // sync is needed before the device comparison in the test body.
    CUDA_CHECK(cudaStreamDestroy(stream));
  }

  void TearDown() override {
    CUDA_CHECK(cudaFree(in1));
    CUDA_CHECK(cudaFree(in2));
    CUDA_CHECK(cudaFree(in3));
    CUDA_CHECK(cudaFree(out_ref));
    CUDA_CHECK(cudaFree(out));
  }

 protected:
  MapInputs<InType, IdxType, OutType> params;  // current test parameters
  InType *in1, *in2, *in3;                     // device input arrays
  OutType *out_ref, *out;                      // reference and actual outputs
};

// float input, 32-bit indexing.
const std::vector<MapInputs<float, int>> inputsf_i32 = {
  {0.000001f, 1024 * 1024, 1234ULL, 3.2}};
typedef MapTest<float, int> MapTestF_i32;
TEST_P(MapTestF_i32, Result) {
  ASSERT_TRUE(devArrMatch(out_ref, out, params.len,
                          CompareApprox<float>(params.tolerance)));
}
INSTANTIATE_TEST_SUITE_P(MapTests, MapTestF_i32,
                         ::testing::ValuesIn(inputsf_i32));

// float input, 64-bit (size_t) indexing.
const std::vector<MapInputs<float, size_t>> inputsf_i64 = {
  {0.000001f, 1024 * 1024, 1234ULL, 9.4}};
typedef MapTest<float, size_t> MapTestF_i64;
TEST_P(MapTestF_i64, Result) {
  ASSERT_TRUE(devArrMatch(out_ref, out, params.len,
                          CompareApprox<float>(params.tolerance)));
}
INSTANTIATE_TEST_SUITE_P(MapTests, MapTestF_i64,
                         ::testing::ValuesIn(inputsf_i64));

// float input with double output: exercises OutType != InType.
const std::vector<MapInputs<float, int, double>> inputsf_i32_d = {
  {0.000001f, 1024 * 1024, 1234ULL, 5.9}};
typedef MapTest<float, int, double> MapTestF_i32_D;
TEST_P(MapTestF_i32_D, Result) {
  ASSERT_TRUE(devArrMatch(out_ref, out, params.len,
                          CompareApprox<double>(params.tolerance)));
}
INSTANTIATE_TEST_SUITE_P(MapTests, MapTestF_i32_D,
                         ::testing::ValuesIn(inputsf_i32_d));

// double input, 32-bit indexing.
const std::vector<MapInputs<double, int>> inputsd_i32 = {
  {0.00000001, 1024 * 1024, 1234ULL, 7.5}};
typedef MapTest<double, int> MapTestD_i32;
TEST_P(MapTestD_i32, Result) {
  ASSERT_TRUE(devArrMatch(out_ref, out, params.len,
                          CompareApprox<double>(params.tolerance)));
}
INSTANTIATE_TEST_SUITE_P(MapTests, MapTestD_i32,
                         ::testing::ValuesIn(inputsd_i32));

// double input, 64-bit (size_t) indexing.
const std::vector<MapInputs<double, size_t>> inputsd_i64 = {
  {0.00000001, 1024 * 1024, 1234ULL, 5.2}};
typedef MapTest<double, size_t> MapTestD_i64;
TEST_P(MapTestD_i64, Result) {
  ASSERT_TRUE(devArrMatch(out_ref, out, params.len,
                          CompareApprox<double>(params.tolerance)));
}
INSTANTIATE_TEST_SUITE_P(MapTests, MapTestD_i64,
                         ::testing::ValuesIn(inputsd_i64));

} // namespace linalg
} // namespace raft