diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d09e1b329b..5d01683f95 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -261,6 +261,7 @@ if(BUILD_RAFT_TESTS) test/linalg/eig.cu test/linalg/eig_sel.cu test/linalg/gemm_layout.cu + test/linalg/map.cu test/linalg/map_then_reduce.cu test/linalg/matrix_vector_op.cu test/linalg/multiply.cu diff --git a/cpp/include/raft/linalg/eltwise.cuh b/cpp/include/raft/linalg/eltwise.cuh index a46d550220..1c6dee562d 100644 --- a/cpp/include/raft/linalg/eltwise.cuh +++ b/cpp/include/raft/linalg/eltwise.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace linalg { /** * @defgroup ScalarOps Scalar operations on the input buffer - * @tparam math_t data-type upon which the math operation will be performed + * @tparam InType data-type upon which the math operation will be performed * @tparam IdxType Integer type used to for addressing * @param out the output buffer * @param in the input buffer @@ -33,26 +33,26 @@ namespace linalg { * @param stream cuda stream where to launch work * @{ */ -template -void scalarAdd(math_t *out, const math_t *in, math_t scalar, IdxType len, +template +void scalarAdd(OutType *out, const InType *in, InType scalar, IdxType len, cudaStream_t stream) { raft::linalg::unaryOp( - out, in, len, [scalar] __device__(math_t in) { return in + scalar; }, + out, in, len, [scalar] __device__(InType in) { return in + scalar; }, stream); } -template -void scalarMultiply(math_t *out, const math_t *in, math_t scalar, IdxType len, +template +void scalarMultiply(OutType *out, const InType *in, InType scalar, IdxType len, cudaStream_t stream) { raft::linalg::unaryOp( - out, in, len, [scalar] __device__(math_t in) { return in * scalar; }, + out, in, len, [scalar] __device__(InType in) { return in * scalar; }, stream); } /** @} */ /** * @defgroup BinaryOps Element-wise binary operations on the input buffers - * @tparam math_t data-type upon which the math operation will be performed + * @tparam InType data-type upon which the math operation will be performed * @tparam IdxType Integer type used to for addressing * @param out the output buffer * @param in1 the first input buffer @@ -61,46 +61,46 @@ void scalarMultiply(math_t *out, const math_t *in, math_t scalar, IdxType len, * @param stream cuda stream where to launch work * @{ */ -template -void eltwiseAdd(math_t *out, const math_t *in1, const math_t *in2, IdxType len, +template +void eltwiseAdd(OutType *out, const InType *in1, const InType *in2, IdxType len, cudaStream_t stream) { binaryOp( - out, in1, in2, len, [] __device__(math_t a, math_t b) { return a + b; }, + out, in1, in2, len, [] __device__(InType a, InType b) { return a + b; }, stream); } -template -void eltwiseSub(math_t *out, const math_t *in1, const math_t *in2, IdxType len, +template +void eltwiseSub(OutType *out, const InType *in1, const InType *in2, IdxType len, cudaStream_t stream) { binaryOp( - out, in1, in2, len, [] __device__(math_t a, math_t b) { return a - b; }, + out, in1, in2, len, [] __device__(InType a, InType b) { return a - b; }, stream); } -template -void eltwiseMultiply(math_t *out, const math_t *in1, const math_t *in2, +template +void eltwiseMultiply(OutType *out, const InType *in1, const InType *in2, IdxType len, cudaStream_t stream) { binaryOp( - out, in1, in2, len, [] __device__(math_t a, math_t b) { return a * b; }, + out, in1, in2, len, [] __device__(InType a, InType b) { return a * b; }, stream); } -template -void eltwiseDivide(math_t *out, const math_t *in1, const math_t *in2, +template +void eltwiseDivide(OutType *out, const InType *in1, const InType *in2, IdxType len, cudaStream_t stream) { binaryOp( - out, in1, in2, len, [] __device__(math_t a, math_t b) { return a / b; }, + out, in1, in2, len, [] __device__(InType a, InType b) { return a / b; }, stream); } -template -void eltwiseDivideCheckZero(math_t *out, const math_t *in1, const math_t *in2, +template +void eltwiseDivideCheckZero(OutType *out, const InType *in1, const InType *in2, IdxType len, cudaStream_t stream) { binaryOp( out, in1, in2, len, - [] __device__(math_t a, math_t b) { - if (b == math_t(0.0)) - return math_t(0.0); + [] __device__(InType a, InType b) { + if (b == InType(0.0)) + return InType(0.0); else return a / b; }, diff --git a/cpp/test/linalg/map.cu b/cpp/test/linalg/map.cu new file mode 100644 index 0000000000..227bce6a48 --- /dev/null +++ b/cpp/test/linalg/map.cu @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2018-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include "../test_utils.h" + +namespace raft { +namespace linalg { + +template +void mapLaunch(OutType *out, const InType *in1, const InType *in2, + const InType *in3, InType scalar, IdxType len, + cudaStream_t stream) { + map( + out, len, + [=] __device__(InType a, InType b, InType c) { return a + b + c + scalar; }, + stream, in1, in2, in3); +} + +template +struct MapInputs { + InType tolerance; + IdxType len; + unsigned long long int seed; + InType scalar; +}; + +template +void create_ref(OutType *out_ref, const InType *in1, const InType *in2, + const InType *in3, InType scalar, IdxType len, + cudaStream_t stream) { + InType *tmp; + allocate(tmp, len); + eltwiseAdd(tmp, in1, in2, len, stream); + eltwiseAdd(out_ref, tmp, in3, len, stream); + scalarAdd(out_ref, out_ref, (OutType)scalar, len, stream); + CUDA_CHECK(cudaFree(tmp)); +} + +template +class MapTest + : public ::testing::TestWithParam> { + protected: + void SetUp() override { + params = + ::testing::TestWithParam>::GetParam(); + raft::random::Rng r(params.seed); + + cudaStream_t stream; + CUDA_CHECK(cudaStreamCreate(&stream)); + IdxType len = params.len; + allocate(in1, len); + allocate(in2, len); + allocate(in3, len); + allocate(out_ref, len); + allocate(out, len); + r.uniform(in1, len, InType(-1.0), InType(1.0), stream); + r.uniform(in2, len, InType(-1.0), InType(1.0), stream); + r.uniform(in3, len, InType(-1.0), InType(1.0), stream); + + create_ref(out_ref, in1, in2, in3, params.scalar, len, stream); + mapLaunch(out, in1, in2, in3, params.scalar, len, stream); + CUDA_CHECK(cudaStreamDestroy(stream)); + } + + void TearDown() override { + CUDA_CHECK(cudaFree(in1)); + CUDA_CHECK(cudaFree(in2)); + CUDA_CHECK(cudaFree(in3)); + CUDA_CHECK(cudaFree(out_ref)); + CUDA_CHECK(cudaFree(out)); + } + + protected: + MapInputs params; + InType *in1, *in2, *in3; + OutType *out_ref, *out; +}; + +const std::vector> inputsf_i32 = { + {0.000001f, 1024 * 1024, 1234ULL, 3.2}}; +typedef MapTest MapTestF_i32; +TEST_P(MapTestF_i32, Result) { + ASSERT_TRUE(devArrMatch(out_ref, out, params.len, + CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MapTests, MapTestF_i32, + ::testing::ValuesIn(inputsf_i32)); + +const std::vector> inputsf_i64 = { + {0.000001f, 1024 * 1024, 1234ULL, 9.4}}; +typedef MapTest MapTestF_i64; +TEST_P(MapTestF_i64, Result) { + ASSERT_TRUE(devArrMatch(out_ref, out, params.len, + CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MapTests, MapTestF_i64, + ::testing::ValuesIn(inputsf_i64)); + +const std::vector> inputsf_i32_d = { + {0.000001f, 1024 * 1024, 1234ULL, 5.9}}; +typedef MapTest MapTestF_i32_D; +TEST_P(MapTestF_i32_D, Result) { + ASSERT_TRUE(devArrMatch(out_ref, out, params.len, + CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MapTests, MapTestF_i32_D, + ::testing::ValuesIn(inputsf_i32_d)); + +const std::vector> inputsd_i32 = { + {0.00000001, 1024 * 1024, 1234ULL, 7.5}}; +typedef MapTest MapTestD_i32; +TEST_P(MapTestD_i32, Result) { + ASSERT_TRUE(devArrMatch(out_ref, out, params.len, + CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MapTests, MapTestD_i32, + ::testing::ValuesIn(inputsd_i32)); + +const std::vector> inputsd_i64 = { + {0.00000001, 1024 * 1024, 1234ULL, 5.2}}; +typedef MapTest MapTestD_i64; +TEST_P(MapTestD_i64, Result) { + ASSERT_TRUE(devArrMatch(out_ref, out, params.len, + CompareApprox(params.tolerance))); +} +INSTANTIATE_TEST_SUITE_P(MapTests, MapTestD_i64, + ::testing::ValuesIn(inputsd_i64)); + +} // namespace linalg +} // namespace raft