From bad1aa2b10eceeee0a76e5a52b454f979a676117 Mon Sep 17 00:00:00 2001
From: Ethan Tao
Date: Sat, 5 Feb 2022 01:15:43 +0000
Subject: [PATCH] UTs

---
 .../cpu/reduction/reduction_ops_test.cc       | 101 ++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc b/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc
index eff6cd567cfcd..46d775173cdc3 100644
--- a/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc
+++ b/orttraining/orttraining/test/training_ops/cpu/reduction/reduction_ops_test.cc
@@ -6,6 +6,7 @@
 #include <cmath>
 #include "gtest/gtest.h"
 #include "test/providers/provider_test_utils.h"
+#include "test/common/cuda_op_test_utils.h"
 
 namespace onnxruntime {
 namespace test {
@@ -163,6 +164,106 @@ TEST_P(ReductionOpTest, ReduceAllL2HalfFloat) {
 }
 #endif
 
+#if defined(USE_CUDA) || defined(USE_ROCM)
+// ReduceAllL2 with BFloat16 inputs and a BFloat16 output.
+// Expected value is sqrt(1^2 + 2^2 + 3^2 + (-1)^2 + (-2)^2) = sqrt(19).
+TEST_P(ReductionOpTest, ReduceAllL2_BFloat16_BFloat16) {
+#ifdef USE_CUDA
+  // BF16 kernels require compute capability >= 5.3; skip on older hardware.
+  int min_cuda_architecture = 530;
+  if (!HasCudaEnvironment(min_cuda_architecture)) {
+    LOGS_DEFAULT(WARNING) << "Hardware NOT support BFP16";
+    return;
+  }
+#endif
+  OpTester test("ReduceAllL2", 1, onnxruntime::kMSDomain, true);
+  test.SetDeterminism(GetParam());
+
+  std::vector<float> data0 = {1.0f, 2.0f, 3.0f};
+  std::vector<BFloat16> data0_bf16 = FloatsToBFloat16s(data0);
+
+  std::vector<float> data1 = {-1.0f, -2.0f};
+  std::vector<BFloat16> data1_bf16 = FloatsToBFloat16s(data1);
+
+  std::vector<float> result = {4.358898943540674f};
+  std::vector<BFloat16> result_bf16 = FloatsToBFloat16s(result);
+
+  test.AddInput<BFloat16>("data0", {3}, data0_bf16);
+  test.AddInput<BFloat16>("data1", {2}, data1_bf16);
+
+  test.AddOutput<BFloat16>("reduced", {}, result_bf16);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+// ReduceAllL2 with BFloat16 inputs and a float output (mixed-precision path).
+TEST_P(ReductionOpTest, ReduceAllL2_BFloat16_Float) {
+#ifdef USE_CUDA
+  int min_cuda_architecture = 530;
+  if (!HasCudaEnvironment(min_cuda_architecture)) {
+    LOGS_DEFAULT(WARNING) << "Hardware NOT support BFP16";
+    return;
+  }
+#endif
+  OpTester test("ReduceAllL2", 1, onnxruntime::kMSDomain, true);
+  test.SetDeterminism(GetParam());
+
+  std::vector<float> data0 = {1.0f, 2.0f, 3.0f};
+  std::vector<BFloat16> data0_bf16 = FloatsToBFloat16s(data0);
+
+  std::vector<float> data1 = {-1.0f, -2.0f};
+  std::vector<BFloat16> data1_bf16 = FloatsToBFloat16s(data1);
+
+  std::vector<float> result = {4.358898943540674f};
+
+  test.AddInput<BFloat16>("data0", {3}, data0_bf16);
+  test.AddInput<BFloat16>("data1", {2}, data1_bf16);
+
+  test.AddOutput<float>("reduced", {}, result);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
+// ReduceAllL2 with float inputs and a BFloat16 output (mixed-precision path).
+TEST_P(ReductionOpTest, ReduceAllL2_Float_BFloat16) {
+#ifdef USE_CUDA
+  int min_cuda_architecture = 530;
+  if (!HasCudaEnvironment(min_cuda_architecture)) {
+    LOGS_DEFAULT(WARNING) << "Hardware NOT support BFP16";
+    return;
+  }
+#endif
+  OpTester test("ReduceAllL2", 1, onnxruntime::kMSDomain, true);
+  test.SetDeterminism(GetParam());
+
+  std::vector<float> data0 = {1.0f, 2.0f, 3.0f};
+  std::vector<float> data1 = {-1.0f, -2.0f};
+
+  std::vector<float> result = {4.358898943540674f};
+  std::vector<BFloat16> result_bf16 = FloatsToBFloat16s(result);
+
+  test.AddInput<float>("data0", {3}, data0);
+  test.AddInput<float>("data1", {2}, data1);
+
+  test.AddOutput<BFloat16>("reduced", {}, result_bf16);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+#ifdef USE_CUDA
+  execution_providers.push_back(DefaultCudaExecutionProvider());
+#elif USE_ROCM
+  execution_providers.push_back(DefaultRocmExecutionProvider());
+#endif
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+#endif
+
void TestMultiTensorReduce( const int tensor_count, const int min_tensor_size,