From 382bd23b6d96ced43a297c7ae3ed835cfc8b5345 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Wed, 30 Oct 2024 18:07:09 +0000 Subject: [PATCH 01/13] fix driver for old ocl kernel --- driver/bn_driver.hpp | 110 +++++++++++++++++++++++-------------------- driver/dm_bnorm.cpp | 7 +-- 2 files changed, 63 insertions(+), 54 deletions(-) diff --git a/driver/bn_driver.hpp b/driver/bn_driver.hpp index c89c3f166d..3c93b2960f 100644 --- a/driver/bn_driver.hpp +++ b/driver/bn_driver.hpp @@ -66,7 +66,11 @@ //#define BN_RUNFOR_PROFILER -template +template class BatchNormDriver : public Driver { public: @@ -133,8 +137,8 @@ class BatchNormDriver : public Driver tensor out_ref; // forward - GpumemTensor scale; - GpumemTensor bias; + GpumemTensor scale; + GpumemTensor bias; // forward inference GpumemTensor estMean; @@ -153,14 +157,14 @@ class BatchNormDriver : public Driver tensor runVariance_ref; // backward needed different type for bwd. - GpumemTensor out_bwd; + GpumemTensor out_bwd; - GpumemTensor bnScale; + GpumemTensor bnScale; GpumemTensor dScale; GpumemTensor dBias; // savedMean declared above as Tmix as well GpumemTensor savedInvVar; - GpumemTensor dy; + GpumemTensor dy; tensor dBias_ref; tensor dScale_ref; @@ -170,8 +174,8 @@ class BatchNormDriver : public Driver miopenTensorLayout_t bn_layout; }; -template -int BatchNormDriver::ParseCmdLineArgs(int argc, char* argv[]) +template +int BatchNormDriver::ParseCmdLineArgs(int argc, char* argv[]) { inflags.Parse(argc, argv); @@ -183,8 +187,8 @@ int BatchNormDriver::ParseCmdLineArgs(int argc, char* argv[]) return miopenStatusSuccess; } -template -int BatchNormDriver::GetandSetData() +template +int BatchNormDriver::GetandSetData() { std::vector in_len = GetInputTensorLengthsFromCmdLine(); @@ -201,8 +205,8 @@ int BatchNormDriver::GetandSetData() if(isFwdInfer || isFwdTrain) { out.AllocOnHost(tensor{bn_layout, in_len}); - scale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - bias.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + scale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + bias.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); auto gen_value_scale_bias = [](auto...) { return prng::gen_descreet_uniform_sign(1e-2, 100); @@ -236,13 +240,13 @@ int BatchNormDriver::GetandSetData() } else if(isBwd) { - out_bwd.AllocOnHost(tensor{bn_layout, in_len}); + out_bwd.AllocOnHost(tensor{bn_layout, in_len}); - bnScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - dy.AllocOnHost(tensor{bn_layout, in_len}); + bnScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + dy.AllocOnHost(tensor{bn_layout, in_len}); auto gen_var_bwd = [](auto...) 
{ - return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); + return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); }; dy.InitHostData(dy.GetTensor().desc.GetElementSize(), true, gen_var_bwd); @@ -269,8 +273,8 @@ int BatchNormDriver::GetandSetData() return miopenStatusSuccess; } -template -int BatchNormDriver::AddCmdLineArgs() +template +int BatchNormDriver::AddCmdLineArgs() { inflags.AddInputFlag( "forw", @@ -321,8 +325,9 @@ int BatchNormDriver::AddCmdLineArgs() return miopenStatusSuccess; } -template -std::vector BatchNormDriver::GetInputTensorLengthsFromCmdLine() +template +std::vector +BatchNormDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); @@ -344,8 +349,8 @@ std::vector BatchNormDriver::GetInputTensorLengthsFromCmd } } -template -bool BatchNormDriver::ChkLayout_ShortName() +template +bool BatchNormDriver::ChkLayout_ShortName() { // check for short name of layout type if(inflags.FindShortName("layout") == 'L') @@ -361,8 +366,9 @@ bool BatchNormDriver::ChkLayout_ShortName() } } -template -void BatchNormDriver::ValidateLayoutInputParameters(std::string layout_value) +template +void BatchNormDriver::ValidateLayoutInputParameters( + std::string layout_value) { if(!ChkLayout_ShortName()) { @@ -377,8 +383,8 @@ void BatchNormDriver::ValidateLayoutInputParameters(std::strin } } -template -int BatchNormDriver::SetBNParametersFromCmdLineArgs() +template +int BatchNormDriver::SetBNParametersFromCmdLineArgs() { // double bnAlpha = inflags.GetValueDouble("alpha"); @@ -510,8 +516,8 @@ int BatchNormDriver::SetBNParametersFromCmdLineArgs() return miopenStatusSuccess; } -template -int BatchNormDriver::AllocateBuffersAndCopy() +template +int BatchNormDriver::AllocateBuffersAndCopy() { status_t status = STATUS_SUCCESS; DEFINE_CONTEXT(ctx); @@ -596,8 +602,10 @@ int BatchNormDriver::AllocateBuffersAndCopy() return miopenStatusSuccess; } -template -void BatchNormDriver::runGPUFwdInference(Tref epsilon, float alpha, float beta) +template +void BatchNormDriver::runGPUFwdInference(Tref epsilon, + float alpha, + float beta) { if(keepRunningMeanVar) @@ -644,11 +652,11 @@ void BatchNormDriver::runGPUFwdInference(Tref epsilon, float a return; } -template -void BatchNormDriver::runGPUFwdTrain(Tref epsilon, - Tref eAF, - float alpha, - float beta) +template +void BatchNormDriver::runGPUFwdTrain(Tref epsilon, + Tref eAF, + float alpha, + float beta) { if(saveMeanVar && keepRunningMeanVar) { @@ -767,8 +775,8 @@ void BatchNormDriver::runGPUFwdTrain(Tref epsilon, #endif } -template -int BatchNormDriver::RunForwardGPU() +template +int BatchNormDriver::RunForwardGPU() { float alpha = static_cast(1), beta = static_cast(0); @@ -867,8 +875,8 @@ int BatchNormDriver::RunForwardGPU() return miopenStatusSuccess; } -template -void BatchNormDriver::runCPUFwdInference(Tref epsilon) +template +void BatchNormDriver::runCPUFwdInference(Tref epsilon) { int size{0}; miopenGetTensorDescriptorSize(&in.GetTensor().desc, &size); @@ -916,8 +924,8 @@ void BatchNormDriver::runCPUFwdInference(Tref epsilon) return; } -template -void BatchNormDriver::runCPUFwdTrain(Tref epsilon, Tref eAF) +template +void BatchNormDriver::runCPUFwdTrain(Tref epsilon, Tref eAF) { int size{0}; miopenGetTensorDescriptorSize(&in.GetTensor().desc, &size); @@ -985,8 +993,8 @@ void BatchNormDriver::runCPUFwdTrain(Tref epsilon, Tref eAF) } } -template -int BatchNormDriver::RunForwardCPU() +template +int BatchNormDriver::RunForwardCPU() { // T alpha = 0., beta = 0.; Tref epsilon = 
static_cast(EPSILON); @@ -1014,8 +1022,8 @@ int BatchNormDriver::RunForwardCPU() return miopenStatusSuccess; } -template -int BatchNormDriver::RunBackwardGPU() +template +int BatchNormDriver::RunBackwardGPU() { if(!back) return miopenStatusSuccess; @@ -1134,8 +1142,8 @@ int BatchNormDriver::RunBackwardGPU() return miopenStatusSuccess; } -template -int BatchNormDriver::VerifyForward() +template +int BatchNormDriver::VerifyForward() { // jump out since we are forcing forward off when doing backwards. @@ -1350,8 +1358,8 @@ int BatchNormDriver::VerifyForward() return miopenStatusSuccess; } -template -int BatchNormDriver::RunBackwardCPU() +template +int BatchNormDriver::RunBackwardCPU() { if(!back) @@ -1439,8 +1447,8 @@ int BatchNormDriver::RunBackwardCPU() return miopenStatusSuccess; } -template -int BatchNormDriver::VerifyBackward() +template +int BatchNormDriver::VerifyBackward() { if(!back) diff --git a/driver/dm_bnorm.cpp b/driver/dm_bnorm.cpp index 24e986fa1d..6381a8a83c 100644 --- a/driver/dm_bnorm.cpp +++ b/driver/dm_bnorm.cpp @@ -30,12 +30,13 @@ static Driver* makeDriver(const std::string& base_arg) { if(base_arg == "bnorm") return new BatchNormDriver(); + // if(base_arg == "bnormfp16") - return new BatchNormDriver(); + return new BatchNormDriver(); if(base_arg == "bnormfp16fp32") - return new BatchNormDriver(); + return new BatchNormDriver(); if(base_arg == "bnormbfp16fp32") - return new BatchNormDriver(); + return new BatchNormDriver(); return nullptr; } From b4e1a00fc056c9230ab18f7f4088ae9e2f4f9aaa Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Wed, 30 Oct 2024 21:18:56 +0000 Subject: [PATCH 02/13] fix template name --- driver/bn_driver.hpp | 207 +++++++++++++++++++++--------------------- test/gtest/bn_bwd.cpp | 6 -- 2 files changed, 105 insertions(+), 108 deletions(-) diff --git a/driver/bn_driver.hpp b/driver/bn_driver.hpp index 3c93b2960f..1c9806b629 100644 --- a/driver/bn_driver.hpp +++ b/driver/bn_driver.hpp @@ -66,15 +66,15 @@ //#define BN_RUNFOR_PROFILER -template + typename TAcc = TInput, + typename TScaleBias = TInput, + typename TOut = TInput> class BatchNormDriver : public Driver { public: - BatchNormDriver() : Driver() { data_type = (sizeof(Tgpu) == 4) ? miopenFloat : miopenHalf; } + BatchNormDriver() : Driver() { data_type = (sizeof(TInput) == 4) ? miopenFloat : miopenHalf; } int AddCmdLineArgs() override; int ParseCmdLineArgs(int argc, char* argv[]) override; @@ -132,39 +132,39 @@ class BatchNormDriver : public Driver InputFlags inflags; bool isDepthSpecified = false; - GpumemTensor in; - GpumemTensor out; + GpumemTensor in; + GpumemTensor out; // for forward the output maches input type. tensor out_ref; // forward - GpumemTensor scale; - GpumemTensor bias; + GpumemTensor scale; + GpumemTensor bias; // forward inference - GpumemTensor estMean; - GpumemTensor estVariance; + GpumemTensor estMean; + GpumemTensor estVariance; - GpumemTensor savedMean; + GpumemTensor savedMean; tensor savedMean_ref; // forward training - GpumemTensor savedVariance; - GpumemTensor runMean; - GpumemTensor runVariance; + GpumemTensor savedVariance; + GpumemTensor runMean; + GpumemTensor runVariance; // ref tensor savedVariance_ref; tensor runMean_ref; tensor runVariance_ref; // backward needed different type for bwd. 
- GpumemTensor out_bwd; + GpumemTensor out_bwd; - GpumemTensor bnScale; - GpumemTensor dScale; - GpumemTensor dBias; - // savedMean declared above as Tmix as well - GpumemTensor savedInvVar; - GpumemTensor dy; + GpumemTensor bnScale; + GpumemTensor dScale; + GpumemTensor dBias; + // savedMean declared above as TAcc as well + GpumemTensor savedInvVar; + GpumemTensor dy; tensor dBias_ref; tensor dScale_ref; @@ -174,8 +174,9 @@ class BatchNormDriver : public Driver miopenTensorLayout_t bn_layout; }; -template -int BatchNormDriver::ParseCmdLineArgs(int argc, char* argv[]) +template +int BatchNormDriver::ParseCmdLineArgs(int argc, + char* argv[]) { inflags.Parse(argc, argv); @@ -187,16 +188,16 @@ int BatchNormDriver::ParseCmdLineArgs(int return miopenStatusSuccess; } -template -int BatchNormDriver::GetandSetData() +template +int BatchNormDriver::GetandSetData() { std::vector in_len = GetInputTensorLengthsFromCmdLine(); SetBNParametersFromCmdLineArgs(); - auto gen_value = [](auto...) { return prng::gen_descreet_uniform_sign(1e-2, 100); }; + auto gen_value = [](auto...) { return prng::gen_descreet_uniform_sign(1e-2, 100); }; - in.AllocOnHost(tensor{bn_layout, in_len}); + in.AllocOnHost(tensor{bn_layout, in_len}); in.InitHostData(in.GetTensor().desc.GetElementSize(), true, gen_value); auto derivedBnDesc = miopen::TensorDescriptor{}; @@ -204,12 +205,12 @@ int BatchNormDriver::GetandSetData() if(isFwdInfer || isFwdTrain) { - out.AllocOnHost(tensor{bn_layout, in_len}); - scale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - bias.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + out.AllocOnHost(tensor{bn_layout, in_len}); + scale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + bias.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); auto gen_value_scale_bias = [](auto...) { - return prng::gen_descreet_uniform_sign(1e-2, 100); + return prng::gen_descreet_uniform_sign(1e-2, 100); }; scale.InitHostData(scale.GetTensor().desc.GetElementSize(), true, gen_value_scale_bias); @@ -217,23 +218,23 @@ int BatchNormDriver::GetandSetData() } if(isFwdInfer) { - estMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - estVariance.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + estMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + estVariance.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); auto gen_value_emean = [](auto...) { - return prng::gen_descreet_uniform_sign(1e-2, 100); + return prng::gen_descreet_uniform_sign(1e-2, 100); }; estMean.InitHostData(estMean.GetTensor().desc.GetElementSize(), true, gen_value_emean); } else if(isFwdTrain) { - savedMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - savedVariance.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - runMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - runVariance.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + savedMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + savedVariance.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + runMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + runVariance.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); auto gen_var = [](auto...) 
{ - return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); + return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); }; runMean.InitHostData(runMean.GetTensor().desc.GetElementSize(), true, gen_var); runVariance.InitHostData(runVariance.GetTensor().desc.GetElementSize(), true, gen_var); @@ -242,7 +243,7 @@ int BatchNormDriver::GetandSetData() { out_bwd.AllocOnHost(tensor{bn_layout, in_len}); - bnScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + bnScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); dy.AllocOnHost(tensor{bn_layout, in_len}); auto gen_var_bwd = [](auto...) { @@ -250,17 +251,17 @@ int BatchNormDriver::GetandSetData() }; dy.InitHostData(dy.GetTensor().desc.GetElementSize(), true, gen_var_bwd); - dScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - dBias.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - savedMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - savedInvVar.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + dScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + dBias.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + savedMean.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); + savedInvVar.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); bnScale.InitHostData(bnScale.GetTensor().desc.GetElementSize(), true, gen_value); savedMean.InitHostData(savedMean.GetTensor().desc.GetElementSize(), true, gen_var_bwd); auto gen_in_var = [](auto...) { - return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); + return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); }; savedInvVar.InitHostData(savedInvVar.GetTensor().desc.GetElementSize(), true, gen_in_var); } @@ -273,8 +274,8 @@ int BatchNormDriver::GetandSetData() return miopenStatusSuccess; } -template -int BatchNormDriver::AddCmdLineArgs() +template +int BatchNormDriver::AddCmdLineArgs() { inflags.AddInputFlag( "forw", @@ -325,9 +326,9 @@ int BatchNormDriver::AddCmdLineArgs() return miopenStatusSuccess; } -template +template std::vector -BatchNormDriver::GetInputTensorLengthsFromCmdLine() +BatchNormDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); @@ -349,8 +350,8 @@ BatchNormDriver::GetInputTensorLengthsFrom } } -template -bool BatchNormDriver::ChkLayout_ShortName() +template +bool BatchNormDriver::ChkLayout_ShortName() { // check for short name of layout type if(inflags.FindShortName("layout") == 'L') @@ -366,8 +367,8 @@ bool BatchNormDriver::ChkLayout_ShortName( } } -template -void BatchNormDriver::ValidateLayoutInputParameters( +template +void BatchNormDriver::ValidateLayoutInputParameters( std::string layout_value) { if(!ChkLayout_ShortName()) @@ -383,8 +384,8 @@ void BatchNormDriver::ValidateLayoutInputP } } -template -int BatchNormDriver::SetBNParametersFromCmdLineArgs() +template +int BatchNormDriver::SetBNParametersFromCmdLineArgs() { // double bnAlpha = inflags.GetValueDouble("alpha"); @@ -516,8 +517,8 @@ int BatchNormDriver::SetBNParametersFromCm return miopenStatusSuccess; } -template -int BatchNormDriver::AllocateBuffersAndCopy() +template +int BatchNormDriver::AllocateBuffersAndCopy() { status_t status = STATUS_SUCCESS; DEFINE_CONTEXT(ctx); @@ -602,10 +603,10 @@ int BatchNormDriver::AllocateBuffersAndCop return miopenStatusSuccess; } -template -void BatchNormDriver::runGPUFwdInference(Tref epsilon, - float alpha, - float beta) +template +void BatchNormDriver::runGPUFwdInference(Tref epsilon, + 
float alpha, + float beta) { if(keepRunningMeanVar) @@ -652,11 +653,11 @@ void BatchNormDriver::runGPUFwdInference(T return; } -template -void BatchNormDriver::runGPUFwdTrain(Tref epsilon, - Tref eAF, - float alpha, - float beta) +template +void BatchNormDriver::runGPUFwdTrain(Tref epsilon, + Tref eAF, + float alpha, + float beta) { if(saveMeanVar && keepRunningMeanVar) { @@ -775,8 +776,8 @@ void BatchNormDriver::runGPUFwdTrain(Tref #endif } -template -int BatchNormDriver::RunForwardGPU() +template +int BatchNormDriver::RunForwardGPU() { float alpha = static_cast(1), beta = static_cast(0); @@ -875,8 +876,8 @@ int BatchNormDriver::RunForwardGPU() return miopenStatusSuccess; } -template -void BatchNormDriver::runCPUFwdInference(Tref epsilon) +template +void BatchNormDriver::runCPUFwdInference(Tref epsilon) { int size{0}; miopenGetTensorDescriptorSize(&in.GetTensor().desc, &size); @@ -924,8 +925,9 @@ void BatchNormDriver::runCPUFwdInference(T return; } -template -void BatchNormDriver::runCPUFwdTrain(Tref epsilon, Tref eAF) +template +void BatchNormDriver::runCPUFwdTrain(Tref epsilon, + Tref eAF) { int size{0}; miopenGetTensorDescriptorSize(&in.GetTensor().desc, &size); @@ -993,8 +995,8 @@ void BatchNormDriver::runCPUFwdTrain(Tref } } -template -int BatchNormDriver::RunForwardCPU() +template +int BatchNormDriver::RunForwardCPU() { // T alpha = 0., beta = 0.; Tref epsilon = static_cast(EPSILON); @@ -1022,8 +1024,8 @@ int BatchNormDriver::RunForwardCPU() return miopenStatusSuccess; } -template -int BatchNormDriver::RunBackwardGPU() +template +int BatchNormDriver::RunBackwardGPU() { if(!back) return miopenStatusSuccess; @@ -1142,18 +1144,18 @@ int BatchNormDriver::RunBackwardGPU() return miopenStatusSuccess; } -template -int BatchNormDriver::VerifyForward() +template +int BatchNormDriver::VerifyForward() { // jump out since we are forcing forward off when doing backwards. if(!forw) return miopenStatusSuccess; - const Tref maxrms = static_cast((sizeof(Tgpu) == 4) ? RMSTOL_FP32 : RMSTOL_FP16); + const Tref maxrms = static_cast((sizeof(TInput) == 4) ? RMSTOL_FP32 : RMSTOL_FP16); #if(MIO_BN_DEBUG == 1) - const Tref tolerance = static_cast((sizeof(Tgpu) == 4) ? ERRTOL_FP32 : ERRTOL_FP16); + const Tref tolerance = static_cast((sizeof(TInput) == 4) ? 
ERRTOL_FP32 : ERRTOL_FP16); Tref diff = static_cast(0.); #endif @@ -1181,13 +1183,13 @@ int BatchNormDriver::VerifyForward() i < MIO_BN_MAX_DEBUGLOOP; i++) { - diff = fabs(Tmix(fabs(runMean.GetVector()[i]) - fabs(runMean_ref.data[i]))); + diff = fabs(TAcc(fabs(runMean.GetVector()[i]) - fabs(runMean_ref.data[i]))); if(!std::isfinite(diff) || diff > tolerance) { std::cout << "rm[" << i << "]: " << runMean.GetVector()[i]; std::cout << ", rm_host[" << i << "]: " << runMean_ref.data[i]; std::cout << ", diff[" << i << "]: " - << Tmix(fabs(runMean.GetVector()[i]) - fabs(runMean_ref.data[i])) + << TAcc(fabs(runMean.GetVector()[i]) - fabs(runMean_ref.data[i])) << std::endl; } } @@ -1211,13 +1213,13 @@ int BatchNormDriver::VerifyForward() i++) { diff = fabs( - Tmix(fabs(runVariance.GetVector()[i]) - fabs(runVariance_ref.data[i]))); + TAcc(fabs(runVariance.GetVector()[i]) - fabs(runVariance_ref.data[i]))); if(!std::isfinite(diff) || diff > tolerance) { std::cout << "rv[" << i << "]: " << runVariance.GetVector()[i]; std::cout << ", rv_host[" << i << "]: " << runVariance_ref.data[i]; std::cout << ", diff[" << i << "]: " - << Tmix(fabs(runVariance.GetVector()[i]) - + << TAcc(fabs(runVariance.GetVector()[i]) - fabs(runVariance_ref.data[i])) << std::endl; } @@ -1247,14 +1249,14 @@ int BatchNormDriver::VerifyForward() i < MIO_BN_MAX_DEBUGLOOP; i++) { - diff = fabs(Tmix(fabs(savedMean.GetVector()[i]) - fabs(savedMean_ref.data[i]))); + diff = fabs(TAcc(fabs(savedMean.GetVector()[i]) - fabs(savedMean_ref.data[i]))); maxval = maxval < diff ? diff : maxval; if(!std::isfinite(diff) || diff > tolerance) { std::cout << "sm[" << i << "]: " << savedMean.GetVector()[i]; std::cout << ", sm_host[" << i << "]: " << savedMean_ref.data[i]; std::cout << ", diff[" << i << "]: " - << Tmix(fabs(savedMean.GetVector()[i]) - + << TAcc(fabs(savedMean.GetVector()[i]) - fabs(savedMean_ref.data[i])) << std::endl; } @@ -1282,13 +1284,13 @@ int BatchNormDriver::VerifyForward() i++) { diff = fabs( - Tmix(fabs(savedVariance.GetVector()[i]) - fabs(savedVariance_ref.data[i]))); + TAcc(fabs(savedVariance.GetVector()[i]) - fabs(savedVariance_ref.data[i]))); if(!std::isfinite(diff) || diff > tolerance) { std::cout << "sv[" << i << "]: " << savedVariance.GetVector()[i]; std::cout << ", sv_host[" << i << "]: " << savedVariance_ref.data[i]; std::cout << ", diff[" << i << "]: " - << Tmix(fabs(savedVariance.GetVector()[i]) - + << TAcc(fabs(savedVariance.GetVector()[i]) - fabs(savedVariance_ref.data[i])) << std::endl; } @@ -1358,8 +1360,8 @@ int BatchNormDriver::VerifyForward() return miopenStatusSuccess; } -template -int BatchNormDriver::RunBackwardCPU() +template +int BatchNormDriver::RunBackwardCPU() { if(!back) @@ -1447,15 +1449,16 @@ int BatchNormDriver::RunBackwardCPU() return miopenStatusSuccess; } -template -int BatchNormDriver::VerifyBackward() +template +int BatchNormDriver::VerifyBackward() { if(!back) return miopenStatusSuccess; - const Tref maxrms = static_cast(((sizeof(Tgpu) == 4) ? RMSTOL_FP32 : RMSTOL_FP16) * 1000); - bool anError = false; + const Tref maxrms = + static_cast(((sizeof(TInput) == 4) ? RMSTOL_FP32 : RMSTOL_FP16) * 1000); + bool anError = false; RunBackwardCPU(); @@ -1465,7 +1468,7 @@ int BatchNormDriver::VerifyBackward() #if(MIO_BN_DEBUG == 1) const Tref tolerance = - static_cast(1000 * (sizeof(Tgpu) == 4) ? ERRTOL_FP32 : ERRTOL_FP16); + static_cast(1000 * (sizeof(TInput) == 4) ? 
ERRTOL_FP32 : ERRTOL_FP16); Tref diff = static_cast(0.0); #endif maxval = static_cast(0.0); @@ -1478,14 +1481,14 @@ int BatchNormDriver::VerifyBackward() #if(MIO_BN_DEBUG == 1) for(int i = 0; i < out_ref.data.size() && i < MIO_BN_MAX_DEBUGLOOP; i++) { - diff = fabs(Tgpu(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i]))); + diff = fabs(TInput(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i]))); maxval = maxval < diff ? diff : maxval; if(!std::isfinite(diff) || diff > tolerance) { std::cout << "out_ref[" << i << "]: " << out_ref.data[i]; std::cout << "\tout_bwd.GetVector()[" << i << "]: " << out_bwd.GetVector()[i]; std::cout << "\tdiff[" << i - << "]: " << Tgpu(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i])); + << "]: " << TInput(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i])); std::cout << "\tratioH: " << fabs(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i])) / fabs(out_bwd.GetVector()[i]) @@ -1511,14 +1514,14 @@ int BatchNormDriver::VerifyBackward() #if(MIO_BN_DEBUG == 1) for(int i = 0; i < dScale.GetVector().size() && i < MIO_BN_MAX_DEBUGLOOP; i++) { - auto diff = fabs(Tmix(fabs(dScale.GetVector()[i]) - fabs(dScale_ref.data[i]))); + auto diff = fabs(TAcc(fabs(dScale.GetVector()[i]) - fabs(dScale_ref.data[i]))); maxval = maxval < diff ? diff : maxval; if(!std::isfinite(diff) || diff > tolerance) { std::cout << "dscale[" << i << "]: " << dScale.GetVector()[i]; std::cout << "\tdscale_host[" << i << "]: " << dScale_ref.data[i]; std::cout << "\tdiff[" << i - << "]: " << Tmix(fabs(dScale.GetVector()[i]) - fabs(dScale_ref.data[i])); + << "]: " << TAcc(fabs(dScale.GetVector()[i]) - fabs(dScale_ref.data[i])); std::cout << "\tratioH: " << fabs(fabs(dScale.GetVector()[i]) - fabs(dScale_ref.data[i])) / fabs(dScale_ref.data[i]) @@ -1543,13 +1546,13 @@ int BatchNormDriver::VerifyBackward() #if(MIO_BN_DEBUG == 1) for(int i = 0; i < dBias.GetVector().size() && i < MIO_BN_MAX_DEBUGLOOP; i++) { - diff = fabs(Tmix(fabs(dBias.GetVector()[i]) - fabs(dBias_ref.data[i]))); + diff = fabs(TAcc(fabs(dBias.GetVector()[i]) - fabs(dBias_ref.data[i]))); if(!std::isfinite(diff) || diff > tolerance) { std::cout << "dbias[" << i << "]: " << dBias.GetVector()[i]; std::cout << "\tdbias_host[" << i << "]: " << dBias_ref.data[i]; std::cout << "\tdiff[" << i - << "]: " << Tmix(fabs(dBias.GetVector()[i]) - fabs(dBias_ref.data[i])); + << "]: " << TAcc(fabs(dBias.GetVector()[i]) - fabs(dBias_ref.data[i])); std::cout << "\tratioH: " << fabs(fabs(dBias.GetVector()[i]) - fabs(dBias_ref.data[i])) / fabs(dBias_ref.data[i]) diff --git a/test/gtest/bn_bwd.cpp b/test/gtest/bn_bwd.cpp index 164c36bb18..1a77050d81 100644 --- a/test/gtest/bn_bwd.cpp +++ b/test/gtest/bn_bwd.cpp @@ -36,12 +36,6 @@ struct GPU_BN_BWD_Large_FP16 { }; -// bf16 NHWC solver accepts is only on CK solver -// XDataType : bfloat16 -// YDataYype : bfloat16 -// ScaleDataType : bfloat16 -// BiasDataType : bfloat16 -// MeanVarDataType : float struct GPU_BN_BWD_Small_BFP16 : BNBwdTest { }; From 14f1a1f4f0670456d1a9722ce73abdf85a32ff2e Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 31 Oct 2024 03:25:01 +0000 Subject: [PATCH 03/13] fix review comments --- driver/bn_driver.hpp | 97 +++++++++++++++++++++----------------------- driver/dm_bnorm.cpp | 2 +- 2 files changed, 48 insertions(+), 51 deletions(-) diff --git a/driver/bn_driver.hpp b/driver/bn_driver.hpp index 1c9806b629..61197129ab 100644 --- a/driver/bn_driver.hpp +++ b/driver/bn_driver.hpp @@ -174,9 +174,8 @@ class BatchNormDriver : public Driver miopenTensorLayout_t bn_layout; }; 
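An illustrative note, not part of the patch: across patches 01..03 the driver's single Tgpu/Tmix pair becomes five template parameters, and the hunks above confirm the defaults TAcc = TInput, TScaleBias = TInput and TOut = TInput. The sketch below shows the intended role of each parameter; the parameter order and the sample instantiation are assumptions, since the original template argument lists were lost in this copy of the patch.

    // Assumed parameter order: BatchNormDriver<TInput, Tref, TAcc, TScaleBias, TOut>
    //   TInput     : x (and dx in backward)
    //   Tref       : host-side reference type used by the CPU verification
    //   TAcc       : accumulator for mean/variance (defaults to TInput)
    //   TScaleBias : scale and bias tensors (defaults to TInput)
    //   TOut       : y (and dy in backward, defaults to TInput)
    //
    // Hypothetical instantiation for the "bnormfp16" variant (half data,
    // float statistics), matching the dtype table in driver_arguments.cpp:
    // new BatchNormDriver<float16, double, float, float, float16>();
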
-template -int BatchNormDriver::ParseCmdLineArgs(int argc, - char* argv[]) +template +int BatchNormDriver::ParseCmdLineArgs(int argc, char* argv[]) { inflags.Parse(argc, argv); @@ -188,8 +187,8 @@ int BatchNormDriver::ParseCmdLineArgs return miopenStatusSuccess; } -template -int BatchNormDriver::GetandSetData() +template +int BatchNormDriver::GetandSetData() { std::vector in_len = GetInputTensorLengthsFromCmdLine(); @@ -241,13 +240,13 @@ int BatchNormDriver::GetandSetData() } else if(isBwd) { - out_bwd.AllocOnHost(tensor{bn_layout, in_len}); + out_bwd.AllocOnHost(tensor{bn_layout, in_len}); bnScale.AllocOnHost(tensor{bn_layout, derivedBnDesc.GetLengths()}); - dy.AllocOnHost(tensor{bn_layout, in_len}); + dy.AllocOnHost(tensor{bn_layout, in_len}); auto gen_var_bwd = [](auto...) { - return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); + return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); }; dy.InitHostData(dy.GetTensor().desc.GetElementSize(), true, gen_var_bwd); @@ -258,11 +257,10 @@ int BatchNormDriver::GetandSetData() bnScale.InitHostData(bnScale.GetTensor().desc.GetElementSize(), true, gen_value); - savedMean.InitHostData(savedMean.GetTensor().desc.GetElementSize(), true, gen_var_bwd); - auto gen_in_var = [](auto...) { return static_cast(1e-2 * (prng::gen_0_to_B(100) + 1)); }; + savedMean.InitHostData(savedMean.GetTensor().desc.GetElementSize(), true, gen_in_var); savedInvVar.InitHostData(savedInvVar.GetTensor().desc.GetElementSize(), true, gen_in_var); } else @@ -274,8 +272,8 @@ int BatchNormDriver::GetandSetData() return miopenStatusSuccess; } -template -int BatchNormDriver::AddCmdLineArgs() +template +int BatchNormDriver::AddCmdLineArgs() { inflags.AddInputFlag( "forw", @@ -326,9 +324,9 @@ int BatchNormDriver::AddCmdLineArgs() return miopenStatusSuccess; } -template +template std::vector -BatchNormDriver::GetInputTensorLengthsFromCmdLine() +BatchNormDriver::GetInputTensorLengthsFromCmdLine() { int in_n = inflags.GetValueInt("batchsize"); int in_c = inflags.GetValueInt("in_channels"); @@ -350,8 +348,8 @@ BatchNormDriver::GetInputTensorLength } } -template -bool BatchNormDriver::ChkLayout_ShortName() +template +bool BatchNormDriver::ChkLayout_ShortName() { // check for short name of layout type if(inflags.FindShortName("layout") == 'L') @@ -367,8 +365,8 @@ bool BatchNormDriver::ChkLayout_Short } } -template -void BatchNormDriver::ValidateLayoutInputParameters( +template +void BatchNormDriver::ValidateLayoutInputParameters( std::string layout_value) { if(!ChkLayout_ShortName()) @@ -384,8 +382,8 @@ void BatchNormDriver::ValidateLayoutI } } -template -int BatchNormDriver::SetBNParametersFromCmdLineArgs() +template +int BatchNormDriver::SetBNParametersFromCmdLineArgs() { // double bnAlpha = inflags.GetValueDouble("alpha"); @@ -517,8 +515,8 @@ int BatchNormDriver::SetBNParametersF return miopenStatusSuccess; } -template -int BatchNormDriver::AllocateBuffersAndCopy() +template +int BatchNormDriver::AllocateBuffersAndCopy() { status_t status = STATUS_SUCCESS; DEFINE_CONTEXT(ctx); @@ -603,10 +601,10 @@ int BatchNormDriver::AllocateBuffersA return miopenStatusSuccess; } -template -void BatchNormDriver::runGPUFwdInference(Tref epsilon, - float alpha, - float beta) +template +void BatchNormDriver::runGPUFwdInference(Tref epsilon, + float alpha, + float beta) { if(keepRunningMeanVar) @@ -653,11 +651,11 @@ void BatchNormDriver::runGPUFwdInfere return; } -template -void BatchNormDriver::runGPUFwdTrain(Tref epsilon, - Tref eAF, - float alpha, - float beta) +template +void 
BatchNormDriver::runGPUFwdTrain(Tref epsilon, + Tref eAF, + float alpha, + float beta) { if(saveMeanVar && keepRunningMeanVar) { @@ -776,8 +774,8 @@ void BatchNormDriver::runGPUFwdTrain( #endif } -template -int BatchNormDriver::RunForwardGPU() +template +int BatchNormDriver::RunForwardGPU() { float alpha = static_cast(1), beta = static_cast(0); @@ -876,8 +874,8 @@ int BatchNormDriver::RunForwardGPU() return miopenStatusSuccess; } -template -void BatchNormDriver::runCPUFwdInference(Tref epsilon) +template +void BatchNormDriver::runCPUFwdInference(Tref epsilon) { int size{0}; miopenGetTensorDescriptorSize(&in.GetTensor().desc, &size); @@ -925,9 +923,8 @@ void BatchNormDriver::runCPUFwdInfere return; } -template -void BatchNormDriver::runCPUFwdTrain(Tref epsilon, - Tref eAF) +template +void BatchNormDriver::runCPUFwdTrain(Tref epsilon, Tref eAF) { int size{0}; miopenGetTensorDescriptorSize(&in.GetTensor().desc, &size); @@ -995,8 +992,8 @@ void BatchNormDriver::runCPUFwdTrain( } } -template -int BatchNormDriver::RunForwardCPU() +template +int BatchNormDriver::RunForwardCPU() { // T alpha = 0., beta = 0.; Tref epsilon = static_cast(EPSILON); @@ -1024,8 +1021,8 @@ int BatchNormDriver::RunForwardCPU() return miopenStatusSuccess; } -template -int BatchNormDriver::RunBackwardGPU() +template +int BatchNormDriver::RunBackwardGPU() { if(!back) return miopenStatusSuccess; @@ -1144,8 +1141,8 @@ int BatchNormDriver::RunBackwardGPU() return miopenStatusSuccess; } -template -int BatchNormDriver::VerifyForward() +template +int BatchNormDriver::VerifyForward() { // jump out since we are forcing forward off when doing backwards. @@ -1360,8 +1357,8 @@ int BatchNormDriver::VerifyForward() return miopenStatusSuccess; } -template -int BatchNormDriver::RunBackwardCPU() +template +int BatchNormDriver::RunBackwardCPU() { if(!back) @@ -1449,8 +1446,8 @@ int BatchNormDriver::RunBackwardCPU() return miopenStatusSuccess; } -template -int BatchNormDriver::VerifyBackward() +template +int BatchNormDriver::VerifyBackward() { if(!back) @@ -1481,14 +1478,14 @@ int BatchNormDriver::VerifyBackward() #if(MIO_BN_DEBUG == 1) for(int i = 0; i < out_ref.data.size() && i < MIO_BN_MAX_DEBUGLOOP; i++) { - diff = fabs(TInput(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i]))); + diff = fabs(TOut(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i]))); maxval = maxval < diff ? 
diff : maxval; if(!std::isfinite(diff) || diff > tolerance) { std::cout << "out_ref[" << i << "]: " << out_ref.data[i]; std::cout << "\tout_bwd.GetVector()[" << i << "]: " << out_bwd.GetVector()[i]; std::cout << "\tdiff[" << i - << "]: " << TInput(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i])); + << "]: " << TOut(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i])); std::cout << "\tratioH: " << fabs(fabs(out_ref.data[i]) - fabs(out_bwd.GetVector()[i])) / fabs(out_bwd.GetVector()[i]) diff --git a/driver/dm_bnorm.cpp b/driver/dm_bnorm.cpp index 6381a8a83c..0ee3da5d0a 100644 --- a/driver/dm_bnorm.cpp +++ b/driver/dm_bnorm.cpp @@ -30,7 +30,7 @@ static Driver* makeDriver(const std::string& base_arg) { if(base_arg == "bnorm") return new BatchNormDriver(); - // + // if(base_arg == "bnormfp16") return new BatchNormDriver(); if(base_arg == "bnormfp16fp32") From 6a5cc924adeef3e175f25a9e7dc743c3e06a8ea9 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Fri, 1 Nov 2024 01:32:47 +0000 Subject: [PATCH 04/13] fix isApplicable for CK and OCL bn spatial --- src/batchnorm/problem_description.cpp | 16 ++++---- .../miopen/batchnorm/problem_description.hpp | 29 ++++++++------ src/solver/batchnorm/backward_ck.cpp | 17 +++++++- .../batchnorm/backward_per_activation.cpp | 5 +-- .../batchnorm/backward_spatial_multiple.cpp | 10 ++--- .../batchnorm/backward_spatial_single.cpp | 39 +++++++++++++------ src/solver/batchnorm/forward_inference.cpp | 28 +++++++++---- src/solver/batchnorm/forward_inference_ck.cpp | 16 +++++++- .../batchnorm/forward_per_activation.cpp | 2 +- .../batchnorm/forward_spatial_multiple.cpp | 2 +- .../batchnorm/forward_spatial_single.cpp | 37 +++++++++++------- src/solver/batchnorm/forward_training_ck.cpp | 15 ++++++- 12 files changed, 149 insertions(+), 67 deletions(-) diff --git a/src/batchnorm/problem_description.cpp b/src/batchnorm/problem_description.cpp index ac63fdf73e..7eb2592c46 100644 --- a/src/batchnorm/problem_description.cpp +++ b/src/batchnorm/problem_description.cpp @@ -67,7 +67,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const size_t ygridsize = 1; bool bfpmixparm = false; - if(xDesc.GetType() == miopenHalf && GetBnScaleBiasMeanVarDesc().GetType() == miopenFloat) + if(IsMix()) { bfpmixparm = true; } @@ -137,7 +137,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const ss << "fp16" << static_cast(IsFp16()); ss << "fp32" << static_cast(IsFp32()); ss << "fp64" << static_cast(IsFp64()); - ss << "fbf16" << static_cast(IsBfp16()); + ss << "fbf16" << static_cast(IsBFp16()); ss << "fmix" << static_cast(IsMix()); ss << "c" << c; } @@ -154,7 +154,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const ss << "fp16" << static_cast(IsFp16()); ss << "fp32" << static_cast(IsFp32()); ss << "fp64" << static_cast(IsFp64()); - ss << "fbf16" << static_cast(IsBfp16()); + ss << "fbf16" << static_cast(IsBFp16()); ss << "fmix" << static_cast(IsMix()); ss << "single" << static_cast(single); ss << "n" << n; @@ -173,7 +173,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const ss << "fp16" << static_cast(IsFp16()); ss << "fp32" << static_cast(IsFp32()); ss << "fp64" << static_cast(IsFp64()); - ss << "fbf16" << static_cast(IsBfp16()); + ss << "fbf16" << static_cast(IsBFp16()); ss << "fmix" << static_cast(IsMix()); ss << "gx" << xgridsize; ss << "gy" << ygridsize; @@ -203,7 +203,7 @@ NetworkConfig ProblemDescription::MakeForwardInferenceNetworkConfig() const ss << "fp16" << static_cast(IsFp16()); ss 
<< "fp32" << static_cast(IsFp32()); ss << "fp64" << static_cast(IsFp64()); - ss << "fbf16" << static_cast(IsBfp16()); + ss << "fbf16" << static_cast(IsBFp16()); ss << "fmix" << static_cast(IsMix()); ss << "mode" << bn_mode; ss << "HWdims" << in_cstride; @@ -218,7 +218,7 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const std::ostringstream ss; bool bfpmixparm = false; - if(xDesc.GetType() == miopenHalf && GetScaleBiasDiffDesc().GetType() == miopenFloat) + if(xDesc.GetType() == miopenHalf && GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; } @@ -311,7 +311,7 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const ss << "fp16" << static_cast(IsFp16()); ss << "fp32" << static_cast(IsFp32()); ss << "fp64" << static_cast(IsFp64()); - ss << "fbf16" << static_cast(IsBfp16()); + ss << "fbf16" << static_cast(IsBFp16()); ss << "fmix" << static_cast(IsMix()); ss << "single" << static_cast(single); ss << "gcn" << ldsgcn; @@ -334,7 +334,7 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const ss << "fp16" << static_cast(IsFp16()); ss << "fp32" << static_cast(IsFp32()); ss << "fp64" << static_cast(IsFp64()); - ss << "fbf16" << static_cast(IsBfp16()); + ss << "fbf16" << static_cast(IsBFp16()); ss << "fmix" << static_cast(IsMix()); ss << "nhw" << in_nhw; } diff --git a/src/include/miopen/batchnorm/problem_description.hpp b/src/include/miopen/batchnorm/problem_description.hpp index b0ecb64b7e..a1ed9d2594 100644 --- a/src/include/miopen/batchnorm/problem_description.hpp +++ b/src/include/miopen/batchnorm/problem_description.hpp @@ -52,6 +52,16 @@ struct ProblemDescriptionTag { }; +inline bool is_fp16_or_bfp16(miopenDataType_t type) +{ + return ((type == miopenHalf) || (type == miopenBFloat16)); +} + +inline bool is_fp32_or_fp64(miopenDataType_t type) +{ + return ((type == miopenFloat) || (type == miopenDouble)); +} + struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, ProblemDescriptionTag #if MIOPEN_ENABLE_SQLITE @@ -173,13 +183,13 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, return dxDesc; } - const TensorDescriptor& GetBnScaleBiasMeanVarDesc() const - { - assert(direction == Direction::ForwardTraining || direction == Direction::ForwardInference); - return scaleDesc; - } + const TensorDescriptor& GetBnScale() const { return scaleDesc; } + + const TensorDescriptor& GetBnBias() const { return biasDesc; } + + const TensorDescriptor& GetBnSMean() const { return sMeanDesc; } - const TensorDescriptor& GetScaleBiasDiffDesc() const { return scaleDesc; } + const TensorDescriptor& GetBnSVar() const { return sVarianceDesc; } bool GetResultSave() const { @@ -233,11 +243,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, bool IsFp64() const { return xDesc.GetType() == miopenDouble; } bool IsFp32() const { return xDesc.GetType() == miopenFloat; } bool IsFp16() const { return xDesc.GetType() == miopenHalf; } - bool IsMix() const - { - return xDesc.GetType() == miopenHalf && sMeanDesc.GetType() == miopenFloat; - } - bool IsBfp16() const { return xDesc.GetType() == miopenBFloat16; } + bool IsBFp16() const { return xDesc.GetType() == miopenBFloat16; } + bool IsMix() const { return (IsFp16() || IsBFp16()) && sMeanDesc.GetType() == miopenFloat; } void Serialize(std::ostream& stream) const { stream << MakeNetworkConfig().ToString(); } diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index 198b046eff..fd2613f807 100644 --- 
a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -367,12 +367,25 @@ bool BnCKBwdBackward::IsApplicable( return false; if(bn_problem.GetDirection() != miopen::batchnorm::Direction::Backward) return false; - if(bn_problem.GetXDesc().GetType() != bn_problem.GetScaleBiasDiffDesc().GetType()) - return false; if(bn_problem.GetMode() != miopenBNSpatial) return false; if(!bn_problem.Is2D()) return false; + // case 1 : fp16 or bfp16 + if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + bn_problem.GetDXDesc().GetType() == miopenFloat && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && + bn_problem.GetDYDesc().GetType() == miopenFloat && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 1 : fp32 or fp64 + (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) + return false; switch(bn_problem.GetXDesc().GetType()) { diff --git a/src/solver/batchnorm/backward_per_activation.cpp b/src/solver/batchnorm/backward_per_activation.cpp index af52fbc339..9a02ad9f25 100644 --- a/src/solver/batchnorm/backward_per_activation.cpp +++ b/src/solver/batchnorm/backward_per_activation.cpp @@ -57,14 +57,13 @@ BnBwdTrainingPerActivation::GetSolution(const ExecutionContext& context, bool bfp16parm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { bfp16parm = true; bfp32parm = false; } else if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenFloat) + problem.GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; bfp32parm = false; diff --git a/src/solver/batchnorm/backward_spatial_multiple.cpp b/src/solver/batchnorm/backward_spatial_multiple.cpp index 7fa9c0f89a..d9c5596ce2 100644 --- a/src/solver/batchnorm/backward_spatial_multiple.cpp +++ b/src/solver/batchnorm/backward_spatial_multiple.cpp @@ -50,8 +50,7 @@ bool BnBwdTrainingSpatialMultiple::IsApplicable( } #if WORKAROUND_ISSUE_1549_FP16_BUILD_ERROR - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { // bfp16parm = true; // Unsupported kernel mode, error in kernel code @@ -72,14 +71,13 @@ ConvSolution BnBwdTrainingSpatialMultiple::GetSolution( bool bfp16parm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { bfp16parm = true; bfp32parm = false; } else if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenFloat) + problem.GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; bfp32parm = false; @@ -274,7 +272,7 @@ ConvSolution BnBwdTrainingSpatialMultiple::GetSolution( } } - const auto 
dtype = problem.GetScaleBiasDiffDesc().GetType(); + const auto dtype = problem.GetBnScale().GetType(); const auto useSaved = problem.UseSaved(); result.invoker_factory = [=](const std::vector& kernels) { diff --git a/src/solver/batchnorm/backward_spatial_single.cpp b/src/solver/batchnorm/backward_spatial_single.cpp index 86fa5a68c7..c8d8681488 100644 --- a/src/solver/batchnorm/backward_spatial_single.cpp +++ b/src/solver/batchnorm/backward_spatial_single.cpp @@ -40,17 +40,17 @@ namespace solver { namespace batchnorm { bool BnBwdTrainingSpatialSingle::IsApplicable( - const ExecutionContext&, const miopen::batchnorm::ProblemDescription& problem) const + const ExecutionContext&, const miopen::batchnorm::ProblemDescription& bn_problem) const { - if(problem.GetDirection() != miopen::batchnorm::Direction::Backward || - problem.GetMode() != miopenBNSpatial) + if(bn_problem.GetDirection() != miopen::batchnorm::Direction::Backward || + bn_problem.GetMode() != miopenBNSpatial) return false; - if(!problem.Is2D()) + if(!bn_problem.Is2D()) return false; #if WORKAROUND_ISSUE_1549_FP16_BUILD_ERROR - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenHalf) + if(bn_problem.GetXDesc().GetType() == miopenHalf && + bn_problem.GetBnScale().GetType() == miopenHalf) { // bfp16parm = true; // Unsupported kernel mode, error in kernel code @@ -59,11 +59,27 @@ bool BnBwdTrainingSpatialSingle::IsApplicable( } #endif - if(problem.IsLayoutNHWC()) + if(bn_problem.IsLayoutNHWC()) return true; + // case 1 : fp16 or bfp16 + if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetDXDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetDYDesc().GetType()) && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 1 : fp32 or fp64 + (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) + return false; + int n, c, h, w; - std::tie(n, c, h, w) = tien<4>(problem.GetXDesc().GetLengths()); + std::tie(n, c, h, w) = tien<4>(bn_problem.GetXDesc().GetLengths()); unsigned int in_cstride = h * w; unsigned int in_nhw = n * in_cstride; @@ -83,14 +99,13 @@ BnBwdTrainingSpatialSingle::GetSolution(const ExecutionContext& context, bool bfp16parm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { bfp16parm = true; bfp32parm = false; } else if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetScaleBiasDiffDesc().GetType() == miopenFloat) + problem.GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; bfp32parm = false; @@ -278,7 +293,7 @@ BnBwdTrainingSpatialSingle::GetSolution(const ExecutionContext& context, result.construction_params.push_back(kernel); } - const auto dtype = problem.GetScaleBiasDiffDesc().GetType(); + const auto dtype = problem.GetBnScale().GetType(); const auto useSaved 
= problem.UseSaved(); result.invoker_factory = [=](const std::vector& kernels) { diff --git a/src/solver/batchnorm/forward_inference.cpp b/src/solver/batchnorm/forward_inference.cpp index 2f945c46a9..f62c161092 100644 --- a/src/solver/batchnorm/forward_inference.cpp +++ b/src/solver/batchnorm/forward_inference.cpp @@ -39,16 +39,29 @@ namespace solver { namespace batchnorm { bool BnFwdInference::IsApplicable(const ExecutionContext&, - const miopen::batchnorm::ProblemDescription& problem) const + const miopen::batchnorm::ProblemDescription& bn_problem) const { - if(problem.IsLayoutNHWC()) + if(bn_problem.IsLayoutNHWC()) return false; - if(problem.GetDirection() != miopen::batchnorm::Direction::ForwardInference) + if(bn_problem.GetDirection() != miopen::batchnorm::Direction::ForwardInference) return false; - if(!(problem.IsFp32() or problem.IsFp16())) + if(!(bn_problem.IsFp32() or bn_problem.IsFp16())) return false; - if(!problem.Is2D()) + if(!bn_problem.Is2D()) return false; + + // case 1 : mix type + if(!((bn_problem.GetXDesc().GetType() == miopenHalf && + bn_problem.GetYDesc().GetType() == miopenHalf && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat) || + // case 2 : float type + (bn_problem.GetXDesc().GetType() == miopenFloat && + bn_problem.GetYDesc().GetType() == miopenFloat && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat))) + return false; + return true; } @@ -60,14 +73,13 @@ ConvSolution BnFwdInference::GetSolution(const ExecutionContext& context, bool bfpmixparm = false; bool bfp16parm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScaleBiasMeanVarDesc().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { bfp16parm = true; bfp32parm = false; } else if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScaleBiasMeanVarDesc().GetType() == miopenFloat) + problem.GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; bfp32parm = false; diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index 529065d2ae..c4fc3b00ef 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -334,7 +334,21 @@ bool BnCKFwdInference::IsApplicable( return false; if(bn_problem.GetMode() != miopenBNSpatial) return false; - if(bn_problem.GetXDesc().GetType() != bn_problem.GetScaleBiasDiffDesc().GetType()) + + // case 1 : fp16 or bfp16 + if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 1 : fp32 or fp64 + (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) return false; 
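A minimal sketch, not part of the patch, of the datatype gate that these IsApplicable changes share; it assumes only the is_fp16_or_bfp16 / is_fp32_or_fp64 helpers added to problem_description.hpp in this series. The real per-solver checks additionally constrain the scale, bias and output descriptors:

    // Accept low-precision tensors with float statistics, or a uniformly
    // fp32/fp64 problem; anything else makes the solver not applicable.
    static bool HasSupportedBnTypes(const miopen::batchnorm::ProblemDescription& p)
    {
        using miopen::batchnorm::is_fp16_or_bfp16;
        using miopen::batchnorm::is_fp32_or_fp64;
        const bool low_precision = is_fp16_or_bfp16(p.GetXDesc().GetType()) &&
                                   p.GetBnSMean().GetType() == miopenFloat &&
                                   p.GetBnSVar().GetType() == miopenFloat;
        const bool full_precision = is_fp32_or_fp64(p.GetXDesc().GetType()) &&
                                    is_fp32_or_fp64(p.GetBnSMean().GetType()) &&
                                    is_fp32_or_fp64(p.GetBnSVar().GetType());
        return low_precision || full_precision;
    }
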
switch(bn_problem.GetXDesc().GetType()) diff --git a/src/solver/batchnorm/forward_per_activation.cpp b/src/solver/batchnorm/forward_per_activation.cpp index 0e4d6da799..1f653e9b7d 100644 --- a/src/solver/batchnorm/forward_per_activation.cpp +++ b/src/solver/batchnorm/forward_per_activation.cpp @@ -61,7 +61,7 @@ BnFwdTrainingPerActivation::GetSolution(const ExecutionContext& context, { decltype(auto) handle = context.GetStream(); - decltype(auto) bnScaleBiasMeanVarDesc = problem.GetBnScaleBiasMeanVarDesc(); + decltype(auto) bnScaleBiasMeanVarDesc = problem.GetBnScale(); unsigned int in_nhw = n * in_cstride; unsigned int in_nchw = n * in_nstride; diff --git a/src/solver/batchnorm/forward_spatial_multiple.cpp b/src/solver/batchnorm/forward_spatial_multiple.cpp index 9f85c8d7ab..f8bde6be6a 100644 --- a/src/solver/batchnorm/forward_spatial_multiple.cpp +++ b/src/solver/batchnorm/forward_spatial_multiple.cpp @@ -55,7 +55,7 @@ ConvSolution BnFwdTrainingSpatialMultiple::GetSolution( { const auto& handle = context.GetStream(); const auto& xDesc = problem.GetXDesc(); - const auto& bnScaleBiasMeanVarDesc = problem.GetBnScaleBiasMeanVarDesc(); + const auto& bnScaleBiasMeanVarDesc = problem.GetBnScale(); int n, c, h, w; std::tie(n, c, h, w) = tien<4>(xDesc.GetLengths()); diff --git a/src/solver/batchnorm/forward_spatial_single.cpp b/src/solver/batchnorm/forward_spatial_single.cpp index 732181073a..fd30c8ffba 100644 --- a/src/solver/batchnorm/forward_spatial_single.cpp +++ b/src/solver/batchnorm/forward_spatial_single.cpp @@ -40,17 +40,29 @@ namespace solver { namespace batchnorm { bool BnFwdTrainingSpatialSingle::IsApplicable( - const ExecutionContext&, const miopen::batchnorm::ProblemDescription& problem) const + const ExecutionContext&, const miopen::batchnorm::ProblemDescription& bn_problem) const { - if(problem.GetDirection() != miopen::batchnorm::Direction::ForwardTraining || - problem.GetMode() != miopenBNSpatial) + if(bn_problem.GetDirection() != miopen::batchnorm::Direction::ForwardTraining || + bn_problem.GetMode() != miopenBNSpatial) return false; - if(problem.IsLayoutNHWC()) + if(bn_problem.IsLayoutNHWC()) return true; + // case 1 : mix type + if(!((bn_problem.GetXDesc().GetType() == miopenHalf && + bn_problem.GetYDesc().GetType() == miopenHalf && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat) || + // case 2 : float type + (bn_problem.GetXDesc().GetType() == miopenFloat && + bn_problem.GetYDesc().GetType() == miopenFloat && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat))) + return false; + int n, c, h, w; - std::tie(n, c, h, w) = tien<4>(problem.GetXDesc().GetLengths()); + std::tie(n, c, h, w) = tien<4>(bn_problem.GetXDesc().GetLengths()); unsigned int in_cstride = h * w; unsigned int in_nhw = n * in_cstride; @@ -58,13 +70,13 @@ bool BnFwdTrainingSpatialSingle::IsApplicable( bool bfpmixparm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScaleBiasMeanVarDesc().GetType() == miopenHalf) + if(bn_problem.GetXDesc().GetType() == miopenHalf && + bn_problem.GetBnScale().GetType() == miopenHalf) { bfp32parm = false; } - else if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScaleBiasMeanVarDesc().GetType() == miopenFloat) + else if(bn_problem.GetXDesc().GetType() == miopenHalf && + bn_problem.GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; bfp32parm = false; @@ -97,14 +109,13 @@ 
BnFwdTrainingSpatialSingle::GetSolution(const ExecutionContext& context, bool bfp16parm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScaleBiasMeanVarDesc().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { bfp16parm = true; bfp32parm = false; } else if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScaleBiasMeanVarDesc().GetType() == miopenFloat) + problem.GetBnScale().GetType() == miopenFloat) { bfpmixparm = true; bfp32parm = false; @@ -238,7 +249,7 @@ BnFwdTrainingSpatialSingle::GetSolution(const ExecutionContext& context, result.construction_params.push_back(kernel); } - const auto dtype = problem.GetBnScaleBiasMeanVarDesc().GetType(); + const auto dtype = problem.GetBnScale().GetType(); const auto vn4 = (variant != 4); result.invoker_factory = [=](const std::vector& kernels) { diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index fec919cf79..62c6a7932f 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -361,7 +361,20 @@ bool BnCKFwdTraining::IsApplicable( return false; if(bn_problem.GetMode() != miopenBNSpatial) return false; - if(bn_problem.GetXDesc().GetType() != bn_problem.GetScaleBiasDiffDesc().GetType()) + // case 1 : fp16 or bfp16 + if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 1 : fp32 or fp64 + (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) return false; switch(bn_problem.GetXDesc().GetType()) From 0242120b60af764fd6acfea1f6814d1d34dedcdc Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 03:30:28 +0000 Subject: [PATCH 05/13] fix gtest issue in bn for ck and ocl --- src/batch_norm_api.cpp | 41 ++++-- src/driver_arguments.cpp | 119 +++++++++++++++--- src/fusion.cpp | 21 ++-- src/include/miopen/driver_arguments.hpp | 5 +- .../batchnorm/backward_spatial_single.cpp | 3 - src/solver/batchnorm/forward_inference.cpp | 7 +- .../batchnorm/forward_spatial_single.cpp | 7 +- test/fusionHost.hpp | 3 +- test/gtest/bn.hpp | 25 ++-- test/gtest/bn_bwd.cpp | 52 ++++---- test/gtest/bn_fwd_train.cpp | 62 ++++----- test/gtest/bn_infer.cpp | 58 ++++----- test/gtest/test_operations.hpp | 9 +- 13 files changed, 259 insertions(+), 153 deletions(-) diff --git a/src/batch_norm_api.cpp b/src/batch_norm_api.cpp index d3b824cee0..59828ebdb5 100644 --- a/src/batch_norm_api.cpp +++ b/src/batch_norm_api.cpp @@ -50,7 +50,10 @@ namespace miopen { namespace debug { void LogCmdBNorm(const miopenTensorDescriptor_t xDesc, - const miopenTensorDescriptor_t sMeanDesc, + const miopenTensorDescriptor_t yDesc, + const miopenTensorDescriptor_t scaleDesc, + const 
miopenTensorDescriptor_t biasDesc, + const miopenTensorDescriptor_t saveMeanDesc, miopenBatchNormMode_t bn_mode, const void* resultRunningMean, const void* resultRunningVariance, @@ -61,7 +64,10 @@ void LogCmdBNorm(const miopenTensorDescriptor_t xDesc, if(miopen::IsLoggingCmd()) { const std::string& str = BnormArgsForMIOpenDriver(xDesc, - sMeanDesc, + yDesc, + scaleDesc, + biasDesc, + saveMeanDesc, bn_mode, resultRunningMean, resultRunningVariance, @@ -206,7 +212,7 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle, const miopenTensorDescriptor_t yDesc, void* y, const miopenTensorDescriptor_t scaleDesc, - const miopenTensorDescriptor_t BiasDesc, + const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estVarianceDesc, void* bnScale, @@ -222,7 +228,7 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle, yDesc, y, scaleDesc, - BiasDesc, + biasDesc, estMeanDesc, estVarianceDesc, bnScale, @@ -232,12 +238,15 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle, epsilon); miopen::debug::LogCmdBNorm(xDesc, + yDesc, + scaleDesc, + biasDesc, estMeanDesc, bn_mode, - estimatedMean, - estimatedVariance, nullptr, nullptr, + estMeanDesc, + estimatedVariance, miopen::debug::BatchNormDirection_t::ForwardInference); // In case of NxCxDxHxW @@ -256,7 +265,7 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle, : miopen::deref(yDesc), DataCast(y), miopen::deref(scaleDesc), - miopen::deref(BiasDesc), + miopen::deref(biasDesc), miopen::deref(estMeanDesc), miopen::deref(estVarianceDesc), DataCast(bnScale), @@ -277,7 +286,7 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle, const miopenTensorDescriptor_t yDesc, void* y, const miopenTensorDescriptor_t scaleDesc, - const miopenTensorDescriptor_t BiasDesc, + const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarianceDesc, void* bnScale, @@ -296,7 +305,7 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle, yDesc, y, scaleDesc, - BiasDesc, + biasDesc, savedMeanDesc, savedVarianceDesc, bnScale, @@ -309,6 +318,9 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle, resultSaveInvVariance); miopen::debug::LogCmdBNorm(xDesc, + yDesc, + scaleDesc, + biasDesc, savedMeanDesc, bn_mode, resultRunningMean, @@ -332,7 +344,7 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle, : miopen::deref(yDesc), DataCast(y), miopen::deref(scaleDesc), - miopen::deref(BiasDesc), + miopen::deref(biasDesc), miopen::deref(savedMeanDesc), miopen::deref(savedVarianceDesc), DataCast(bnScale), @@ -360,7 +372,7 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle, const miopenTensorDescriptor_t dxDesc, void* dx, const miopenTensorDescriptor_t scaleDesc, - const miopenTensorDescriptor_t BiasDesc, + const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarianceDesc, const void* bnScale, @@ -379,7 +391,7 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle, dxDesc, dx, scaleDesc, - BiasDesc, + biasDesc, savedMeanDesc, savedVarianceDesc, bnScale, @@ -389,6 +401,9 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle, savedMean, savedInvVariance); miopen::debug::LogCmdBNorm(xDesc, + dyDesc, + scaleDesc, + biasDesc, savedMeanDesc, bn_mode, nullptr, @@ -417,7 +432,7 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle, : miopen::deref(dxDesc), 
DataCast(dx), miopen::deref(scaleDesc), - miopen::deref(BiasDesc), + miopen::deref(biasDesc), miopen::deref(savedMeanDesc), miopen::deref(savedVarianceDesc), DataCast(bnScale), diff --git a/src/driver_arguments.cpp b/src/driver_arguments.cpp index e75ec31902..b7970d9833 100644 --- a/src/driver_arguments.cpp +++ b/src/driver_arguments.cpp @@ -64,27 +64,105 @@ void ConvDataType(std::stringstream& ss, const miopen::TensorDescriptor& desc) // We choose scaleMean because its a accumulator type. void BnDataType(std::stringstream& ss, const miopen::TensorDescriptor& xDesc, - const miopen::TensorDescriptor& sMeanDesc) + const miopen::TensorDescriptor& yDesc, + const miopen::TensorDescriptor& scaleDesc, + const miopen::TensorDescriptor& biasDesc, + const miopen::TensorDescriptor& sMeanDesc, + const BatchNormDirection_t bn_mode) { - if(xDesc.GetType() == miopenHalf && sMeanDesc.GetType() == miopenHalf) + if(bn_mode == BatchNormDirection_t::ForwardInference || + bn_mode == BatchNormDirection_t::ForwardTraining) { - ss << "bnormfp16"; - } - else if(xDesc.GetType() == miopenBFloat16 && sMeanDesc.GetType() == miopenBFloat16) - { - ss << "bnormbfp16"; - } - else if(xDesc.GetType() == miopenHalf && sMeanDesc.GetType() == miopenFloat) - { - ss << "bnormfp16fp32"; + if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && + scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormfp16"; + } + else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenBFloat16 && + scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormbfp16"; + } + else if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && + scaleDesc.GetType() == miopenHalf && biasDesc.GetType() == miopenHalf && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormfp16fp32"; + } + else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenBFloat16 && + scaleDesc.GetType() == miopenBFloat16 && biasDesc.GetType() == miopenBFloat16 && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormbfp16fp32"; + } + else + { + ss << "bnorm"; + } } - else if(xDesc.GetType() == miopenBFloat16 && sMeanDesc.GetType() == miopenFloat) + else if(bn_mode == BatchNormDirection_t::ForwardTraining) { - ss << "bnormbfp16fp32"; + if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && + scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormfp16"; + } + else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenBFloat16 && + scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormbfp16"; + } + else if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && + scaleDesc.GetType() == miopenHalf && biasDesc.GetType() == miopenHalf && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormfp16fp32"; + } + else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenBFloat16 && + scaleDesc.GetType() == miopenBFloat16 && biasDesc.GetType() == miopenBFloat16 && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormbfp16fp32"; + } + else + { + ss << "bnorm"; + } } - else + else if(bn_mode == BatchNormDirection_t::Backward) { - ss << "bnorm"; + if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && + scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && + 
sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormfp16"; + } + else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenBFloat16 && + scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormbfp16"; + } + else if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenFloat && + scaleDesc.GetType() == miopenHalf && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormfp16fp32"; + } + else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenFloat && + scaleDesc.GetType() == miopenBFloat16 && biasDesc.GetType() == miopenFloat && + sMeanDesc.GetType() == miopenFloat) + { + ss << "bnormbfp16fp32"; + } + else + { + ss << "bnorm"; + } } } @@ -228,6 +306,9 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc, } std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc, + const miopenTensorDescriptor_t yDesc, + const miopenTensorDescriptor_t scaleDesc, + const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t sMeanDesc, miopenBatchNormMode_t bn_mode, const void* resultRunningMean, @@ -241,7 +322,13 @@ std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc, miopenGetTensorDescriptorSize(xDesc, &size); std::stringstream ss; if(print_for_bn_driver) - BnDataType(ss, miopen::deref(xDesc), miopen::deref(sMeanDesc)); + BnDataType(ss, + miopen::deref(xDesc), + miopen::deref(yDesc), + miopen::deref(scaleDesc), + miopen::deref(biasDesc), + miopen::deref(sMeanDesc), + dir); ss << " -n " << miopen::deref(xDesc).GetLengths()[0] // clang-format off << " -c " << miopen::deref(xDesc).GetLengths()[1]; diff --git a/src/fusion.cpp b/src/fusion.cpp index e536f6a1a1..3c52f6c2a0 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -389,15 +389,18 @@ std::string LogCmdBnormFusion(const miopenFusionPlanDescriptor_t fusePlanDesc, i if(bn_op != nullptr) { - str += BnormArgsForMIOpenDriver(&bn_op->input_desc, - &bn_op->base_desc, - bn_op->mode, - nullptr, - nullptr, - nullptr, - nullptr, - miopen::debug::BatchNormDirection_t::ForwardInference, - false); + // str += BnormArgsForMIOpenDriver(&bn_op->input_desc, + // &bn_op->base_desc, + // nullptr, + // nullptr, + // nullptr, + // bn_op->mode, + // nullptr, + // nullptr, + // nullptr, + // nullptr, + // miopen::debug::BatchNormDirection_t::ForwardInference, + // false); } else { diff --git a/src/include/miopen/driver_arguments.hpp b/src/include/miopen/driver_arguments.hpp index a964e7fe27..f28a772e3c 100644 --- a/src/include/miopen/driver_arguments.hpp +++ b/src/include/miopen/driver_arguments.hpp @@ -67,7 +67,10 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc, bool print_for_conv_driver = true); std::string BnormArgsForMIOpenDriver(miopenTensorDescriptor_t xDesc, - miopenTensorDescriptor_t sMeanDesc, + miopenTensorDescriptor_t yDesc, + miopenTensorDescriptor_t scaleDesc, + miopenTensorDescriptor_t biasDesc, + miopenTensorDescriptor_t saveMeanDesc, miopenBatchNormMode_t bn_mode, const void* resultRunningMean, const void* resultRunningVariance, diff --git a/src/solver/batchnorm/backward_spatial_single.cpp b/src/solver/batchnorm/backward_spatial_single.cpp index ef569e18a7..3e4bb8bf78 100644 --- a/src/solver/batchnorm/backward_spatial_single.cpp +++ b/src/solver/batchnorm/backward_spatial_single.cpp @@ -59,9 +59,6 @@ bool BnBwdTrainingSpatialSingle::IsApplicable( } #endif - if(bn_problem.IsLayoutNHWC()) - return true; - // case 1 : 
fp16 or bfp16 if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetDXDesc().GetType()) && diff --git a/src/solver/batchnorm/forward_inference.cpp b/src/solver/batchnorm/forward_inference.cpp index 1fc2139285..8d08ece535 100644 --- a/src/solver/batchnorm/forward_inference.cpp +++ b/src/solver/batchnorm/forward_inference.cpp @@ -51,8 +51,8 @@ bool BnFwdInference::IsApplicable(const ExecutionContext&, return false; // case 1 : mix type - if(!((bn_problem.GetXDesc().GetType() == miopenHalf && - bn_problem.GetYDesc().GetType() == miopenHalf && + if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && bn_problem.GetBnScale().GetType() == miopenFloat && bn_problem.GetBnBias().GetType() == miopenFloat) || // case 2 : float type @@ -74,8 +74,7 @@ ConvSolution BnFwdInference::GetSolution(const ExecutionContext& context, bool bbfpmixparam = false; bool bfp16parm = false; bool bfp32parm = true; - if(problem.GetXDesc().GetType() == miopenHalf && - problem.GetBnScale().GetType() == miopenHalf) + if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) { bfp16parm = true; bfp32parm = false; diff --git a/src/solver/batchnorm/forward_spatial_single.cpp b/src/solver/batchnorm/forward_spatial_single.cpp index 3dc1ae7707..91f5c66a03 100644 --- a/src/solver/batchnorm/forward_spatial_single.cpp +++ b/src/solver/batchnorm/forward_spatial_single.cpp @@ -46,12 +46,9 @@ bool BnFwdTrainingSpatialSingle::IsApplicable( bn_problem.GetMode() != miopenBNSpatial) return false; - if(bn_problem.IsLayoutNHWC()) - return true; - // case 1 : mix type - if(!((bn_problem.GetXDesc().GetType() == miopenHalf && - bn_problem.GetYDesc().GetType() == miopenHalf && + if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && bn_problem.GetBnScale().GetType() == miopenFloat && bn_problem.GetBnBias().GetType() == miopenFloat) || // case 2 : float type diff --git a/test/fusionHost.hpp b/test/fusionHost.hpp index d525b79cf6..6fbc428b1a 100644 --- a/test/fusionHost.hpp +++ b/test/fusionHost.hpp @@ -285,13 +285,14 @@ void batchNormSpatialHostFwdTrain(const tensor& input, } template void batchNormSpatialHostBwdTrain(const tensor& x_input, const tensor& dy_input, - tensor& dx_out, + tensor& dx_out, const tensor& bnScale, tensor& dscale, tensor& dbias, diff --git a/test/gtest/bn.hpp b/test/gtest/bn.hpp index a195b6b81e..1021952b54 100644 --- a/test/gtest/bn.hpp +++ b/test/gtest/bn.hpp @@ -263,22 +263,18 @@ struct BNBwdTest auto&& handle = get_handle(); bn_bwd_test_data.output.data = handle.Read(bn_bwd_test_data.out_dev, bn_bwd_test_data.output.data.size()); - bn_bwd_test_data.dScale.data = handle.Read(bn_bwd_test_data.dScale_dev, - bn_bwd_test_data.dScale.data.size()); - bn_bwd_test_data.dBias.data = - handle.Read(bn_bwd_test_data.dBias_dev, bn_bwd_test_data.dBias.data.size()); + bn_bwd_test_data.dScale.data = handle.Read( + bn_bwd_test_data.dScale_dev, bn_bwd_test_data.dScale.data.size()); + bn_bwd_test_data.dBias.data = handle.Read( + bn_bwd_test_data.dBias_dev, bn_bwd_test_data.dBias.data.size()); - test::ComputeCPUBNBwd(bn_bwd_test_data); + test::ComputeCPUBNBwd(bn_bwd_test_data); - test::CompareTensor(bn_bwd_test_data.output, bn_bwd_test_data.ref_out, 5e-4); - test::CompareTensor(bn_bwd_test_data.dScale, 
bn_bwd_test_data.dScale_ref, 5e-4); - test::CompareTensor(bn_bwd_test_data.dBias, bn_bwd_test_data.dBias_ref, 5e-4); + test::CompareTensor(bn_bwd_test_data.output, bn_bwd_test_data.ref_out, bwd_tol); + test::CompareTensor( + bn_bwd_test_data.dScale, bn_bwd_test_data.dScale_ref, bwd_tol); + test::CompareTensor( + bn_bwd_test_data.dBias, bn_bwd_test_data.dBias_ref, bwd_tol); } BNTestCase bn_config; @@ -294,6 +290,7 @@ struct BNBwdTest bn_bwd_test_data; miopenTensorLayout_t tensor_layout; BNApiType api_type; + double bwd_tol = 4e-3; }; template */ + +struct GPU_BN_CK_BWD_Large_FP16 : BNBwdTest { }; -struct GPU_BN_BWD_Large_FP16 - : BNBwdTest +struct GPU_BN_OCL_BWD_Large_FP16 + : BNBwdTest { }; -struct GPU_BN_BWD_Small_BFP16 : BNBwdTest +struct GPU_BN_CK_BWD_Large_BFP16 : BNBwdTest { }; -struct GPU_BN_BWD_Large_BFP16 : BNBwdTest +struct GPU_BN_OCL_BWD_Large_BFP16 + : BNBwdTest { }; @@ -61,12 +69,12 @@ struct GPU_BN_BWD_Large_FP64 : BNBwdTest()), + GPU_BN_CK_BWD_Large_FP16, + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV1})), + testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_BWD_Large_FP16, + GPU_BN_OCL_BWD_Large_FP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({miopenTensorNCHW}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_BWD_Small_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + GPU_BN_CK_BWD_Large_BFP16, + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV1})), + testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_BWD_Large_BFP16, + GPU_BN_OCL_BWD_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({miopenTensorNCHW}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -110,7 +118,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Small_FP32, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({miopenTensorNCHW}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -120,7 +128,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); -// fp64 +// // fp64 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Small_FP64, testing::Combine(testing::ValuesIn(NetworkLarge()), diff --git a/test/gtest/bn_fwd_train.cpp b/test/gtest/bn_fwd_train.cpp index 8cc60e2d55..2f4ec26204 100644 --- a/test/gtest/bn_fwd_train.cpp +++ b/test/gtest/bn_fwd_train.cpp @@ -31,13 +31,14 @@ // ScaleDataType : half // BiasDataType : half // MeanVarDataType : float -struct GPU_BN_FWD_Train_Small_FP16 + +struct GPU_BN_CK_FWD_Train_Large_FP16 : BNFwdTrainTest { }; -struct GPU_BN_FWD_Train_Large_FP16 - : BNFwdTrainTest +struct GPU_BN_OCL_FWD_Train_Large_FP16 + : BNFwdTrainTest { }; @@ -46,11 +47,13 @@ struct GPU_BN_FWD_Train_Large_FP16 // ScaleDataType : bfloat16 // BiasDataType : bfloat16 // MeanVarDataType : float -struct GPU_BN_FWD_Train_Small_BFP16 : BNFwdTrainTest + +struct GPU_BN_CK_FWD_Train_Large_BFP16 + : BNFwdTrainTest { }; -struct GPU_BN_FWD_Train_Large_BFP16 : BNFwdTrainTest 
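
The backward comparisons above now read their tolerance from the fixture (bwd_tol = 4e-3) instead of hard-coding 5e-4 at every CompareTensor call, which the fp16/bfp16 backward outputs need. As a rough illustration of the kind of check such a comparator performs, here is a minimal, self-contained sketch; CompareWithTol and its max-relative-error criterion are illustrative assumptions, not MIOpen's actual CompareTensor implementation:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

// Compare two host buffers against a caller-supplied tolerance. Reduced
// precision (fp16/bf16) accumulates more rounding error than fp32, so the
// threshold is a per-test parameter rather than a fixed constant.
template <typename T>
bool CompareWithTol(const std::vector<T>& out, const std::vector<T>& ref, double tol)
{
    assert(out.size() == ref.size());
    for(std::size_t i = 0; i < out.size(); ++i)
    {
        const double o    = static_cast<double>(out[i]);
        const double r    = static_cast<double>(ref[i]);
        const double err  = std::abs(o - r);
        const double norm = std::max({std::abs(o), std::abs(r), 1.0});
        if(err / norm > tol) // relative error, clamped to absolute near zero
            return false;
    }
    return true;
}
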
+struct GPU_BN_OCL_FWD_Train_Large_BFP16 : BNFwdTrainTest { }; @@ -71,55 +74,56 @@ struct GPU_BN_FWD_Train_Large_FP64 : BNFwdTrainTest()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV1})), - TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_FWD_Train_Large_FP16, + GPU_BN_CK_FWD_Train_Large_FP16, testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); -// bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_FWD_Train_Small_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV1})), + GPU_BN_OCL_FWD_Train_Large_FP16, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW}), + testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); +// // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_FWD_Train_Large_BFP16, + GPU_BN_CK_FWD_Train_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); -// fp32 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_FWD_Train_Small_FP32, + GPU_BN_OCL_FWD_Train_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW}), + testing::ValuesIn({testBNAPIV1, testBNAPIV2})), + TestNameGenerator()); + +// // fp32 +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_BN_FWD_Train_Small_FP32, + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -130,10 +134,10 @@ INSTANTIATE_TEST_SUITE_P(Smoke, testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); -// fp64 +// // fp64 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_FWD_Train_Small_FP64, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); diff --git a/test/gtest/bn_infer.cpp b/test/gtest/bn_infer.cpp index 4f32a2b3bd..28a2c24bff 100644 --- a/test/gtest/bn_infer.cpp +++ b/test/gtest/bn_infer.cpp @@ -26,33 +26,36 @@ #include "bn.hpp" -// NHWC solver accepts // XDataType : half_float::half // YDataYype : half_float::half // ScaleDataType : half_float::half // BiasDataType : half_float::half // MeanVarDataType : float -struct GPU_BN_Infer_Small_FP16 +struct GPU_BN_CK_Infer_Large_FP16 : BNInferTest { }; -struct GPU_BN_Infer_Large_FP16 - : BNInferTest +struct GPU_BN_OCL_Infer_Large_FP16 + : BNInferTest { }; -// bf16 NHWC solver accepts is only on CK solver // XDataType : bfloat16 // YDataYype : bfloat16 // ScaleDataType : bfloat16 // BiasDataType : bfloat16 // MeanVarDataType : float -struct GPU_BN_Infer_Small_BFP16 : BNInferTest +struct GPU_BN_CK_Infer_Large_BFP16 : BNInferTest { }; -struct GPU_BN_Infer_Large_BFP16 : BNInferTest +// XDataType : bfloat16 +// YDataYype : bfloat16 +// ScaleDataType : float +// BiasDataType : float +// MeanVarDataType : float +struct GPU_BN_OCL_Infer_Large_BFP16 : BNInferTest { }; @@ -73,14 +76,14 @@ struct GPU_BN_Infer_Large_FP64 : BNInferTest()), + GPU_BN_CK_Infer_Large_FP16, + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV1})), 
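
The suites above are split so that CK-capable configurations (both NCHW and NHWC layouts) and the OCL path (NCHW only) get their own instantiations. The pattern is plain GoogleTest value parameterization; a minimal, self-contained sketch, with hypothetical Layout/ApiVer parameters standing in for the real BNTestCase machinery:

#include <gtest/gtest.h>
#include <tuple>

enum class Layout { NCHW, NHWC };
enum class ApiVer { V1, V2 };

// Each test body receives one (layout, api) combination from the product
// built by testing::Combine below.
class BnSuite : public ::testing::TestWithParam<std::tuple<Layout, ApiVer>>
{
};

TEST_P(BnSuite, Runs)
{
    const auto [layout, api] = GetParam();
    (void)layout;
    (void)api;
    // A real fixture would construct and run the BN problem here.
}

// A CK-style suite may combine both layouts; an OCL-style suite would pass
// only Layout::NCHW, mirroring the instantiations in this patch.
INSTANTIATE_TEST_SUITE_P(Smoke,
                         BnSuite,
                         ::testing::Combine(::testing::Values(Layout::NCHW, Layout::NHWC),
                                            ::testing::Values(ApiVer::V1, ApiVer::V2)));
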
+ testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_Infer_Large_FP16, + GPU_BN_OCL_Infer_Large_FP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({miopenTensorNCHW}), + testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); - // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_Infer_Small_BFP16, + GPU_BN_CK_Infer_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV1})), + testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_Infer_Large_BFP16, + GPU_BN_OCL_Infer_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({miopenTensorNCHW}), + testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); // fp32 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Small_FP32, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({testBNAPIV1})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, @@ -135,9 +137,9 @@ INSTANTIATE_TEST_SUITE_P(Smoke, // fp64 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Small_FP64, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({testBNAPIV1})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, diff --git a/test/gtest/test_operations.hpp b/test/gtest/test_operations.hpp index 298ac55e3e..2e07e4f02a 100644 --- a/test/gtest/test_operations.hpp +++ b/test/gtest/test_operations.hpp @@ -38,14 +38,7 @@ void ComputeCPUBNInference(DLModule& dl_module) dl_module.estVariance); } -template +template void ComputeCPUBNBwd(DLModule& dl_module) { batchNormSpatialHostBwdTrain(dl_module.input, From 185b7ec83ce7e4dddfabf5c2ef25648de4ee3f60 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 03:47:32 +0000 Subject: [PATCH 06/13] fix minor issue --- src/driver_arguments.cpp | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/src/driver_arguments.cpp b/src/driver_arguments.cpp index b7970d9833..8641a37ce6 100644 --- a/src/driver_arguments.cpp +++ b/src/driver_arguments.cpp @@ -102,37 +102,6 @@ void BnDataType(std::stringstream& ss, ss << "bnorm"; } } - else if(bn_mode == BatchNormDirection_t::ForwardTraining) - { - if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && - scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && - sMeanDesc.GetType() == miopenFloat) - { - ss << "bnormfp16"; - } - else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == miopenBFloat16 && - scaleDesc.GetType() == miopenFloat && biasDesc.GetType() == miopenFloat && - sMeanDesc.GetType() == miopenFloat) - { - ss << "bnormbfp16"; - } - else if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && - scaleDesc.GetType() == miopenHalf && biasDesc.GetType() == miopenHalf && - sMeanDesc.GetType() == miopenFloat) - { - ss << "bnormfp16fp32"; - } - else if(xDesc.GetType() == miopenBFloat16 && yDesc.GetType() == 
miopenBFloat16 && - scaleDesc.GetType() == miopenBFloat16 && biasDesc.GetType() == miopenBFloat16 && - sMeanDesc.GetType() == miopenFloat) - { - ss << "bnormbfp16fp32"; - } - else - { - ss << "bnorm"; - } - } else if(bn_mode == BatchNormDirection_t::Backward) { if(xDesc.GetType() == miopenHalf && yDesc.GetType() == miopenHalf && @@ -164,6 +133,10 @@ void BnDataType(std::stringstream& ss, ss << "bnorm"; } } + else + { + MIOPEN_THROW("Bad Op direction"); + } } void BnDriverInfo(std::stringstream& ss, From e34c18f62932f5a7e55994d83818309a669f4ed8 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 05:21:14 +0000 Subject: [PATCH 07/13] fix clang format --- src/ocl/batchnormocl.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ocl/batchnormocl.cpp b/src/ocl/batchnormocl.cpp index 84c587b4f5..78d2dbce66 100644 --- a/src/ocl/batchnormocl.cpp +++ b/src/ocl/batchnormocl.cpp @@ -154,8 +154,7 @@ void BatchNormForwardTraining(Handle& handle, const auto solvers = solver::SolverContainer{}; + solver::batchnorm::BnFwdTrainingPerActivation>{}; solvers.ExecutePrimitive(handle, problem, algo, invoke_params); From e9f7092f77eab84f65bd5c77f0de4523fd3842c3 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 07:09:06 +0000 Subject: [PATCH 08/13] hip tidy --- src/driver_arguments.cpp | 6 ++++-- src/include/miopen/driver_arguments.hpp | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/driver_arguments.cpp b/src/driver_arguments.cpp index 8641a37ce6..b730325add 100644 --- a/src/driver_arguments.cpp +++ b/src/driver_arguments.cpp @@ -282,7 +282,7 @@ std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc, const miopenTensorDescriptor_t yDesc, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, - const miopenTensorDescriptor_t sMeanDesc, + const miopenTensorDescriptor_t saveMeanDesc, miopenBatchNormMode_t bn_mode, const void* resultRunningMean, const void* resultRunningVariance, @@ -295,13 +295,15 @@ std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc, miopenGetTensorDescriptorSize(xDesc, &size); std::stringstream ss; if(print_for_bn_driver) + { BnDataType(ss, miopen::deref(xDesc), miopen::deref(yDesc), miopen::deref(scaleDesc), miopen::deref(biasDesc), - miopen::deref(sMeanDesc), + miopen::deref(saveMeanDesc), dir); + } ss << " -n " << miopen::deref(xDesc).GetLengths()[0] // clang-format off << " -c " << miopen::deref(xDesc).GetLengths()[1]; diff --git a/src/include/miopen/driver_arguments.hpp b/src/include/miopen/driver_arguments.hpp index f28a772e3c..aa59d86036 100644 --- a/src/include/miopen/driver_arguments.hpp +++ b/src/include/miopen/driver_arguments.hpp @@ -66,12 +66,12 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc, std::optional immediate_mode_solver_id, bool print_for_conv_driver = true); -std::string BnormArgsForMIOpenDriver(miopenTensorDescriptor_t xDesc, - miopenTensorDescriptor_t yDesc, - miopenTensorDescriptor_t scaleDesc, - miopenTensorDescriptor_t biasDesc, - miopenTensorDescriptor_t saveMeanDesc, - miopenBatchNormMode_t bn_mode, +std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc, + const miopenTensorDescriptor_t yDesc, + const miopenTensorDescriptor_t scaleDesc, + const miopenTensorDescriptor_t biasDesc, + const miopenTensorDescriptor_t saveMeanDesc, + const miopenBatchNormMode_t bn_mode, const void* resultRunningMean, const void* resultRunningVariance, const void* resultSaveMean, From 
45741cccb913b3768523f0076a2f6bbf42d84103 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 08:51:17 +0000 Subject: [PATCH 09/13] fix gtest sample and fix hiptidy --- src/include/miopen/driver_arguments.hpp | 12 ++++++------ test/gtest/bn_bwd.cpp | 12 ++++++------ test/gtest/bn_fwd_train.cpp | 8 ++++---- test/gtest/bn_infer.cpp | 24 ++++++++++++------------ test/gtest/bn_test_data.hpp | 4 +--- 5 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/include/miopen/driver_arguments.hpp b/src/include/miopen/driver_arguments.hpp index aa59d86036..f28a772e3c 100644 --- a/src/include/miopen/driver_arguments.hpp +++ b/src/include/miopen/driver_arguments.hpp @@ -66,12 +66,12 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc, std::optional immediate_mode_solver_id, bool print_for_conv_driver = true); -std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc, - const miopenTensorDescriptor_t yDesc, - const miopenTensorDescriptor_t scaleDesc, - const miopenTensorDescriptor_t biasDesc, - const miopenTensorDescriptor_t saveMeanDesc, - const miopenBatchNormMode_t bn_mode, +std::string BnormArgsForMIOpenDriver(miopenTensorDescriptor_t xDesc, + miopenTensorDescriptor_t yDesc, + miopenTensorDescriptor_t scaleDesc, + miopenTensorDescriptor_t biasDesc, + miopenTensorDescriptor_t saveMeanDesc, + miopenBatchNormMode_t bn_mode, const void* resultRunningMean, const void* resultRunningVariance, const void* resultSaveMean, diff --git a/test/gtest/bn_bwd.cpp b/test/gtest/bn_bwd.cpp index 94add304a5..ba4c136469 100644 --- a/test/gtest/bn_bwd.cpp +++ b/test/gtest/bn_bwd.cpp @@ -87,7 +87,7 @@ TEST_P(GPU_BN_BWD_Large_FP64, BnV2LargeBWDCKfp64) {} // fp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_BWD_Large_FP16, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -102,7 +102,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_BWD_Large_BFP16, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -117,28 +117,28 @@ INSTANTIATE_TEST_SUITE_P(Smoke, // fp32 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Small_FP32, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Large_FP32, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); // // fp64 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Small_FP64, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Large_FP64, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); diff --git a/test/gtest/bn_fwd_train.cpp 
b/test/gtest/bn_fwd_train.cpp index 2f4ec26204..d65b184c1c 100644 --- a/test/gtest/bn_fwd_train.cpp +++ b/test/gtest/bn_fwd_train.cpp @@ -93,7 +93,7 @@ TEST_P(GPU_BN_FWD_Train_Large_FP64, BnV2LargeFWD_TrainCKfp64) {} INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_FWD_Train_Large_FP16, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -108,7 +108,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, // // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_FWD_Train_Large_BFP16, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -130,7 +130,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_FWD_Train_Large_FP32, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -144,7 +144,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_FWD_Train_Large_FP64, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); diff --git a/test/gtest/bn_infer.cpp b/test/gtest/bn_infer.cpp index 28a2c24bff..05a154f10b 100644 --- a/test/gtest/bn_infer.cpp +++ b/test/gtest/bn_infer.cpp @@ -83,18 +83,18 @@ TEST_P(GPU_BN_OCL_Infer_Large_FP16, BnV2LargeInferOCLfp16) {} TEST_P(GPU_BN_CK_Infer_Large_BFP16, BnV2LargeInferCKbfp16) {} TEST_P(GPU_BN_OCL_Infer_Large_BFP16, BnV2LargeInferOCLbfp16) {} -// // fp32 (float) -TEST_P(GPU_BN_Infer_Small_FP32, BnV1SmallInferCKfp32) {} -TEST_P(GPU_BN_Infer_Large_FP32, BnV2LargeInferCKfp32) {} +// // // fp32 (float) +TEST_P(GPU_BN_Infer_Small_FP32, BnV1SmallInferfp32) {} +TEST_P(GPU_BN_Infer_Large_FP32, BnV2LargeInferfp32) {} -// fp64 -TEST_P(GPU_BN_Infer_Small_FP64, BnV1SmallInferCKfp64) {} -TEST_P(GPU_BN_Infer_Large_FP64, BnV2LargeInferCKfp64) {} +// // // fp64 +TEST_P(GPU_BN_Infer_Small_FP64, BnV1SmallInferfp64) {} +TEST_P(GPU_BN_Infer_Large_FP64, BnV2LargeInferfp64) {} -// fp16 +// // fp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_Infer_Large_FP16, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -105,7 +105,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, testing::ValuesIn({miopenTensorNCHW}), testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); -// bfp16 +// // // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_Infer_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkSmall()), @@ -130,11 +130,11 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Large_FP32, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); -// fp64 +// // fp64 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Small_FP64, testing::Combine(testing::ValuesIn(NetworkSmall()), @@ -144,7 +144,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, 
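
The hip-tidy churn above toggles top-level const on the by-value descriptor parameters between the header and the source file. That is safe because top-level const on a value parameter is not part of the function type, so the declaration and the definition may disagree on it; tidy tools (likely readability-avoid-const-params-in-decls, an assumption on my part) only want it off declarations. A minimal sketch of the rule:

// Both lines declare the same function: top-level const on a by-value
// parameter affects only the definition's body, not the signature.
void LogShape(int n);
void LogShape(const int n) // const here is purely local
{
    // n = 0; // would not compile: n is immutable inside this body
    (void)n;
}

int main() { LogShape(3); }
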
INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Large_FP64, - testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); diff --git a/test/gtest/bn_test_data.hpp b/test/gtest/bn_test_data.hpp index 9afa8ea4ed..d3b1c6b073 100644 --- a/test/gtest/bn_test_data.hpp +++ b/test/gtest/bn_test_data.hpp @@ -67,8 +67,6 @@ inline std::vector NetworkLarge() // pyt_mlperf_resnet50v1.5 return { {192, 1, 8, 8, miopenBNSpatial, miopen::batchnorm::Direction::Backward, 1, 0}, - {16, 8, 128, 256, miopenBNSpatial, miopen::batchnorm::Direction::ForwardTraining, 1, 0}, - {16, 8, 128, 256, miopenBNSpatial, miopen::batchnorm::Direction::ForwardInference, 1, 0}, {64, 2048, 7, 7, miopenBNSpatial, miopen::batchnorm::Direction::Backward, 0, 1}, {64, 2048, 7, 7, miopenBNSpatial, miopen::batchnorm::Direction::ForwardTraining, 1, 1}, {64, 2048, 7, 7, miopenBNSpatial, miopen::batchnorm::Direction::ForwardInference, 1, 0}, @@ -106,7 +104,7 @@ inline std::vector NetworkSmall() {192, 2, 8, 8, miopenBNSpatial, miopen::batchnorm::Direction::Backward, 1, 0}, {16, 8, 132, 28, miopenBNSpatial, miopen::batchnorm::Direction::Backward, 1, 0}, {16, 8, 128, 256, miopenBNSpatial, miopen::batchnorm::Direction::ForwardTraining, 1, 0}, - {64, 2048, 7, 7, miopenBNSpatial, miopen::batchnorm::Direction::Backward, 0, 1}, + {64, 2048, 17, 17, miopenBNSpatial, miopen::batchnorm::Direction::Backward, 0, 1}, }; } From a648b717ac2cf6e93fe656209d74172a514bf2c3 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 09:19:22 +0000 Subject: [PATCH 10/13] * move type check logic to problem description of batch norm * add type checks to other ocl solvers * fix other minor issues --- src/batchnorm/problem_description.cpp | 108 ++++++++++++++++++ src/fusion.cpp | 24 ++-- .../miopen/batchnorm/problem_description.hpp | 20 ++-- src/solver/batchnorm/backward_ck.cpp | 15 +-- .../batchnorm/backward_per_activation.cpp | 8 +- .../batchnorm/backward_spatial_multiple.cpp | 2 + .../batchnorm/backward_spatial_single.cpp | 16 +-- src/solver/batchnorm/forward_inference.cpp | 12 +- src/solver/batchnorm/forward_inference_ck.cpp | 16 +-- .../batchnorm/forward_inference_fused.cpp | 1 - .../batchnorm/forward_per_activation.cpp | 8 +- .../batchnorm/forward_spatial_multiple.cpp | 3 + .../batchnorm/forward_spatial_single.cpp | 11 +- src/solver/batchnorm/forward_training_ck.cpp | 15 +-- 14 files changed, 154 insertions(+), 105 deletions(-) diff --git a/src/batchnorm/problem_description.cpp b/src/batchnorm/problem_description.cpp index 7eb2592c46..ec082ae7b1 100644 --- a/src/batchnorm/problem_description.cpp +++ b/src/batchnorm/problem_description.cpp @@ -36,6 +36,114 @@ namespace miopen { namespace batchnorm { +bool is_fp16_or_bfp16(miopenDataType_t type) +{ + return ((type == miopenHalf) || (type == miopenBFloat16)); +} + +bool is_fp32_or_fp64(miopenDataType_t type) +{ + return ((type == miopenFloat) || (type == miopenDouble)); +} + +bool IsOCLInferTypeValid(const ProblemDescription& bn_problem) +{ + // case 1 : mix type + return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat) || + // case 2 : float type + (bn_problem.GetXDesc().GetType() == miopenFloat && + bn_problem.GetYDesc().GetType() == miopenFloat && + bn_problem.GetBnScale().GetType() == 
miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat)); +} + +bool IsCKInferTypeValid(const ProblemDescription& bn_problem) +{ + // case 1 : mix type + return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && + is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 2 : fp32 or fp64 + (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSVar().GetType()))); +} + +bool IsOCLFwdTrainTypeValid(const ProblemDescription& bn_problem) +{ + // case 1 : mix type + return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat) || + // case 2 : float type + (bn_problem.GetXDesc().GetType() == miopenFloat && + bn_problem.GetYDesc().GetType() == miopenFloat && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnBias().GetType() == miopenFloat)); +} + +bool IsCKFwdTrainTypeValid(const ProblemDescription& bn_problem) +{ + // case 1 : mix type + return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && + is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 2 : fp32 or fp64 + (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSVar().GetType()))); +} + +bool IsOCLBwdTypeValid(const ProblemDescription& bn_problem) +{ + return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetDXDesc().GetType()) && + is_fp16_or_bfp16(bn_problem.GetDYDesc().GetType()) && + bn_problem.GetBnScale().GetType() == miopenFloat && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 1 : fp32 or fp64 + (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSVar().GetType()))); +} + +bool IsCKBwdTypeValid(const ProblemDescription& bn_problem) +{ + return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && + bn_problem.GetDXDesc().GetType() == miopenFloat && + is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && + bn_problem.GetDYDesc().GetType() == miopenFloat && + bn_problem.GetBnSMean().GetType() == miopenFloat && + bn_problem.GetBnSVar().GetType() == miopenFloat) || + // case 1 : fp32 or fp64 + (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && + is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && + 
is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && + is_fp32_or_fp64(bn_problem.GetBnSVar().GetType()))); +} + NetworkConfig ProblemDescription::MakeNetworkConfig() const { switch(direction) diff --git a/src/fusion.cpp b/src/fusion.cpp index 3c52f6c2a0..5541409a67 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -389,18 +389,18 @@ std::string LogCmdBnormFusion(const miopenFusionPlanDescriptor_t fusePlanDesc, i if(bn_op != nullptr) { - // str += BnormArgsForMIOpenDriver(&bn_op->input_desc, - // &bn_op->base_desc, - // nullptr, - // nullptr, - // nullptr, - // bn_op->mode, - // nullptr, - // nullptr, - // nullptr, - // nullptr, - // miopen::debug::BatchNormDirection_t::ForwardInference, - // false); + str += BnormArgsForMIOpenDriver(&bn_op->input_desc, + nullptr, + nullptr, + nullptr, + nullptr, + bn_op->mode, + nullptr, + nullptr, + nullptr, + nullptr, + miopen::debug::BatchNormDirection_t::ForwardInference, + false); // having false allows safe handling of nullptrs } else { diff --git a/src/include/miopen/batchnorm/problem_description.hpp b/src/include/miopen/batchnorm/problem_description.hpp index a1ed9d2594..72d0365426 100644 --- a/src/include/miopen/batchnorm/problem_description.hpp +++ b/src/include/miopen/batchnorm/problem_description.hpp @@ -52,15 +52,8 @@ struct ProblemDescriptionTag { }; -inline bool is_fp16_or_bfp16(miopenDataType_t type) -{ - return ((type == miopenHalf) || (type == miopenBFloat16)); -} - -inline bool is_fp32_or_fp64(miopenDataType_t type) -{ - return ((type == miopenFloat) || (type == miopenDouble)); -} +bool is_fp16_or_bfp16(miopenDataType_t type); +bool is_fp32_or_fp64(miopenDataType_t type); struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, ProblemDescriptionTag @@ -363,6 +356,15 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, } }; +bool IsOCLInferTypeValid(const ProblemDescription& bn_problem); +bool IsCKInferTypeValid(const ProblemDescription& bn_problem); + +bool IsOCLFwdTrainTypeValid(const ProblemDescription& bn_problem); +bool IsCKFwdTrainTypeValid(const ProblemDescription& bn_problem); + +bool IsOCLBwdTypeValid(const ProblemDescription& bn_problem); +bool IsCKBwdTypeValid(const ProblemDescription& bn_problem); + } // namespace batchnorm } // namespace miopen diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index fd2613f807..451d73d28a 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -371,20 +371,7 @@ bool BnCKBwdBackward::IsApplicable( return false; if(!bn_problem.Is2D()) return false; - // case 1 : fp16 or bfp16 - if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && - bn_problem.GetDXDesc().GetType() == miopenFloat && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && - bn_problem.GetDYDesc().GetType() == miopenFloat && - bn_problem.GetBnSMean().GetType() == miopenFloat && - bn_problem.GetBnSVar().GetType() == miopenFloat) || - // case 1 : fp32 or fp64 - (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && - 
::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) + if(!IsCKBwdTypeValid(bn_problem)) return false; switch(bn_problem.GetXDesc().GetType()) diff --git a/src/solver/batchnorm/backward_per_activation.cpp b/src/solver/batchnorm/backward_per_activation.cpp index adf72045a3..24e785f8de 100644 --- a/src/solver/batchnorm/backward_per_activation.cpp +++ b/src/solver/batchnorm/backward_per_activation.cpp @@ -43,8 +43,12 @@ bool BnBwdTrainingPerActivation::IsApplicable( { if(!problem.Is2D()) return false; - return problem.GetDirection() == miopen::batchnorm::Direction::Backward && - problem.GetMode() == miopenBNPerActivation; + if(problem.GetDirection() != miopen::batchnorm::Direction::Backward && + problem.GetMode() != miopenBNPerActivation) + return false; + if(!::miopen::batchnorm::IsOCLBwdTypeValid(problem)) + return false; + return true; } ConvSolution diff --git a/src/solver/batchnorm/backward_spatial_multiple.cpp b/src/solver/batchnorm/backward_spatial_multiple.cpp index 846f8c41c8..e26922f478 100644 --- a/src/solver/batchnorm/backward_spatial_multiple.cpp +++ b/src/solver/batchnorm/backward_spatial_multiple.cpp @@ -48,6 +48,8 @@ bool BnBwdTrainingSpatialMultiple::IsApplicable( { return false; } + if(!IsOCLBwdTypeValid(problem)) + return false; #if WORKAROUND_ISSUE_1549_FP16_BUILD_ERROR if(problem.GetXDesc().GetType() == miopenHalf && problem.GetBnScale().GetType() == miopenHalf) diff --git a/src/solver/batchnorm/backward_spatial_single.cpp b/src/solver/batchnorm/backward_spatial_single.cpp index 3e4bb8bf78..9b375f517c 100644 --- a/src/solver/batchnorm/backward_spatial_single.cpp +++ b/src/solver/batchnorm/backward_spatial_single.cpp @@ -58,21 +58,7 @@ bool BnBwdTrainingSpatialSingle::IsApplicable( return false; } #endif - - // case 1 : fp16 or bfp16 - if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetDXDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetDYDesc().GetType()) && - bn_problem.GetBnScale().GetType() == miopenFloat && - bn_problem.GetBnSMean().GetType() == miopenFloat && - bn_problem.GetBnSVar().GetType() == miopenFloat) || - // case 1 : fp32 or fp64 - (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) + if(!IsOCLBwdTypeValid(bn_problem)) return false; int n, c, h, w; diff --git a/src/solver/batchnorm/forward_inference.cpp b/src/solver/batchnorm/forward_inference.cpp index 8d08ece535..d22bb01797 100644 --- a/src/solver/batchnorm/forward_inference.cpp +++ b/src/solver/batchnorm/forward_inference.cpp @@ -49,17 +49,7 @@ bool BnFwdInference::IsApplicable(const ExecutionContext&, return false; if(!bn_problem.Is2D()) return false; - - // case 1 : mix type - if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && - bn_problem.GetBnScale().GetType() == miopenFloat && - bn_problem.GetBnBias().GetType() == miopenFloat) || - // case 2 : float type - (bn_problem.GetXDesc().GetType() == miopenFloat && - bn_problem.GetYDesc().GetType() == miopenFloat && - 
bn_problem.GetBnScale().GetType() == miopenFloat && - bn_problem.GetBnBias().GetType() == miopenFloat))) + if(!IsOCLInferTypeValid(bn_problem)) return false; return true; diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index c4fc3b00ef..b89e2f3df1 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -334,21 +334,7 @@ bool BnCKFwdInference::IsApplicable( return false; if(bn_problem.GetMode() != miopenBNSpatial) return false; - - // case 1 : fp16 or bfp16 - if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) && - bn_problem.GetBnSMean().GetType() == miopenFloat && - bn_problem.GetBnSVar().GetType() == miopenFloat) || - // case 1 : fp32 or fp64 - (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) + if(!IsCKInferTypeValid(bn_problem)) return false; switch(bn_problem.GetXDesc().GetType()) diff --git a/src/solver/batchnorm/forward_inference_fused.cpp b/src/solver/batchnorm/forward_inference_fused.cpp index b527b1bd61..19b9e01934 100644 --- a/src/solver/batchnorm/forward_inference_fused.cpp +++ b/src/solver/batchnorm/forward_inference_fused.cpp @@ -55,7 +55,6 @@ bool BnFwdInferActivationFused::IsApplicable(const FusionContext& /*context*/, return false; if(desc.op_map.at(1)->kind() != miopenFusionOpActivForward) return false; - return true; } diff --git a/src/solver/batchnorm/forward_per_activation.cpp b/src/solver/batchnorm/forward_per_activation.cpp index 54c03a1835..de83a8ecc1 100644 --- a/src/solver/batchnorm/forward_per_activation.cpp +++ b/src/solver/batchnorm/forward_per_activation.cpp @@ -41,8 +41,12 @@ namespace batchnorm { bool BnFwdTrainingPerActivation::IsApplicable( const ExecutionContext&, const miopen::batchnorm::ProblemDescription& problem) const { - return problem.GetDirection() == miopen::batchnorm::Direction::ForwardTraining || - problem.GetMode() == miopenBNPerActivation; + if(problem.GetDirection() != miopen::batchnorm::Direction::ForwardTraining || + problem.GetMode() != miopenBNPerActivation) + return false; + if(!IsOCLFwdTrainTypeValid(problem)) + return false; + return true; } ConvSolution diff --git a/src/solver/batchnorm/forward_spatial_multiple.cpp b/src/solver/batchnorm/forward_spatial_multiple.cpp index dba21d3550..6a2c42743b 100644 --- a/src/solver/batchnorm/forward_spatial_multiple.cpp +++ b/src/solver/batchnorm/forward_spatial_multiple.cpp @@ -47,6 +47,9 @@ bool BnFwdTrainingSpatialMultiple::IsApplicable( problem.GetMode() != miopenBNSpatial) return false; + if(!IsOCLFwdTrainTypeValid(problem)) + return false; + return !BnFwdTrainingSpatialSingle{}.IsApplicable(context, problem); } diff --git a/src/solver/batchnorm/forward_spatial_single.cpp b/src/solver/batchnorm/forward_spatial_single.cpp index 91f5c66a03..ccfebce987 100644 --- a/src/solver/batchnorm/forward_spatial_single.cpp +++ 
b/src/solver/batchnorm/forward_spatial_single.cpp @@ -46,16 +46,7 @@ bool BnFwdTrainingSpatialSingle::IsApplicable( bn_problem.GetMode() != miopenBNSpatial) return false; - // case 1 : mix type - if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && - bn_problem.GetBnScale().GetType() == miopenFloat && - bn_problem.GetBnBias().GetType() == miopenFloat) || - // case 2 : float type - (bn_problem.GetXDesc().GetType() == miopenFloat && - bn_problem.GetYDesc().GetType() == miopenFloat && - bn_problem.GetBnScale().GetType() == miopenFloat && - bn_problem.GetBnBias().GetType() == miopenFloat))) + if(!IsOCLFwdTrainTypeValid(bn_problem)) return false; int n, c, h, w; diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index 62c6a7932f..27640eb4d9 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -361,20 +361,7 @@ bool BnCKFwdTraining::IsApplicable( return false; if(bn_problem.GetMode() != miopenBNSpatial) return false; - // case 1 : fp16 or bfp16 - if(!((::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) && - ::miopen::batchnorm::is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) && - bn_problem.GetBnSMean().GetType() == miopenFloat && - bn_problem.GetBnSVar().GetType() == miopenFloat) || - // case 1 : fp32 or fp64 - (::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) && - ::miopen::batchnorm::is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())))) + if(!IsCKFwdTrainTypeValid(bn_problem)) return false; switch(bn_problem.GetXDesc().GetType()) From bed3d9aa4dc06481026f06046ef7a8556c33a498 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 10:01:21 +0000 Subject: [PATCH 11/13] fix network cache --- src/batchnorm/problem_description.cpp | 6 ++++++ .../miopen/batchnorm/problem_description.hpp | 2 ++ test/gtest/bn_bwd.cpp | 12 ++++++------ test/gtest/bn_fwd_train.cpp | 10 +++++----- test/gtest/bn_infer.cpp | 18 +++++++++--------- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/batchnorm/problem_description.cpp b/src/batchnorm/problem_description.cpp index ec082ae7b1..8a10bd6f27 100644 --- a/src/batchnorm/problem_description.cpp +++ b/src/batchnorm/problem_description.cpp @@ -295,6 +295,8 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const ss << "hw" << in_cstride; } ss << "layout" << in_layout; + ss << "scaleType" << static_cast(IsScaleFp16()); + ss << "scaleType" << static_cast(IsScaleFp32()); return NetworkConfig{ss.str()}; } @@ -317,6 +319,8 @@ NetworkConfig ProblemDescription::MakeForwardInferenceNetworkConfig() const ss << "HWdims" << in_cstride; ss << "C" << c; ss << "layout" << in_layout; + ss << "scaleType" << static_cast(IsScaleFp16()); + ss << "scaleType" << static_cast(IsScaleFp32()); return NetworkConfig{ss.str()}; } @@ -447,6 +451,8 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const ss << "nhw" << in_nhw; } ss << 
"layout" << in_layout; + ss << "scaleType" << static_cast(IsScaleFp16()); + ss << "scaleType" << static_cast(IsScaleFp32()); return NetworkConfig{ss.str()}; } diff --git a/src/include/miopen/batchnorm/problem_description.hpp b/src/include/miopen/batchnorm/problem_description.hpp index 72d0365426..4fed9033a6 100644 --- a/src/include/miopen/batchnorm/problem_description.hpp +++ b/src/include/miopen/batchnorm/problem_description.hpp @@ -238,6 +238,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, bool IsFp16() const { return xDesc.GetType() == miopenHalf; } bool IsBFp16() const { return xDesc.GetType() == miopenBFloat16; } bool IsMix() const { return (IsFp16() || IsBFp16()) && sMeanDesc.GetType() == miopenFloat; } + bool IsScaleFp16() const { return scaleDesc.GetType() == miopenHalf; } + bool IsScaleFp32() const { return scaleDesc.GetType() == miopenFloat; } void Serialize(std::ostream& stream) const { stream << MakeNetworkConfig().ToString(); } diff --git a/test/gtest/bn_bwd.cpp b/test/gtest/bn_bwd.cpp index ba4c136469..9b93df6c81 100644 --- a/test/gtest/bn_bwd.cpp +++ b/test/gtest/bn_bwd.cpp @@ -87,7 +87,7 @@ TEST_P(GPU_BN_BWD_Large_FP64, BnV2LargeBWDCKfp64) {} // fp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_BWD_Large_FP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -102,9 +102,9 @@ INSTANTIATE_TEST_SUITE_P(Smoke, // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_BWD_Large_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, @@ -119,12 +119,12 @@ INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Small_FP32, testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({testBNAPIV1})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Large_FP32, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -133,7 +133,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_BWD_Small_FP64, testing::Combine(testing::ValuesIn(NetworkSmall()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({testBNAPIV1})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, diff --git a/test/gtest/bn_fwd_train.cpp b/test/gtest/bn_fwd_train.cpp index d65b184c1c..1953accee7 100644 --- a/test/gtest/bn_fwd_train.cpp +++ b/test/gtest/bn_fwd_train.cpp @@ -93,7 +93,7 @@ TEST_P(GPU_BN_FWD_Train_Large_FP64, BnV2LargeFWD_TrainCKfp64) {} INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_FWD_Train_Large_FP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -108,9 +108,9 @@ INSTANTIATE_TEST_SUITE_P(Smoke, // // bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_FWD_Train_Large_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + 
testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), - testing::ValuesIn({testBNAPIV2})), + testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, @@ -130,7 +130,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_FWD_Train_Large_FP32, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -144,7 +144,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_FWD_Train_Large_FP64, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); diff --git a/test/gtest/bn_infer.cpp b/test/gtest/bn_infer.cpp index 05a154f10b..e717e149b0 100644 --- a/test/gtest/bn_infer.cpp +++ b/test/gtest/bn_infer.cpp @@ -83,18 +83,18 @@ TEST_P(GPU_BN_OCL_Infer_Large_FP16, BnV2LargeInferOCLfp16) {} TEST_P(GPU_BN_CK_Infer_Large_BFP16, BnV2LargeInferCKbfp16) {} TEST_P(GPU_BN_OCL_Infer_Large_BFP16, BnV2LargeInferOCLbfp16) {} -// // // fp32 (float) +// fp32 (float) TEST_P(GPU_BN_Infer_Small_FP32, BnV1SmallInferfp32) {} TEST_P(GPU_BN_Infer_Large_FP32, BnV2LargeInferfp32) {} -// // // fp64 +// fp64 TEST_P(GPU_BN_Infer_Small_FP64, BnV1SmallInferfp64) {} TEST_P(GPU_BN_Infer_Large_FP64, BnV2LargeInferfp64) {} -// // fp16 +// fp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_Infer_Large_FP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -105,10 +105,10 @@ INSTANTIATE_TEST_SUITE_P(Smoke, testing::ValuesIn({miopenTensorNCHW}), testing::ValuesIn({testBNAPIV1, testBNAPIV2})), TestNameGenerator()); -// // // bfp16 +// bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_CK_Infer_Large_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); @@ -130,11 +130,11 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Large_FP32, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); -// // fp64 +// fp64 INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Small_FP64, testing::Combine(testing::ValuesIn(NetworkSmall()), @@ -144,7 +144,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke, INSTANTIATE_TEST_SUITE_P(Smoke, GPU_BN_Infer_Large_FP64, - testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::Combine(testing::ValuesIn(NetworkLarge()), testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), testing::ValuesIn({testBNAPIV2})), TestNameGenerator()); From 743cb9e879e51f114605917781211b0c16ba9b82 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Sat, 2 Nov 2024 13:12:22 +0000 Subject: [PATCH 12/13] fix minor testing issue --- src/batchnorm/problem_description.cpp | 67 +++++++++++++-------------- test/gtest/bn_bwd.cpp | 8 ++-- test/gtest/bn_fwd_train.cpp | 14 +++--- test/gtest/bn_infer.cpp | 4 +- 4 files changed, 44 insertions(+), 49 deletions(-) diff --git 
From 743cb9e879e51f114605917781211b0c16ba9b82 Mon Sep 17 00:00:00 2001
From: Bibek Ghimire
Date: Sat, 2 Nov 2024 13:12:22 +0000
Subject: [PATCH 12/13] fix minor testing issue

---
 src/batchnorm/problem_description.cpp | 67 +++++++++++++--------------
 test/gtest/bn_bwd.cpp                 |  8 ++--
 test/gtest/bn_fwd_train.cpp           | 14 +++---
 test/gtest/bn_infer.cpp               |  4 +-
 4 files changed, 44 insertions(+), 49 deletions(-)

diff --git a/src/batchnorm/problem_description.cpp b/src/batchnorm/problem_description.cpp
index 8a10bd6f27..daebf49208 100644
--- a/src/batchnorm/problem_description.cpp
+++ b/src/batchnorm/problem_description.cpp
@@ -46,18 +46,18 @@ bool is_fp32_or_fp64(miopenDataType_t type)
     return ((type == miopenFloat) || (type == miopenDouble));
 }

+bool is_fp32(miopenDataType_t type) { return (type == miopenFloat); }
+
 bool IsOCLInferTypeValid(const ProblemDescription& bn_problem)
 {
     // case 1 : mix type
-    return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) &&
-             is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) &&
-             bn_problem.GetBnScale().GetType() == miopenFloat &&
-             bn_problem.GetBnBias().GetType() == miopenFloat) ||
-            // case 2 : float type
-            (bn_problem.GetXDesc().GetType() == miopenFloat &&
-             bn_problem.GetYDesc().GetType() == miopenFloat &&
-             bn_problem.GetBnScale().GetType() == miopenFloat &&
-             bn_problem.GetBnBias().GetType() == miopenFloat));
+    return (
+        (is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) &&
+         is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) &&
+         is_fp32(bn_problem.GetBnScale().GetType()) && is_fp32(bn_problem.GetBnBias().GetType())) ||
+        // case 2 : float type
+        (is_fp32(bn_problem.GetXDesc().GetType()) && is_fp32(bn_problem.GetYDesc().GetType()) &&
+         is_fp32(bn_problem.GetBnScale().GetType()) && is_fp32(bn_problem.GetBnBias().GetType())));
 }

 bool IsCKInferTypeValid(const ProblemDescription& bn_problem)
@@ -67,8 +67,8 @@ bool IsCKInferTypeValid(const ProblemDescription& bn_problem)
             is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) &&
             is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) &&
             is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) &&
-            bn_problem.GetBnSMean().GetType() == miopenFloat &&
-            bn_problem.GetBnSVar().GetType() == miopenFloat) ||
+            is_fp32(bn_problem.GetBnSMean().GetType()) &&
+            is_fp32(bn_problem.GetBnSVar().GetType())) ||
            // case 2 : fp32 or fp64
            (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) &&
             is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) &&
@@ -81,15 +81,13 @@ bool IsOCLFwdTrainTypeValid(const ProblemDescription& bn_problem)
 {
     // case 1 : mix type
-    return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) &&
-             is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) &&
-             bn_problem.GetBnScale().GetType() == miopenFloat &&
-             bn_problem.GetBnBias().GetType() == miopenFloat) ||
-            // case 2 : float type
-            (bn_problem.GetXDesc().GetType() == miopenFloat &&
-             bn_problem.GetYDesc().GetType() == miopenFloat &&
-             bn_problem.GetBnScale().GetType() == miopenFloat &&
-             bn_problem.GetBnBias().GetType() == miopenFloat));
+    return (
+        (is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) &&
+         is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) &&
+         is_fp32(bn_problem.GetBnScale().GetType()) && is_fp32(bn_problem.GetBnBias().GetType())) ||
+        // case 2 : float type
+        (is_fp32(bn_problem.GetXDesc().GetType()) && is_fp32(bn_problem.GetYDesc().GetType()) &&
+         is_fp32(bn_problem.GetBnScale().GetType()) && is_fp32(bn_problem.GetBnBias().GetType())));
 }

 bool IsCKFwdTrainTypeValid(const ProblemDescription& bn_problem)
@@ -99,8 +97,8 @@
             is_fp16_or_bfp16(bn_problem.GetYDesc().GetType()) &&
             is_fp16_or_bfp16(bn_problem.GetBnScale().GetType()) &&
             is_fp16_or_bfp16(bn_problem.GetBnBias().GetType()) &&
-            bn_problem.GetBnSMean().GetType() == miopenFloat &&
-            bn_problem.GetBnSVar().GetType() == miopenFloat) ||
+            is_fp32(bn_problem.GetBnSMean().GetType()) &&
+            is_fp32(bn_problem.GetBnSVar().GetType())) ||
            // case 2 : fp32 or fp64
            (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) &&
             is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) &&
@@ -112,19 +110,16 @@
 bool IsOCLBwdTypeValid(const ProblemDescription& bn_problem)
 {
-    return ((is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) &&
-             is_fp16_or_bfp16(bn_problem.GetDXDesc().GetType()) &&
-             is_fp16_or_bfp16(bn_problem.GetDYDesc().GetType()) &&
-             bn_problem.GetBnScale().GetType() == miopenFloat &&
-             bn_problem.GetBnSMean().GetType() == miopenFloat &&
-             bn_problem.GetBnSVar().GetType() == miopenFloat) ||
-            // case 1 : fp32 or fp64
-            (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) &&
-             is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) &&
-             is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) &&
-             is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) &&
-             is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) &&
-             is_fp32_or_fp64(bn_problem.GetBnSVar().GetType())));
+    return (
+        (is_fp16_or_bfp16(bn_problem.GetXDesc().GetType()) &&
+         is_fp16_or_bfp16(bn_problem.GetDXDesc().GetType()) &&
+         is_fp16_or_bfp16(bn_problem.GetDYDesc().GetType()) &&
+         is_fp32(bn_problem.GetBnScale().GetType()) && is_fp32(bn_problem.GetBnSMean().GetType()) &&
+         is_fp32(bn_problem.GetBnSVar().GetType())) ||
+        // case 1 : fp32
+        (is_fp32(bn_problem.GetXDesc().GetType()) && is_fp32(bn_problem.GetDXDesc().GetType()) &&
+         is_fp32(bn_problem.GetBnScale().GetType()) && is_fp32(bn_problem.GetBnBias().GetType()) &&
+         is_fp32(bn_problem.GetBnSMean().GetType()) && is_fp32(bn_problem.GetBnSVar().GetType())));
 }

 bool IsCKBwdTypeValid(const ProblemDescription& bn_problem)
@@ -137,7 +132,7 @@
             bn_problem.GetBnSVar().GetType() == miopenFloat) ||
            // case 1 : fp32 or fp64
            (is_fp32_or_fp64(bn_problem.GetXDesc().GetType()) &&
-            is_fp32_or_fp64(bn_problem.GetYDesc().GetType()) &&
+            is_fp32_or_fp64(bn_problem.GetDXDesc().GetType()) &&
             is_fp32_or_fp64(bn_problem.GetBnScale().GetType()) &&
             is_fp32_or_fp64(bn_problem.GetBnBias().GetType()) &&
             is_fp32_or_fp64(bn_problem.GetBnSMean().GetType()) &&
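Note: the problem_description.cpp refactor above is mechanical: every repeated `GetType() == miopenFloat` comparison becomes a call to the new is_fp32() helper, each validator keeps exactly two accepted combinations (mixed precision, i.e. fp16/bf16 tensors with fp32 scale/bias/statistics, and uniform fp32), and the backward checks now use GetDXDesc(), the tensor backward actually writes, instead of GetYDesc(). A minimal sketch of that predicate pattern outside MIOpen (the enum and the Problem struct are invented stand-ins):

    #include <cstdio>

    enum SketchDataType { Half, BFloat16, Float, Double };

    bool is_fp16_or_bfp16(SketchDataType t) { return t == Half || t == BFloat16; }
    bool is_fp32(SketchDataType t) { return t == Float; }

    // Hypothetical stand-in for the x/y/scale/bias descriptor types of a problem.
    struct SketchProblem
    {
        SketchDataType x, y, scale, bias;
    };

    bool IsInferTypeValidSketch(const SketchProblem& p)
    {
        // case 1 : mix type -- low-precision tensors with fp32 scale/bias
        return ((is_fp16_or_bfp16(p.x) && is_fp16_or_bfp16(p.y) &&
                 is_fp32(p.scale) && is_fp32(p.bias)) ||
                // case 2 : everything fp32
                (is_fp32(p.x) && is_fp32(p.y) && is_fp32(p.scale) && is_fp32(p.bias)));
    }

    int main()
    {
        std::printf("mixed    : %d\n", IsInferTypeValidSketch({Half, Half, Float, Float}));   // 1
        std::printf("pure fp32: %d\n", IsInferTypeValidSketch({Float, Float, Float, Float})); // 1
        std::printf("bad mix  : %d\n", IsInferTypeValidSketch({Half, Half, Half, Float}));    // 0
    }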
diff --git a/test/gtest/bn_bwd.cpp b/test/gtest/bn_bwd.cpp
index 9b93df6c81..2dbe462b99 100644
--- a/test/gtest/bn_bwd.cpp
+++ b/test/gtest/bn_bwd.cpp
@@ -87,7 +87,7 @@ TEST_P(GPU_BN_BWD_Large_FP64, BnV2LargeBWDCKfp64) {}
 // fp16
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_CK_BWD_Large_FP16,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
@@ -102,9 +102,9 @@ INSTANTIATE_TEST_SUITE_P(Smoke,
 // bfp16
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_CK_BWD_Large_BFP16,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
-                                          testing::ValuesIn({testBNAPIV1, testBNAPIV2})),
+                                          testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());

 INSTANTIATE_TEST_SUITE_P(Smoke,
@@ -138,7 +138,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke,

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_BWD_Large_FP64,
-                         testing::Combine(testing::ValuesIn(NetworkSmall()),
+                         testing::Combine(testing::ValuesIn(NetworkLarge()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
diff --git a/test/gtest/bn_fwd_train.cpp b/test/gtest/bn_fwd_train.cpp
index 1953accee7..e6419400b7 100644
--- a/test/gtest/bn_fwd_train.cpp
+++ b/test/gtest/bn_fwd_train.cpp
@@ -93,7 +93,7 @@ TEST_P(GPU_BN_FWD_Train_Large_FP64, BnV2LargeFWD_TrainCKfp64) {}

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_CK_FWD_Train_Large_FP16,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
@@ -108,9 +108,9 @@ INSTANTIATE_TEST_SUITE_P(Smoke,
 // // bfp16
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_CK_FWD_Train_Large_BFP16,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
-                                          testing::ValuesIn({testBNAPIV1, testBNAPIV2})),
+                                          testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());

 INSTANTIATE_TEST_SUITE_P(Smoke,
@@ -123,14 +123,14 @@ INSTANTIATE_TEST_SUITE_P(Smoke,
 // // fp32
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_FWD_Train_Small_FP32,
-                         testing::Combine(testing::ValuesIn(NetworkSmall()),
+                         testing::Combine(testing::ValuesIn(NetworkLarge()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_FWD_Train_Large_FP32,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
@@ -139,12 +139,12 @@ INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_FWD_Train_Small_FP64,
                          testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
-                                          testing::ValuesIn({testBNAPIV2})),
+                                          testing::ValuesIn({testBNAPIV1})),
                          TestNameGenerator());

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_FWD_Train_Large_FP64,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
diff --git a/test/gtest/bn_infer.cpp b/test/gtest/bn_infer.cpp
index e717e149b0..75c23c44f2 100644
--- a/test/gtest/bn_infer.cpp
+++ b/test/gtest/bn_infer.cpp
@@ -94,7 +94,7 @@
 // fp16
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_CK_Infer_Large_FP16,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
@@ -108,7 +108,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke,
 // bfp16
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_CK_Infer_Large_BFP16,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
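Note: the next patch ("disable ck bn for now") takes effect through its batchnormocl.cpp hunks, which swap out the solver type lists passed to solver::SolverContainer (the angle-bracketed type lists themselves are not visible in the text). The sketch below, with invented types rather than the MIOpen implementation, shows the general pattern such a change relies on: solvers are enumerated in a compile-time container, the first one whose IsApplicable() accepts the problem is run, and removing a solver type from the list disables it without touching its code:

    #include <cstdio>

    struct SketchProblem
    {
        bool is_fp32;
    };

    struct SketchOclSolver // generic fallback, always applicable
    {
        static bool IsApplicable(const SketchProblem&) { return true; }
        static void Run() { std::printf("running OpenCL solver\n"); }
    };

    struct SketchCkSolver // only handles non-fp32 problems in this sketch
    {
        static bool IsApplicable(const SketchProblem& p) { return !p.is_fp32; }
        static void Run() { std::printf("running CK solver\n"); }
    };

    template <class... Solvers>
    struct SketchSolverContainer
    {
        static void ExecutePrimitive(const SketchProblem& p)
        {
            // Try each solver in declaration order; stop at the first applicable one.
            bool done = false;
            auto try_one = [&](auto solver) {
                using S = decltype(solver);
                if(!done && S::IsApplicable(p))
                {
                    S::Run();
                    done = true;
                }
            };
            (try_one(Solvers{}), ...);
        }
    };

    int main()
    {
        // With the CK solver listed, it picks up non-fp32 problems...
        SketchSolverContainer<SketchCkSolver, SketchOclSolver>::ExecutePrimitive({false});
        // ...and with it removed from the list -- the "disable" in the patch
        // below -- everything routes to the remaining solver.
        SketchSolverContainer<SketchOclSolver>::ExecutePrimitive({false});
    }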
From e1bb0c1b989bce863016d8241f331ba1d7c84fce Mon Sep 17 00:00:00 2001
From: Bibek Ghimire
Date: Sat, 2 Nov 2024 19:24:53 +0000
Subject: [PATCH 13/13] disable ck bn for now

---
 src/ocl/batchnormocl.cpp                   |  9 +++++----
 src/solver/batchnorm/forward_inference.cpp |  2 +-
 test/gtest/bn_bwd.cpp                      | 12 ++++++------
 test/gtest/bn_fwd_train.cpp                | 16 ++++++++--------
 test/gtest/bn_infer.cpp                    | 18 +++++++++---------
 5 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/src/ocl/batchnormocl.cpp b/src/ocl/batchnormocl.cpp
index 78d2dbce66..77442e030a 100644
--- a/src/ocl/batchnormocl.cpp
+++ b/src/ocl/batchnormocl.cpp
@@ -152,7 +152,7 @@ void BatchNormForwardTraining(Handle& handle,
         }();

     const auto solvers = solver::SolverContainer{};
@@ -250,8 +250,9 @@ void BatchNormForwardInference(Handle& handle,
         }();

     const auto algo = AlgorithmName{"miopenBatchNormalizationForwardInference"};
-    const auto solvers = solver::SolverContainer{};
+    const auto solvers = solver::SolverContainer{};

     solvers.ExecutePrimitive(handle, problem, algo, invoke_params);
 }
@@ -394,7 +395,7 @@ void BatchNormBackward(Handle& handle,
         }();

     const auto solvers = solver::SolverContainer{};
diff --git a/src/solver/batchnorm/forward_inference.cpp b/src/solver/batchnorm/forward_inference.cpp
index d22bb01797..a05fce5105 100644
--- a/src/solver/batchnorm/forward_inference.cpp
+++ b/src/solver/batchnorm/forward_inference.cpp
@@ -45,7 +45,7 @@ bool BnFwdInference::IsApplicable(const ExecutionContext&,
         return false;
     if(bn_problem.GetDirection() != miopen::batchnorm::Direction::ForwardInference)
         return false;
-    if(!(bn_problem.IsFp32() or bn_problem.IsFp16() || bn_problem.IsBFp16()))
+    if(!(bn_problem.IsFp32() or bn_problem.IsFp16() or bn_problem.IsBFp16()))
         return false;
     if(!bn_problem.Is2D())
         return false;
diff --git a/test/gtest/bn_bwd.cpp b/test/gtest/bn_bwd.cpp
index 2dbe462b99..a84a8a8feb 100644
--- a/test/gtest/bn_bwd.cpp
+++ b/test/gtest/bn_bwd.cpp
@@ -69,11 +69,11 @@ struct GPU_BN_BWD_Large_FP64 : BNBwdTest()),
+                         testing::Combine(testing::ValuesIn(NetworkLarge()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
@@ -125,7 +125,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke,
 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_BWD_Large_FP32,
                          testing::Combine(testing::ValuesIn(NetworkLarge()),
-                                          testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
+                                          testing::ValuesIn({miopenTensorNCHW}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
 // // fp64
diff --git a/test/gtest/bn_fwd_train.cpp b/test/gtest/bn_fwd_train.cpp
index e6419400b7..c46da84245 100644
--- a/test/gtest/bn_fwd_train.cpp
+++ b/test/gtest/bn_fwd_train.cpp
@@ -74,11 +74,11 @@ struct GPU_BN_FWD_Train_Large_FP64 : BNFwdTrainTest()),
-                                          testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
-                                          testing::ValuesIn({testBNAPIV2})),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
+                                          testing::ValuesIn({miopenTensorNCHW}),
+                                          testing::ValuesIn({testBNAPIV1})),
                          TestNameGenerator());

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_FWD_Train_Large_FP32,
-                         testing::Combine(testing::ValuesIn(NetworkSmall()),
+                         testing::Combine(testing::ValuesIn(NetworkLarge()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
diff --git a/test/gtest/bn_infer.cpp b/test/gtest/bn_infer.cpp
index 75c23c44f2..591cbd0b1a 100644
--- a/test/gtest/bn_infer.cpp
+++ b/test/gtest/bn_infer.cpp
@@ -76,11 +76,11 @@ struct GPU_BN_Infer_Large_FP64 : BNInferTest()),
-                                          testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
+                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                                          testing::ValuesIn({miopenTensorNCHW}),
                                           testing::ValuesIn({testBNAPIV1})),
                          TestNameGenerator());

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_Infer_Large_FP32,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
-                                          testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
+                                          testing::ValuesIn({miopenTensorNCHW}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
 // fp64
@@ -144,7 +144,7 @@ INSTANTIATE_TEST_SUITE_P(Smoke,

 INSTANTIATE_TEST_SUITE_P(Smoke,
                          GPU_BN_Infer_Large_FP64,
-                         testing::Combine(testing::ValuesIn(NetworkLarge()),
+                         testing::Combine(testing::ValuesIn(NetworkSmall()),
                                           testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}),
                                           testing::ValuesIn({testBNAPIV2})),
                          TestNameGenerator());
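Note: the forward_inference.cpp hunk in the last patch is purely cosmetic. `or` is a standard C++ alternative token for `||` with identical precedence and short-circuit behavior, so the mixed spelling it replaces was already correct; the change only makes the spelling uniform. A standalone demonstration (not MIOpen code):

    #include <cassert>

    int main()
    {
        bool fp32 = false, fp16 = true, bf16 = false;
        // "or" and "||" are the same operator, so mixing them changes nothing.
        assert((fp32 or fp16 || bf16) == (fp32 or fp16 or bf16));
        return 0;
    }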