Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bg/fix bn solvers is applicable #3355

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 28 additions & 13 deletions src/batch_norm_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ namespace miopen {
namespace debug {

void LogCmdBNorm(const miopenTensorDescriptor_t xDesc,
const miopenTensorDescriptor_t sMeanDesc,
const miopenTensorDescriptor_t yDesc,
const miopenTensorDescriptor_t scaleDesc,
const miopenTensorDescriptor_t biasDesc,
const miopenTensorDescriptor_t saveMeanDesc,
miopenBatchNormMode_t bn_mode,
const void* resultRunningMean,
const void* resultRunningVariance,
Expand All @@ -61,7 +64,10 @@ void LogCmdBNorm(const miopenTensorDescriptor_t xDesc,
if(miopen::IsLoggingCmd())
{
const std::string& str = BnormArgsForMIOpenDriver(xDesc,
sMeanDesc,
yDesc,
scaleDesc,
biasDesc,
saveMeanDesc,
bn_mode,
resultRunningMean,
resultRunningVariance,
Expand Down Expand Up @@ -206,7 +212,7 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle,
const miopenTensorDescriptor_t yDesc,
void* y,
const miopenTensorDescriptor_t scaleDesc,
const miopenTensorDescriptor_t BiasDesc,
const miopenTensorDescriptor_t biasDesc,
const miopenTensorDescriptor_t estMeanDesc,
const miopenTensorDescriptor_t estVarianceDesc,
void* bnScale,
Expand All @@ -222,7 +228,7 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle,
yDesc,
y,
scaleDesc,
BiasDesc,
biasDesc,
estMeanDesc,
estVarianceDesc,
bnScale,
Expand All @@ -232,12 +238,15 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle,
epsilon);

miopen::debug::LogCmdBNorm(xDesc,
yDesc,
scaleDesc,
biasDesc,
estMeanDesc,
bn_mode,
estimatedMean,
estimatedVariance,
nullptr,
nullptr,
estMeanDesc,
estimatedVariance,
miopen::debug::BatchNormDirection_t::ForwardInference);

// In case of NxCxDxHxW
Expand All @@ -256,7 +265,7 @@ miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle,
: miopen::deref(yDesc),
DataCast(y),
miopen::deref(scaleDesc),
miopen::deref(BiasDesc),
miopen::deref(biasDesc),
miopen::deref(estMeanDesc),
miopen::deref(estVarianceDesc),
DataCast(bnScale),
Expand All @@ -277,7 +286,7 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle,
const miopenTensorDescriptor_t yDesc,
void* y,
const miopenTensorDescriptor_t scaleDesc,
const miopenTensorDescriptor_t BiasDesc,
const miopenTensorDescriptor_t biasDesc,
const miopenTensorDescriptor_t savedMeanDesc,
const miopenTensorDescriptor_t savedVarianceDesc,
void* bnScale,
Expand All @@ -296,7 +305,7 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle,
yDesc,
y,
scaleDesc,
BiasDesc,
biasDesc,
savedMeanDesc,
savedVarianceDesc,
bnScale,
Expand All @@ -309,6 +318,9 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle,
resultSaveInvVariance);

miopen::debug::LogCmdBNorm(xDesc,
yDesc,
scaleDesc,
biasDesc,
savedMeanDesc,
bn_mode,
resultRunningMean,
Expand All @@ -332,7 +344,7 @@ miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle,
: miopen::deref(yDesc),
DataCast(y),
miopen::deref(scaleDesc),
miopen::deref(BiasDesc),
miopen::deref(biasDesc),
miopen::deref(savedMeanDesc),
miopen::deref(savedVarianceDesc),
DataCast(bnScale),
Expand Down Expand Up @@ -360,7 +372,7 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle,
const miopenTensorDescriptor_t dxDesc,
void* dx,
const miopenTensorDescriptor_t scaleDesc,
const miopenTensorDescriptor_t BiasDesc,
const miopenTensorDescriptor_t biasDesc,
const miopenTensorDescriptor_t savedMeanDesc,
const miopenTensorDescriptor_t savedVarianceDesc,
const void* bnScale,
Expand All @@ -379,7 +391,7 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle,
dxDesc,
dx,
scaleDesc,
BiasDesc,
biasDesc,
savedMeanDesc,
savedVarianceDesc,
bnScale,
Expand All @@ -389,6 +401,9 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle,
savedMean,
savedInvVariance);
miopen::debug::LogCmdBNorm(xDesc,
dyDesc,
scaleDesc,
biasDesc,
savedMeanDesc,
bn_mode,
nullptr,
Expand Down Expand Up @@ -417,7 +432,7 @@ miopenBatchNormalizationBackward_V2(miopenHandle_t handle,
: miopen::deref(dxDesc),
DataCast(dx),
miopen::deref(scaleDesc),
miopen::deref(BiasDesc),
miopen::deref(biasDesc),
miopen::deref(savedMeanDesc),
miopen::deref(savedVarianceDesc),
DataCast(bnScale),
Expand Down
16 changes: 8 additions & 8 deletions src/batchnorm/problem_description.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const
size_t ygridsize = 1;

bool bfpmixparm = false;
if(xDesc.GetType() == miopenHalf && GetBnScaleBiasMeanVarDesc().GetType() == miopenFloat)
if(IsMix())
{
bfpmixparm = true;
}
Expand Down Expand Up @@ -137,7 +137,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const
ss << "fp16" << static_cast<int>(IsFp16());
ss << "fp32" << static_cast<int>(IsFp32());
ss << "fp64" << static_cast<int>(IsFp64());
ss << "fbf16" << static_cast<int>(IsBfp16());
ss << "fbf16" << static_cast<int>(IsBFp16());
ss << "fmix" << static_cast<int>(IsMix());
ss << "c" << c;
}
Expand All @@ -154,7 +154,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const
ss << "fp16" << static_cast<int>(IsFp16());
ss << "fp32" << static_cast<int>(IsFp32());
ss << "fp64" << static_cast<int>(IsFp64());
ss << "fbf16" << static_cast<int>(IsBfp16());
ss << "fbf16" << static_cast<int>(IsBFp16());
ss << "fmix" << static_cast<int>(IsMix());
ss << "single" << static_cast<int>(single);
ss << "n" << n;
Expand All @@ -173,7 +173,7 @@ NetworkConfig ProblemDescription::MakeForwardTrainingNetworkConfig() const
ss << "fp16" << static_cast<int>(IsFp16());
ss << "fp32" << static_cast<int>(IsFp32());
ss << "fp64" << static_cast<int>(IsFp64());
ss << "fbf16" << static_cast<int>(IsBfp16());
ss << "fbf16" << static_cast<int>(IsBFp16());
ss << "fmix" << static_cast<int>(IsMix());
ss << "gx" << xgridsize;
ss << "gy" << ygridsize;
Expand Down Expand Up @@ -203,7 +203,7 @@ NetworkConfig ProblemDescription::MakeForwardInferenceNetworkConfig() const
ss << "fp16" << static_cast<int>(IsFp16());
ss << "fp32" << static_cast<int>(IsFp32());
ss << "fp64" << static_cast<int>(IsFp64());
ss << "fbf16" << static_cast<int>(IsBfp16());
ss << "fbf16" << static_cast<int>(IsBFp16());
ss << "fmix" << static_cast<int>(IsMix());
ss << "mode" << bn_mode;
ss << "HWdims" << in_cstride;
Expand All @@ -218,7 +218,7 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const
std::ostringstream ss;

bool bfpmixparm = false;
if(xDesc.GetType() == miopenHalf && GetScaleBiasDiffDesc().GetType() == miopenFloat)
if(xDesc.GetType() == miopenHalf && GetBnScale().GetType() == miopenFloat)
{
bfpmixparm = true;
}
Expand Down Expand Up @@ -311,7 +311,7 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const
ss << "fp16" << static_cast<int>(IsFp16());
ss << "fp32" << static_cast<int>(IsFp32());
ss << "fp64" << static_cast<int>(IsFp64());
ss << "fbf16" << static_cast<int>(IsBfp16());
ss << "fbf16" << static_cast<int>(IsBFp16());
ss << "fmix" << static_cast<int>(IsMix());
ss << "single" << static_cast<int>(single);
ss << "gcn" << ldsgcn;
Expand All @@ -334,7 +334,7 @@ NetworkConfig ProblemDescription::MakeBackwardNetworkConfig() const
ss << "fp16" << static_cast<int>(IsFp16());
ss << "fp32" << static_cast<int>(IsFp32());
ss << "fp64" << static_cast<int>(IsFp64());
ss << "fbf16" << static_cast<int>(IsBfp16());
ss << "fbf16" << static_cast<int>(IsBFp16());
ss << "fmix" << static_cast<int>(IsMix());
ss << "nhw" << in_nhw;
}
Expand Down
119 changes: 103 additions & 16 deletions src/driver_arguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,27 +64,105 @@ void ConvDataType(std::stringstream& ss, const miopen::TensorDescriptor& desc)
// We choose scaleMean because it's an accumulator type.
// Append to `ss` the MIOpenDriver "bnorm" base-command name that matches the
// tensor data types of a batch-norm invocation.  The suffix encodes the
// precision mix:
//   bnormfp16      : fp16 in/out with fp32 scale/bias/statistics
//   bnormbfp16     : bfp16 in/out with fp32 scale/bias/statistics
//   bnormfp16fp32  : fp16 in/out AND fp16 scale/bias (fp32 statistics)
//   bnormbfp16fp32 : bfp16 analogue of the above
//   bnorm          : plain fp32 or any unrecognized combination
// NOTE(review): in the Backward case the mixed-precision branches test
// yDesc (dy) and biasDesc against miopenFloat rather than the x type —
// confirm this matches the driver's expectations.
void BnDataType(std::stringstream& ss,
                const miopen::TensorDescriptor& xDesc,
                const miopen::TensorDescriptor& yDesc,
                const miopen::TensorDescriptor& scaleDesc,
                const miopen::TensorDescriptor& biasDesc,
                const miopen::TensorDescriptor& sMeanDesc,
                const BatchNormDirection_t bn_mode)
{
    // Hoist the repeated GetType() lookups; they are used in every branch.
    const auto xType     = xDesc.GetType();
    const auto yType     = yDesc.GetType();
    const auto scaleType = scaleDesc.GetType();
    const auto biasType  = biasDesc.GetType();
    const auto meanType  = sMeanDesc.GetType();

    if(bn_mode == BatchNormDirection_t::ForwardInference ||
       bn_mode == BatchNormDirection_t::ForwardTraining)
    {
        // Forward inference and forward training share identical type
        // dispatch.  BUGFIX: the original duplicated this whole block in a
        // separate `else if(bn_mode == ForwardTraining)` branch that was
        // unreachable, because ForwardTraining is already matched here.
        if(xType == miopenHalf && yType == miopenHalf && scaleType == miopenFloat &&
           biasType == miopenFloat && meanType == miopenFloat)
        {
            ss << "bnormfp16";
        }
        else if(xType == miopenBFloat16 && yType == miopenBFloat16 &&
                scaleType == miopenFloat && biasType == miopenFloat && meanType == miopenFloat)
        {
            ss << "bnormbfp16";
        }
        else if(xType == miopenHalf && yType == miopenHalf && scaleType == miopenHalf &&
                biasType == miopenHalf && meanType == miopenFloat)
        {
            ss << "bnormfp16fp32";
        }
        else if(xType == miopenBFloat16 && yType == miopenBFloat16 &&
                scaleType == miopenBFloat16 && biasType == miopenBFloat16 &&
                meanType == miopenFloat)
        {
            ss << "bnormbfp16fp32";
        }
        else
        {
            ss << "bnorm";
        }
    }
    else if(bn_mode == BatchNormDirection_t::Backward)
    {
        // Backward: dy (yDesc) and bias are fp32 in the fully-mixed cases.
        if(xType == miopenHalf && yType == miopenHalf && scaleType == miopenFloat &&
           biasType == miopenFloat && meanType == miopenFloat)
        {
            ss << "bnormfp16";
        }
        else if(xType == miopenBFloat16 && yType == miopenBFloat16 &&
                scaleType == miopenFloat && biasType == miopenFloat && meanType == miopenFloat)
        {
            ss << "bnormbfp16";
        }
        else if(xType == miopenHalf && yType == miopenFloat && scaleType == miopenHalf &&
                biasType == miopenFloat && meanType == miopenFloat)
        {
            ss << "bnormfp16fp32";
        }
        else if(xType == miopenBFloat16 && yType == miopenFloat &&
                scaleType == miopenBFloat16 && biasType == miopenFloat &&
                meanType == miopenFloat)
        {
            ss << "bnormbfp16fp32";
        }
        else
        {
            ss << "bnorm";
        }
    }
    else
    {
        // Unknown direction: fall back to the generic driver name so the
        // logged command is still runnable (matches pre-change behavior,
        // which always emitted at least "bnorm").
        ss << "bnorm";
    }
}

Expand Down Expand Up @@ -228,6 +306,9 @@ std::string ConvArgsForMIOpenDriver(const miopen::TensorDescriptor& xDesc,
}

std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc,
const miopenTensorDescriptor_t yDesc,
const miopenTensorDescriptor_t scaleDesc,
const miopenTensorDescriptor_t biasDesc,
const miopenTensorDescriptor_t sMeanDesc,
miopenBatchNormMode_t bn_mode,
const void* resultRunningMean,
Expand All @@ -241,7 +322,13 @@ std::string BnormArgsForMIOpenDriver(const miopenTensorDescriptor_t xDesc,
miopenGetTensorDescriptorSize(xDesc, &size);
std::stringstream ss;
if(print_for_bn_driver)
BnDataType(ss, miopen::deref(xDesc), miopen::deref(sMeanDesc));
BnDataType(ss,
miopen::deref(xDesc),
miopen::deref(yDesc),
miopen::deref(scaleDesc),
miopen::deref(biasDesc),
miopen::deref(sMeanDesc),
dir);

ss << " -n " << miopen::deref(xDesc).GetLengths()[0] // clang-format off
<< " -c " << miopen::deref(xDesc).GetLengths()[1];
Expand Down
21 changes: 12 additions & 9 deletions src/fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -389,15 +389,18 @@ std::string LogCmdBnormFusion(const miopenFusionPlanDescriptor_t fusePlanDesc, i

if(bn_op != nullptr)
{
str += BnormArgsForMIOpenDriver(&bn_op->input_desc,
&bn_op->base_desc,
bn_op->mode,
nullptr,
nullptr,
nullptr,
nullptr,
miopen::debug::BatchNormDirection_t::ForwardInference,
false);
// str += BnormArgsForMIOpenDriver(&bn_op->input_desc,
// &bn_op->base_desc,
// nullptr,
// nullptr,
// nullptr,
// bn_op->mode,
// nullptr,
// nullptr,
// nullptr,
// nullptr,
// miopen::debug::BatchNormDirection_t::ForwardInference,
// false);
bghimireamd marked this conversation as resolved.
Show resolved Hide resolved
}
else
{
Expand Down
Loading