From bdbaf2e13801ac4471689a782c49a9809bceb4f3 Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 23 Mar 2023 23:44:53 +0300 Subject: [PATCH 1/4] wa-issue-2038(01) Disable BF16 in ConvHipImplicitGemmV4R1Fwd for MI100/200 and all new targets. --- src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index e8c78cab3e..65853ad45f 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -34,6 +34,8 @@ #include +#define WORKAROUND_ISSUE_2038 1 + MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1) @@ -61,10 +63,20 @@ bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ConvolutionContext& ctx, return false; if(!problem.IsLayoutDefault()) return false; - if(ctx.GetStream().GetDeviceName() == "gfx90a" && - problem.conv_problem.IsGfx90aFp16altRequired()) + const auto device_name = ctx.GetStream().GetDeviceName(); + if(device_name == "gfx90a" && problem.conv_problem.IsGfx90aFp16altRequired()) return false; +#if WORKAROUND_ISSUE_2038 + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1{})) + if(problem.IsBfp16()) + // Explicitly enable all currently known GPUs except xDLOPs ones, which also means that + // all new GPUs will be disabled by default. + if(!(device_name == "gfx803" || device_name == "gfx900" || device_name == "gfx906" || + StartsWith(device_name, "gfx103"))) + return false; +#endif + std::size_t n = problem.batch_sz; std::size_t k = problem.n_outputs / problem.group_counts; std::size_t c = problem.n_inputs / problem.group_counts; From e84170cc5a9e4b5b43f84f1a1ad9809eec59df86 Mon Sep 17 00:00:00 2001 From: atamazov Date: Thu, 23 Mar 2023 23:58:44 +0300 Subject: [PATCH 2/4] wa-issue-2038(02) [tests] Do not smoke test ConvHipImplicitGemmV4R1Fwd with BF16 on MI100/200. --- test/CMakeLists.txt | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3cec8e6594..00df0ad62f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2069,18 +2069,33 @@ add_custom_test(smoke_solver_ConvHipImplicitGemmBwdDataV1R1 GFX103X_ENABLED TEST # MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 is necessary due to WORKAROUND_iGemm_936 in Jenkinsfile, # which disables ConvHipImplicitGemmV4R1Fwd, but we still want to check that the solver is not broken. -add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1 GFX103X_ENABLED HALF_ENABLED BF16_ENABLED TEST_TUNING +add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp32_fp16 GFX103X_ENABLED HALF_ENABLED TEST_TUNING COMMAND MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0 MIOPEN_FIND_MODE=normal MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmV4R1Fwd $ ${TEST_CONV_VERBOSE_F} --input 256 32 27 27 --weights 128 32 1 1 --pads_strides_dilations 0 0 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} +) + +add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1WrW GFX103X_ENABLED HALF_ENABLED BF16_ENABLED TEST_TUNING COMMAND MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0 MIOPEN_FIND_MODE=normal MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmV4R1WrW $ ${TEST_CONV_VERBOSE_W} --input 64 64 55 55 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} ) +# MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 is necessary due to WORKAROUND_iGemm_936 in Jenkinsfile, +# which disables ConvHipImplicitGemmV4R1Fwd, but we still want to check that the solver is not broken. +# smoke_solver_ConvHipImplicitGemmV4R1Fwd is split to BF16 and FP32+FP16 tests because of +# WORKAROUND_ISSUE_2038, which disables GFX908 and GFX90A for BF16. +add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_bf16 GFX908_DISABLED GFX90A_DISABLED GFX103X_ENABLED FLOAT_DISABLED BF16_ENABLED TEST_TUNING + COMMAND MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 + MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0 + MIOPEN_FIND_MODE=normal MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmV4R1Fwd $ + ${TEST_CONV_VERBOSE_F} --input 256 32 27 27 --weights 128 32 1 1 --pads_strides_dilations 0 0 1 1 1 1 ${MIOPEN_TEST_FLAGS_ARGS} +) + # MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1=1 is necessary due to WORKAROUND_SWDEV_229277_227616_229195, # which disables ConvHipImplicitGemmBwdDataV4R1, but we still want to check that the solver is not broken. add_custom_test(smoke_solver_ConvHipImplicitGemmBwdDataV4R1 GFX103X_ENABLED TEST_TUNING From 9ee84e088a87edb0645e82130959e1c39eea172a Mon Sep 17 00:00:00 2001 From: Jun Liu Date: Thu, 23 Mar 2023 22:06:37 -0700 Subject: [PATCH 3/4] Disable FP16 in ConvHipImplicitGemmV4R1Fwd for MI100/200 and all new targets. --- src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp | 2 +- test/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index 65853ad45f..852eb7000d 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -69,7 +69,7 @@ bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ConvolutionContext& ctx, #if WORKAROUND_ISSUE_2038 if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1{})) - if(problem.IsBfp16()) + if(problem.IsBfp16() || problem.IsFp16()) // Explicitly enable all currently known GPUs except xDLOPs ones, which also means that // all new GPUs will be disabled by default. if(!(device_name == "gfx803" || device_name == "gfx900" || device_name == "gfx906" || diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 00df0ad62f..7dc7272331 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2069,7 +2069,7 @@ add_custom_test(smoke_solver_ConvHipImplicitGemmBwdDataV1R1 GFX103X_ENABLED TEST # MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 is necessary due to WORKAROUND_iGemm_936 in Jenkinsfile, # which disables ConvHipImplicitGemmV4R1Fwd, but we still want to check that the solver is not broken. -add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp32_fp16 GFX103X_ENABLED HALF_ENABLED TEST_TUNING +add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp32 GFX103X_ENABLED TEST_TUNING COMMAND MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0 From 9538f39db8d169a7b88280855f60fa428fef026d Mon Sep 17 00:00:00 2001 From: atamazov Date: Fri, 24 Mar 2023 15:02:24 +0300 Subject: [PATCH 4/4] wa-issue-2038(03) Disable also FP16. --- src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp | 2 +- test/CMakeLists.txt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index 65853ad45f..852eb7000d 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -69,7 +69,7 @@ bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ConvolutionContext& ctx, #if WORKAROUND_ISSUE_2038 if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1{})) - if(problem.IsBfp16()) + if(problem.IsBfp16() || problem.IsFp16()) // Explicitly enable all currently known GPUs except xDLOPs ones, which also means that // all new GPUs will be disabled by default. if(!(device_name == "gfx803" || device_name == "gfx900" || device_name == "gfx906" || diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 00df0ad62f..0a696bfcc4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -2069,7 +2069,7 @@ add_custom_test(smoke_solver_ConvHipImplicitGemmBwdDataV1R1 GFX103X_ENABLED TEST # MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 is necessary due to WORKAROUND_iGemm_936 in Jenkinsfile, # which disables ConvHipImplicitGemmV4R1Fwd, but we still want to check that the solver is not broken. -add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp32_fp16 GFX103X_ENABLED HALF_ENABLED TEST_TUNING +add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp32 GFX103X_ENABLED TEST_TUNING COMMAND MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0 @@ -2086,9 +2086,9 @@ add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1WrW GFX103X_ENABLED HALF_ENA # MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 is necessary due to WORKAROUND_iGemm_936 in Jenkinsfile, # which disables ConvHipImplicitGemmV4R1Fwd, but we still want to check that the solver is not broken. -# smoke_solver_ConvHipImplicitGemmV4R1Fwd is split to BF16 and FP32+FP16 tests because of -# WORKAROUND_ISSUE_2038, which disables GFX908 and GFX90A for BF16. -add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_bf16 GFX908_DISABLED GFX90A_DISABLED GFX103X_ENABLED FLOAT_DISABLED BF16_ENABLED TEST_TUNING +# smoke_solver_ConvHipImplicitGemmV4R1Fwd is split to FP16+BF16 and FP32 tests because of +# WORKAROUND_ISSUE_2038, which disables GFX908 and GFX90A for FP16 and BF16. +add_custom_test(smoke_solver_ConvHipImplicitGemmV4R1Fwd_fp16_bf16 GFX908_DISABLED GFX90A_DISABLED GFX103X_ENABLED FLOAT_DISABLED HALF_ENABLED BF16_ENABLED TEST_TUNING COMMAND MIOPEN_FIND_ENFORCE=SEARCH_DB_UPDATE MIOPEN_DEBUG_TUNING_ITERATIONS_MAX=5 MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1=1 MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL=0