diff --git a/src/ocl/batchnormocl.cpp b/src/ocl/batchnormocl.cpp index 7988ca1e98..008d411651 100644 --- a/src/ocl/batchnormocl.cpp +++ b/src/ocl/batchnormocl.cpp @@ -990,7 +990,9 @@ void BatchNormBackward(Handle& handle, ctx.use_asm_kernels && ctx.rmv.IsV2orV3() && (StartsWith(handle.GetDeviceName(), "gfx8") || (StartsWith(handle.GetDeviceName(), "gfx9") && - (handle.GetDeviceName() != "gfx90a")))) + (handle.GetDeviceName() != "gfx90a"))) && + (!handle.GetTargetProperties().Xnack() || + !*handle.GetTargetProperties().Xnack())) { kernel_name = "miopenGcnAsmBNBwdTrainSpatial"; program_name = "gcnAsmBNBwdTrainSpatial.s"; diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index 2fc0d8a86d..d7e68c2049 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -188,6 +188,10 @@ inline bool IsApplicableTransform(const ConvolutionContext& params) if(!(params.IsFp32() || params.IsFp16())) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(!StartsWith(name, "gfx9") || name == "gfx90a") return false; diff --git a/src/solver/conv_asm_1x1u.cpp b/src/solver/conv_asm_1x1u.cpp index 6f894516b5..da8dc5b53e 100644 --- a/src/solver/conv_asm_1x1u.cpp +++ b/src/solver/conv_asm_1x1u.cpp @@ -393,6 +393,10 @@ bool ConvAsm1x1U::IsApplicable(const ConvolutionContext& params) const if(!(params.IsFp32() || params.IsFp16())) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(name.find("gfx9") == std::string::npos) { diff --git a/src/solver/conv_asm_1x1u_stride2.cpp b/src/solver/conv_asm_1x1u_stride2.cpp index 55dbdcad0d..a314433e1e 100644 --- a/src/solver/conv_asm_1x1u_stride2.cpp +++ b/src/solver/conv_asm_1x1u_stride2.cpp @@ -488,6 +488,11 @@ bool ConvAsm1x1UV2::IsApplicable(const ConvolutionContext& params) const return false; if(!params.IsFp32()) return false; + + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(name.find("gfx8") == std::string::npos && name.find("gfx9") == std::string::npos) { diff --git a/src/solver/conv_asm_3x3u.cpp b/src/solver/conv_asm_3x3u.cpp index 122911bf89..bc59232621 100644 --- a/src/solver/conv_asm_3x3u.cpp +++ b/src/solver/conv_asm_3x3u.cpp @@ -185,6 +185,11 @@ bool ConvAsm3x3U::IsApplicable(const ConvolutionContext& params) const return false; if(!params.rmv.IsV2orV3()) return false; + + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(!(StartsWith(name, "gfx8") || StartsWith(name, "gfx9")) || name == "gfx90a") return false; diff --git a/src/solver/conv_asm_5x10u2v2b1.cpp b/src/solver/conv_asm_5x10u2v2b1.cpp index 3dce2ce1d1..dd468b7fe4 100644 --- a/src/solver/conv_asm_5x10u2v2b1.cpp +++ b/src/solver/conv_asm_5x10u2v2b1.cpp @@ -47,6 +47,10 @@ bool ConvAsm5x10u2v2b1::IsApplicable(const ConvolutionContext& params) const if(!params.rmv.IsV2orV3()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); const bool device_is_gfx8_9_no_xnack = (name == "gfx800" || name == "gfx802" || name == "gfx803" || name == "gfx804" || diff --git a/src/solver/conv_asm_5x10u2v2f1.cpp b/src/solver/conv_asm_5x10u2v2f1.cpp index 526460ba8c..05ad84f03a 100644 --- a/src/solver/conv_asm_5x10u2v2f1.cpp +++ b/src/solver/conv_asm_5x10u2v2f1.cpp @@ -48,6 +48,10 @@ bool ConvAsm5x10u2v2f1::IsApplicable(const ConvolutionContext& params) const if(!params.rmv.IsV2orV3()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); const bool device_is_gfx8_9_no_xnack = (name == "gfx800" || name == "gfx802" || name == "gfx803" || name == "gfx804" || diff --git a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp index c4de714289..c542b7923c 100644 --- a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp +++ b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp @@ -48,6 +48,10 @@ bool ConvAsm7x7c3h224w224k64u2v2p3q3f1::IsApplicable(const ConvolutionContext& p if(!params.rmv.IsV2orV3()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(!(name == "gfx800" || name == "gfx802" || name == "gfx803" || name == "gfx804" || name == "gfx900" || name == "gfx904" || name == "gfx906" || name == "gfx908")) diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index 333cab568a..977387c24b 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -479,6 +479,10 @@ bool ConvAsmBwdWrW1x1::IsApplicable(const ConvolutionContext& params) const if(!params.rmv.IsV2orV3()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(name.find("gfx8") == std::string::npos && name.find("gfx9") == std::string::npos) { diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index d22c5f526f..5a7215f913 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -351,6 +351,11 @@ bool ConvAsmBwdWrW3x3::IsApplicable(const ConvolutionContext& params) const return false; if(!params.rmv.IsV2orV3()) return false; + + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const std::string name = params.GetStream().GetDeviceName(); if(!(StartsWith(name, "gfx8") || StartsWith(name, "gfx9")) || name == "gfx90a") return false; diff --git a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp index 4290273271..24f999b397 100644 --- a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp @@ -159,6 +159,10 @@ bool ConvAsmImplicitGemmV4R1DynamicBwd::IsApplicable(const ConvolutionContext& c return false; } + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + std::string kernel_name; int block_size; int grid_size; diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp index 51c6ea6b1c..655577ebc0 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp @@ -1006,6 +1006,10 @@ bool ConvAsmImplicitGemmGTCDynamicBwdXdlops::IsApplicable(const ConvolutionConte return false; } + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + bool isValid; std::tie(isValid, std::ignore, std::ignore, std::ignore, std::ignore) = FindImplicitGemmGtcDynamicBwdKernel(ctx); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp index 1a4a4d733a..b8cd3786df 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp @@ -632,21 +632,10 @@ bool ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC::IsApplicable(const ConvolutionC if(!ctx.IsLayoutNHWC()) return false; - const auto k = ctx.n_inputs; - const auto c = ctx.n_outputs; - const auto group = ctx.group_counts; - - if(ctx.IsFp32() && (k / group) % 4 != 0) - return false; // gemm_k limitation for fp32 - - if(ctx.IsFp16() && (k / group) % 16 != 0) - return false; // gemm_k limitation for fp16 + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; // NOLINT (readability-simplify-boolean-expr) - if(ctx.IsFp16()) - { - if((c / group) % 2 != 0) - return false; // vector store limitation - } return true; } ConvSolution ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC::GetSolution( diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index 9e2dfea7ec..fe6dca8569 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -1529,6 +1529,10 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ConvolutionConte return false; } + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + bool isValid; std::tie(isValid, std::ignore, std::ignore, std::ignore, std::ignore) = FindImplicitGemmGtcDynamicFwdKernel(ctx); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp index 52caec72c1..9c96bb2106 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp @@ -583,6 +583,11 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC::IsApplicable(const ConvolutionC if(!ctx.IsLayoutNHWC()) return false; + + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; // NOLINT (readability-simplify-boolean-expr) + return true; } ConvSolution ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC::GetSolution( diff --git a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp index c3872d1264..b1c55a2129 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp @@ -616,6 +616,11 @@ bool ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::IsApplicable(const ConvolutionC if(!ctx.IsLayoutNHWC()) return false; + + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; // NOLINT (readability-simplify-boolean-expr) + return true; } diff --git a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp index 6a69eb4d91..3b033eb100 100644 --- a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp @@ -303,6 +303,10 @@ bool ConvAsmImplicitGemmV4R1DynamicFwd::IsApplicable(const ConvolutionContext& c { return false; } + + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; auto tunables = GetImplicitGemmV4R1DynamicTunables(); return !std::none_of( tunables.begin(), tunables.end(), [&](auto tunable) { return tunable.IsValid(ctx); }); @@ -342,6 +346,10 @@ bool ConvAsmImplicitGemmV4R1DynamicFwd_1x1::IsApplicable(const ConvolutionContex { return false; } + + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; auto tunables = GetImplicitGemmV4R1DynamicTunables(); return !std::none_of( tunables.begin(), tunables.end(), [&](auto tunable) { return tunable.IsValid(ctx); }); diff --git a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp index 7ac4547cf6..ec4689e84c 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp @@ -836,6 +836,10 @@ bool ConvAsmImplicitGemmGTCDynamicWrwXdlops::IsApplicable(const ConvolutionConte { return false; } + + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; bool is_valid; std::tie(is_valid, std::ignore, std::ignore, std::ignore, std::ignore) = FindImplicitGemmWrwGTCDynamicXdlopsKernel(ctx); diff --git a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp index ba769215db..87e87fa907 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp @@ -329,6 +329,10 @@ bool ConvAsmImplicitGemmV4R1DynamicWrw::IsApplicable(const ConvolutionContext& c { return false; } + + const auto target = ctx.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; std::string kernel_name; int block_size; int grid_size; diff --git a/src/solver/conv_bin_wino3x3U.cpp b/src/solver/conv_bin_wino3x3U.cpp index b95679a563..91e51a9bbe 100644 --- a/src/solver/conv_bin_wino3x3U.cpp +++ b/src/solver/conv_bin_wino3x3U.cpp @@ -51,6 +51,10 @@ bool ConvBinWinograd3x3U::IsApplicable(const ConvolutionContext& params) const if(!(params.rmv.IsV2orV3() && params.use_asm_kernels)) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const auto name = params.GetStream().GetDeviceName(); if(!(name == "gfx803" || name == "gfx900" || name == "gfx906" || name == "gfx908")) return false; diff --git a/src/solver/conv_bin_winoRxS.cpp b/src/solver/conv_bin_winoRxS.cpp index 40c6e6d48f..4b12d0fb4e 100644 --- a/src/solver/conv_bin_winoRxS.cpp +++ b/src/solver/conv_bin_winoRxS.cpp @@ -240,6 +240,10 @@ bool ConvBinWinogradRxS::IsApplicable(const ConvolutionContext& params) const if(!params.rmv.IsV2orV3()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const auto name = params.GetStream().GetDeviceName(); const bool fp16 = params.IsFp16(); if(fp16) diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index 646bccf0de..3df3280b2e 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -431,6 +431,10 @@ bool ConvWinograd3x3MultipassWrW return false; } + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + if(!(InTransform::IsApplicable(params) && OutTransform::IsApplicable(params) && FilterTransform::IsApplicable(params))) diff --git a/src/solver/conv_winoRxS_f2x3.cpp b/src/solver/conv_winoRxS_f2x3.cpp index 274fbd10c1..497340c8ac 100644 --- a/src/solver/conv_winoRxS_f2x3.cpp +++ b/src/solver/conv_winoRxS_f2x3.cpp @@ -44,8 +44,6 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1) -#define WORKAROUND_ISSUE_1093 1 - #define WINODATA 2 #define WINOFILTER 3 #define MAX_CU_LIMIT 512 @@ -453,12 +451,12 @@ static bool IsApplicableBase(const ConvolutionContext& params) if(!params.rmv.IsV3()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const auto name = params.GetStream().GetDeviceName(); -#if WORKAROUND_ISSUE_1093 - if(!(StartsWith(name, "gfx9") || StartsWith(name, "gfx10")) || name == "gfx90a") -#else if(!(StartsWith(name, "gfx9") || StartsWith(name, "gfx10"))) -#endif return false; if(params.IsFp16() && !(StartsWith(name, "gfx906") || StartsWith(name, "gfx908") || StartsWith(name, "gfx1011") || diff --git a/src/solver/conv_winoRxS_f3x2.cpp b/src/solver/conv_winoRxS_f3x2.cpp index 63dd7cf611..7b21b7d752 100644 --- a/src/solver/conv_winoRxS_f3x2.cpp +++ b/src/solver/conv_winoRxS_f3x2.cpp @@ -314,6 +314,10 @@ bool ConvBinWinogradRxSf3x2::IsApplicable(const ConvolutionContext& params) cons if(!params.IsLayoutDefault()) return false; + const auto target = params.GetStream().GetTargetProperties(); + if(target.Xnack() && *target.Xnack()) + return false; + const auto max_cu = params.GetStream().GetMaxHardwareComputeUnits(); if(max_cu > MAX_CU_LIMIT) return false; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 82c49c5705..87188b1e16 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,7 +47,6 @@ option( WORKAROUND_ISSUE_898 "" ON) option( WORKAROUND_ISSUE_936 "" ON) option( WORKAROUND_ISSUE_1053 "" ON) option( WORKAROUND_ISSUE_1064 "" ON) -option( WORKAROUND_ISSUE_1093 "" ON) option( WORKAROUND_ISSUE_1095 "" ON) # Run the test suite to a depth limit @@ -198,9 +197,6 @@ if(MIOPEN_TEST_GFX908) endif() if (MIOPEN_TEST_GFX90a) - if(WORKAROUND_ISSUE_1093) - list(APPEND SKIP_TESTS test_find_db test_main test_immed_conv2d) - endif() if(WORKAROUND_ISSUE_1095) list(APPEND SKIP_TESTS test_dropout) endif()