diff --git a/src/convolution.cpp b/src/convolution.cpp index 06f8a39718..dd5210f20f 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -447,7 +447,11 @@ std::size_t ConvolutionDescriptor::ForwardGetWorkSpaceSize(Handle& handle, miopenConvSolution_t sol; GetForwardSolutions(handle, wDesc, xDesc, yDesc, 1, &count, &sol); if(count < 1 || (fm.IsHybrid() && sol.time < 0)) + { + ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = + fm.IsOptimizedHybrid(); break; // Fall down to Normal Find. + } MIOPEN_LOG_I2(sol.workspace_size); return sol.workspace_size; } diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 9d09a65e34..5ac543647d 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -194,6 +194,7 @@ const char* ToCString(const FindMode::Values mode) case FindMode::Values::Normal: return "NORMAL"; case FindMode::Values::Fast: return "FAST"; case FindMode::Values::Hybrid: return "HYBRID"; + case FindMode::Values::OptimizedHybrid: return "OPTIMIZED_HYBRID"; case FindMode::Values::End_: break; } return "<Unknown>"; @@ -218,6 +219,8 @@ FindMode::Values GetFindModeValueImpl2() return FindMode::Values::Fast; else if(str == "HYBRID") return FindMode::Values::Hybrid; + else if(str == "OPTIMIZED_HYBRID") + return FindMode::Values::OptimizedHybrid; else { // Nop. Fall down & try numerics. } diff --git a/src/include/miopen/execution_context.hpp b/src/include/miopen/execution_context.hpp old mode 100755 new mode 100644 index 26658ec3f7..5d6d4388b8 --- a/src/include/miopen/execution_context.hpp +++ b/src/include/miopen/execution_context.hpp @@ -76,7 +76,8 @@ struct ExecutionContext // to optimize the getWorkspaceSize() calls for speed. This specific optimization is correct // because Solvers shall be written so that the required workspace size does not depend on the // performance config. - bool disable_perfdb_access = false; + bool disable_perfdb_access = false; + bool skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = false; inline Handle& GetStream() const { return *stream; } inline void SetStream(Handle* stream_) { stream = stream_; } diff --git a/src/include/miopen/find_controls.hpp b/src/include/miopen/find_controls.hpp index 8a8d7eab8f..9dda30dcf3 100644 --- a/src/include/miopen/find_controls.hpp +++ b/src/include/miopen/find_controls.hpp @@ -128,6 +128,7 @@ class FindMode Normal = Begin_, Fast, Hybrid, + OptimizedHybrid, End_, Default_ = Hybrid, }; @@ -139,7 +140,15 @@ class FindMode FindMode(const ConvolutionContext& ctx); bool IsFast() const { return value == Values::Fast && !debug::FindModeDisable; } - bool IsHybrid() const { return value == Values::Hybrid && !debug::FindModeDisable; } + bool IsHybrid() const + { + return (value == Values::Hybrid || value == Values::OptimizedHybrid) && + !debug::FindModeDisable; + } + bool IsOptimizedHybrid() const + { + return value == Values::OptimizedHybrid && !debug::FindModeDisable; + } friend std::ostream& operator<<(std::ostream&, const FindMode&); }; diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 78dc4ef1da..67e5989680 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -223,10 +223,15 @@ ConvolutionDescriptor::FindDataImplicitGemmSolutions(Handle& handle, if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM{})) return {}; - const auto dir = isForward ? conv::Direction::Forward : conv::Direction::BackwardData; - auto ctx = ConvolutionContext{xDesc, wDesc, yDesc, *this, dir}; - ctx.do_search = exhaustiveSearch; - ctx.save_srch_req = true; + const auto dir = isForward ? conv::Direction::Forward : conv::Direction::BackwardData; + auto ctx = ConvolutionContext{xDesc, wDesc, yDesc, *this, dir}; + + const ProblemDescription problem(xDesc, wDesc, yDesc, *this, conv::Direction::BackwardData); + const miopen::FindMode fm(problem); + ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = + fm.IsOptimizedHybrid(); + ctx.do_search = exhaustiveSearch; + ctx.save_srch_req = true; ctx.general_compile_options = ""; ctx.SetStream(&handle); ctx.SetBufs(bufs); @@ -789,6 +794,9 @@ void ConvolutionDescriptor::FindConvFwdAlgorithm(Handle& handle, } else { + ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = + fm.IsOptimizedHybrid(); + MIOPEN_LOG_I("IsOptimizedHybrid fallback"); perf_db = UserFindDbRecord::TryLoad(handle, problem, [&](DbRecord& record) { DirConvFindCore(handle, xDesc, @@ -2138,7 +2146,7 @@ void ConvolutionDescriptor::FindConvBwdDataAlgorithm(Handle& handle, { perf_db = UserFindDbRecord::TryLoad(handle, problem, [&](DbRecord& record) { const auto network_config = problem.BuildConfKey(); - const auto invoke_ctx = conv::DataInvokeParams{ + auto invoke_ctx = conv::DataInvokeParams{ {dyDesc, dy, wDesc, w, dxDesc, dx}, workSpace, workSpaceSize}; // Winograd algo @@ -3511,6 +3519,8 @@ void ConvolutionDescriptor::FindConvBwdWeightsAlgorithm(Handle& handle, bufs.SetWrW(x, dw, dy); auto ctx = ConvolutionContext{xDesc, dwDesc, dyDesc, *this, conv::Direction::BackwardWeights}; + ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage = + fm.IsOptimizedHybrid(); ctx.do_search = exhaustiveSearch; ctx.SetStream(&handle); ctx.SetBufs(bufs); diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1.cpp index a7855a6b5b..dd585e0b75 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1.cpp @@ -635,6 +635,8 @@ size_t ConvHipImplicitGemmBwdDataV1R1::GetWorkspaceSize(const ConvolutionContext bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!ctx.direction.IsBackwardData()) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1_xdlops.cpp index 9a016c17fa..e02b74d14e 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_v1r1_xdlops.cpp @@ -55,6 +55,8 @@ size_t ConvHipImplicitGemmBwdDataV1R1Xdlops::GetWorkspaceSize(const ConvolutionC bool ConvHipImplicitGemmBwdDataV1R1Xdlops::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!ctx.direction.IsBackwardData()) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1.cpp index 93c5acaf69..425c0570fd 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1.cpp @@ -733,6 +733,8 @@ ConvHipImplicitGemmBwdDataV4R1::CalculateGemmSize(const ConvolutionContext& ctx, bool ConvHipImplicitGemmBwdDataV4R1::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; #if WORKAROUND_SWDEV_229277_227616_229195 if(!IsHccCompiler()) return false; diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1_xdlops.cpp index 073aa7c224..1d47305e61 100755 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_v4r1_xdlops.cpp @@ -590,6 +590,8 @@ ConvHipImplicitGemmBwdDataV4R1Xdlops::CalculateGemmSize(const ConvolutionContext bool ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!ctx.direction.IsBackwardData()) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp index 85f7e9fbe6..cca402ebed 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp @@ -907,6 +907,11 @@ int ConvHipImplicitGemmForwardV4R4Xdlops::RunAndMeasureSolution(const miopen::Ha bool ConvHipImplicitGemmForwardV4R4Xdlops::IsApplicable(const ConvolutionContext& ctx) const { + MIOPEN_LOG_I("skip_solutions = " + << ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage); + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; + if(!IsXdlopsSupport(ctx)) return false; diff --git a/src/solver/conv_hip_implicit_gemm_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_v4r1.cpp index 76ff07f150..81bcf3b6a3 100644 --- a/src/solver/conv_hip_implicit_gemm_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_v4r1.cpp @@ -40,6 +40,8 @@ namespace solver { bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!ctx.direction.IsForward()) return false; if(!ctx.use_hip_kernels) @@ -68,6 +70,8 @@ bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ConvolutionContext& ctx) con bool ConvHipImplicitGemmV4R1WrW::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!ctx.direction.IsBackwardWrW()) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_v4r4.cpp index f8cd79281b..44e3d4cbb0 100644 --- a/src/solver/conv_hip_implicit_gemm_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_v4r4.cpp @@ -583,6 +583,8 @@ ConvHipImplicitGemmV4R4Fwd::CalculateGemmSize(const ConvolutionContext& ctx) bool ConvHipImplicitGemmV4R4Fwd::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!ctx.direction.IsForward()) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops.cpp index 751c2f6d6d..70d32dfcb6 100644 --- a/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops.cpp @@ -465,6 +465,8 @@ int ConvHipImplicitGemmV4R4GenWrWXdlops::RunAndMeasureSolution(const miopen::Han bool ConvHipImplicitGemmV4R4GenFwdXdlops::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!(ctx.IsFp16() || ctx.IsBfp16())) return false; if(!ctx.use_hip_kernels) @@ -478,6 +480,8 @@ bool ConvHipImplicitGemmV4R4GenFwdXdlops::IsApplicable(const ConvolutionContext& bool ConvHipImplicitGemmV4R4GenWrWXdlops::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!(ctx.IsFp32() || ctx.IsFp16() || ctx.IsBfp16())) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_fwd_fp32.cpp b/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_fwd_fp32.cpp index de691dbd2e..2b5178a286 100644 --- a/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_fwd_fp32.cpp +++ b/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_fwd_fp32.cpp @@ -557,6 +557,8 @@ int ConvHipImplicitGemmV4R4GenXdlopsFwdFp32::RunAndMeasureSolution(const miopen: bool ConvHipImplicitGemmV4R4GenXdlopsFwdFp32::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(!(ctx.IsFp32())) return false; if(!ctx.use_hip_kernels) diff --git a/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_wrw_fp32.cpp b/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_wrw_fp32.cpp index 104e0964ac..c495f60e3e 100644 --- a/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_wrw_fp32.cpp +++ b/src/solver/conv_hip_implicit_gemm_v4r4_gen_xdlops_wrw_fp32.cpp @@ -590,6 +590,8 @@ int ConvHipImplicitGemmV4R4GenXdlopsWrWFp32::RunAndMeasureSolution(const miopen: bool ConvHipImplicitGemmV4R4GenXdlopsWrWFp32::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; /// \todo Fix and remove this workaround. /// There are random failures with certain configs, /// see https://github.com/ROCmSoftwarePlatform/MIOpen/pull/228 diff --git a/src/solver/conv_hip_implicit_gemm_wrw_weights_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_wrw_weights_v4r4.cpp index 8fc9c12cdd..8e2031eae1 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_weights_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_weights_v4r4.cpp @@ -586,6 +586,8 @@ ConvHipImplicitGemmV4R4WrW::CalculateGemmSize(const ConvolutionContext& ctx) bool ConvHipImplicitGemmV4R4WrW::IsApplicable(const ConvolutionContext& ctx) const { + if(ctx.skip_solutions_that_take_long_time_to_build_and_have_narrow_coverage) + return false; if(ctx.direction.IsForward() || ctx.direction.IsBackwardData()) return false; if(!ctx.use_hip_kernels)