From d3a2dde22b9b1d5b698ddcd7fabc4ede28e93256 Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 15 Dec 2023 10:18:34 -0600 Subject: [PATCH] Address comments --- src/jitlayers.h | 16 ++- src/pipeline.cpp | 251 +++++++++++++++++++++++++++-------------------- 2 files changed, 159 insertions(+), 108 deletions(-) diff --git a/src/jitlayers.h b/src/jitlayers.h index ed2d944626de3d..f9c7d6b4dd8069 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -90,15 +90,27 @@ struct OptimizationOptions { bool dump_native; bool external_use; bool llvm_only; + bool always_inline; + bool enable_early_simplifications; + bool enable_early_optimizations; + bool enable_scalar_optimizations; bool enable_vector_pipeline; + bool remove_ni; + bool cleanup; static constexpr OptimizationOptions defaults( bool lower_intrinsics=true, bool dump_native=false, bool external_use=false, bool llvm_only=false, - bool enable_vector_pipeline=true) { - return {lower_intrinsics, dump_native, external_use, llvm_only, enable_vector_pipeline}; + bool always_inline=true, + bool enable_early_simplifications=true, + bool enable_early_optimizations=true, + bool enable_scalar_optimizations=true, + bool enable_vector_pipeline=true, + bool remove_ni=true, + bool cleanup=true) { + return {lower_intrinsics, dump_native, external_use, llvm_only, always_inline, enable_early_simplifications, enable_early_optimizations, enable_scalar_optimizations, enable_vector_pipeline, remove_ni, cleanup}; } }; diff --git a/src/pipeline.cpp b/src/pipeline.cpp index b800f065a76d8c..af1996b7e7f651 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -333,6 +333,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder #ifdef JL_DEBUG_BUILD addVerificationPasses(MPM, options.llvm_only); #endif + if (options.enable_early_simplifications) { // Place after verification in case we want to force it anyways MPM.addPass(ForceFunctionAttrsPass()); invokePipelineStartCallbacks(MPM, PB, O); @@ -360,11 
+361,13 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } invokeEarlySimplificationCallbacks(MPM, PB, O); + } MPM.addPass(AfterEarlySimplificationMarkerPass()); } static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeEarlyOptimizationMarkerPass()); + if (options.enable_early_optimizations) { invokeOptimizerEarlyCallbacks(MPM, PB, O); { CGSCCPassManager CGPM; @@ -411,112 +414,119 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } MPM.addPass(GlobalDCEPass()); + } MPM.addPass(AfterEarlyOptimizationMarkerPass()); } static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeLoopOptimizationMarkerPass()); - { - LoopPassManager LPM; - LPM.addPass(LowerSIMDLoopPass()); + if (options.enable_loop_optimizations) { + { + LoopPassManager LPM; + LPM.addPass(LowerSIMDLoopPass()); + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopRotatePass()); + } + invokeLateLoopOptimizationCallbacks(LPM, PB, O); + //We don't know if the loop callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); + } if (O.getSpeedupLevel() >= 2) { - LPM.addPass(LoopRotatePass()); + LoopPassManager LPM; + LPM.addPass(BeforeLICMMarkerPass()); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); + LPM.addPass(LICMPass(LICMOptions())); + LPM.addPass(JuliaLICMPass()); + LPM.addPass(AfterLICMMarkerPass()); + //LICM needs MemorySSA now, so we must use it + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); } - 
invokeLateLoopOptimizationCallbacks(LPM, PB, O); - //We don't know if the loop callbacks support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); - } - if (O.getSpeedupLevel() >= 2) { - LoopPassManager LPM; - LPM.addPass(BeforeLICMMarkerPass()); - LPM.addPass(LICMPass(LICMOptions())); - LPM.addPass(JuliaLICMPass()); - LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true)); - LPM.addPass(LICMPass(LICMOptions())); - LPM.addPass(JuliaLICMPass()); - LPM.addPass(AfterLICMMarkerPass()); - //LICM needs MemorySSA now, so we must use it - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true)); - } - if (O.getSpeedupLevel() >= 2) { - FPM.addPass(IRCEPass()); - } - { - LoopPassManager LPM; - LPM.addPass(BeforeLoopSimplificationMarkerPass()); if (O.getSpeedupLevel() >= 2) { - LPM.addPass(LoopInstSimplifyPass()); - LPM.addPass(LoopIdiomRecognizePass()); - LPM.addPass(IndVarSimplifyPass()); - LPM.addPass(LoopDeletionPass()); - // This unroll will only unroll loops when the trip count is known and small, - // so that no loop remains - LPM.addPass(LoopFullUnrollPass()); + FPM.addPass(IRCEPass()); + } + { + LoopPassManager LPM; + LPM.addPass(BeforeLoopSimplificationMarkerPass()); + if (O.getSpeedupLevel() >= 2) { + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopIdiomRecognizePass()); + LPM.addPass(IndVarSimplifyPass()); + LPM.addPass(LoopDeletionPass()); + // This unroll will only unroll loops when the trip count is known and small, + // so that no loop remains + LPM.addPass(LoopFullUnrollPass()); + } + invokeLoopOptimizerEndCallbacks(LPM, PB, O); + LPM.addPass(AfterLoopSimplificationMarkerPass()); + //We don't know if the loop end callbacks support MSSA + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } - invokeLoopOptimizerEndCallbacks(LPM, PB, O); - LPM.addPass(AfterLoopSimplificationMarkerPass()); - //We don't know if the loop end callbacks 
support MSSA - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false)); } FPM.addPass(AfterLoopOptimizationMarkerPass()); } static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeScalarOptimizationMarkerPass()); - if (O.getSpeedupLevel() >= 2) { - JULIA_PASS(FPM.addPass(AllocOptPass())); -#if JL_LLVM_VERSION >= 160000 - // TODO check the LLVM 15 default. - FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); -#else - FPM.addPass(SROAPass()); -#endif - FPM.addPass(InstSimplifyPass()); - FPM.addPass(GVNPass()); - FPM.addPass(MemCpyOptPass()); - FPM.addPass(SCCPPass()); - FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(DCEPass()); - FPM.addPass(IRCEPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(JumpThreadingPass()); - } - if (O.getSpeedupLevel() >= 3) { - FPM.addPass(GVNPass()); - } - if (O.getSpeedupLevel() >= 2) { - FPM.addPass(DSEPass()); - invokePeepholeEPCallbacks(FPM, PB, O); - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); - JULIA_PASS(FPM.addPass(AllocOptPass())); - { - LoopPassManager LPM; - LPM.addPass(LoopDeletionPass()); - LPM.addPass(LoopInstSimplifyPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + if (options.enable_scalar_optimizations) { + if (O.getSpeedupLevel() >= 2) { + JULIA_PASS(FPM.addPass(AllocOptPass())); + #if JL_LLVM_VERSION >= 160000 + // TODO check the LLVM 15 default. 
+ FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); + #else + FPM.addPass(SROAPass()); + #endif + FPM.addPass(InstSimplifyPass()); + FPM.addPass(GVNPass()); + FPM.addPass(MemCpyOptPass()); + FPM.addPass(SCCPPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(DCEPass()); + FPM.addPass(IRCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(JumpThreadingPass()); + } + if (O.getSpeedupLevel() >= 3) { + FPM.addPass(GVNPass()); } - FPM.addPass(LoopDistributePass()); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(DSEPass()); + invokePeepholeEPCallbacks(FPM, PB, O); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + JULIA_PASS(FPM.addPass(AllocOptPass())); + { + LoopPassManager LPM; + LPM.addPass(LoopDeletionPass()); + LPM.addPass(LoopInstSimplifyPass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM))); + } + FPM.addPass(LoopDistributePass()); + } + invokeScalarOptimizerCallbacks(FPM, PB, O); } - invokeScalarOptimizerCallbacks(FPM, PB, O); FPM.addPass(AfterScalarOptimizationMarkerPass()); } static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { FPM.addPass(BeforeVectorizationMarkerPass()); - //TODO look into loop vectorize options - FPM.addPass(InjectTLIMappings()); - FPM.addPass(LoopVectorizePass()); - FPM.addPass(LoopLoadEliminationPass()); - FPM.addPass(InstCombinePass()); - FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); - FPM.addPass(SLPVectorizerPass()); - invokeVectorizerCallbacks(FPM, PB, O); - FPM.addPass(VectorCombinePass()); - FPM.addPass(ADCEPass()); - //TODO add BDCEPass here? 
- // This unroll will unroll vectorized loops - // as well as loops that we tried but failed to vectorize - FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + if (options.enable_vector_pipeline) { + //TODO look into loop vectorize options + FPM.addPass(InjectTLIMappings()); + FPM.addPass(LoopVectorizePass()); + FPM.addPass(LoopLoadEliminationPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); + FPM.addPass(SLPVectorizerPass()); + invokeVectorizerCallbacks(FPM, PB, O); + FPM.addPass(VectorCombinePass()); + FPM.addPass(ADCEPass()); + //TODO add BDCEPass here? + // This unroll will unroll vectorized loops + // as well as loops that we tried but failed to vectorize + FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false))); + } FPM.addPass(AfterVectorizationMarkerPass()); } @@ -532,6 +542,7 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * } // Needed **before** LateLowerGCFrame on LLVM < 12 // due to bug in `CreateAlignmentAssumption`. 
+ assert(options.remove_ni); JULIA_PASS(MPM.addPass(RemoveNIPass())); { FunctionPassManager FPM; @@ -551,7 +562,7 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions())); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - } else { + } else if (!options.remove_ni) { JULIA_PASS(MPM.addPass(RemoveNIPass())); } MPM.addPass(AfterIntrinsicLoweringMarkerPass()); @@ -559,22 +570,24 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeCleanupMarkerPass()); - if (O.getSpeedupLevel() >= 2) { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(CombineMulAddPass())); - FPM.addPass(DivRemPairsPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); - } - invokeOptimizerLastCallbacks(MPM, PB, O); - MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); - addSanitizerPasses(MPM, O); - { - FunctionPassManager FPM; - JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); + if (options.cleanup) { if (O.getSpeedupLevel() >= 2) { - FPM.addPass(GVNPass()); + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(CombineMulAddPass())); + FPM.addPass(DivRemPairsPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + invokeOptimizerLastCallbacks(MPM, PB, O); + MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); + addSanitizerPasses(MPM, O); + { + FunctionPassManager FPM; + JULIA_PASS(FPM.addPass(DemoteFloat16Pass())); + if (O.getSpeedupLevel() >= 2) { + FPM.addPass(GVNPass()); + } + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } MPM.addPass(AfterCleanupMarkerPass()); } @@ -582,13 +595,14 @@ static void 
buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimi static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT { MPM.addPass(BeforeOptimizationMarkerPass()); buildEarlySimplificationPipeline(MPM, PB, O, options); - MPM.addPass(AlwaysInlinerPass()); + if (options.always_inline) + MPM.addPass(AlwaysInlinerPass()); buildEarlyOptimizerPipeline(MPM, PB, O, options); { FunctionPassManager FPM; buildLoopOptimizerPipeline(FPM, PB, O, options); buildScalarOptimizerPipeline(FPM, PB, O, options); - if (O.getSpeedupLevel() >= 2 && options.enable_vector_pipeline) { + if (O.getSpeedupLevel() >= 2) { buildVectorPipeline(FPM, PB, O, options); } FPM.addPass(WarnMissedTransformationsPass()); @@ -599,11 +613,26 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL MPM.addPass(AfterOptimizationMarkerPass()); } -extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size, - int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT +struct PipelineConfig { + int Speedup; + int Size; + int lower_intrinsics; + int dump_native; + int external_use; + int llvm_only; + int always_inline; + int enable_early_simplifications; + int enable_early_optimizations; + int enable_scalar_optimizations; + int enable_vector_pipeline; + int remove_ni; + int cleanup; +}; + +extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, PipelineConfig* config) JL_NOTSAFEPOINT { OptimizationLevel O; - switch (Size) { + switch (config->Size) { case 1: O = OptimizationLevel::Os; break; @@ -611,7 +640,7 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, voi O = OptimizationLevel::Oz; break; case 0: - switch (Speedup) { + switch (config->Speedup) { case 0: O = OptimizationLevel::O0; break; @@ -627,7 +656,17 @@ extern "C" JL_DLLEXPORT_CODEGEN void 
jl_build_newpm_pipeline_impl(void *MPM, voi } } buildPipeline(*reinterpret_cast<ModulePassManager*>(MPM), reinterpret_cast<PassBuilder*>(PB), O, - OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only}); + OptimizationOptions{!!config->lower_intrinsics, + !!config->dump_native, + !!config->external_use, + !!config->llvm_only, + !!config->always_inline, + !!config->enable_early_simplifications, + !!config->enable_early_optimizations, + !!config->enable_scalar_optimizations, + !!config->enable_vector_pipeline, + !!config->remove_ni, + !!config->cleanup}); } #undef JULIA_PASS