Skip to content

Commit

Permalink
Try further tuning
Browse files Browse the repository at this point in the history
  • Loading branch information
gbaraldi committed Jan 12, 2024
1 parent 73cdfd8 commit 30ed1f0
Showing 1 changed file with 33 additions and 14 deletions.
47 changes: 33 additions & 14 deletions src/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <llvm/Passes/PassPlugin.h>

// NewPM needs to manually include all the pass headers
#include <llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/Annotation2Metadata.h>
#include <llvm/Transforms/IPO/ConstantMerge.h>
Expand All @@ -46,6 +47,7 @@
#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
#include <llvm/Transforms/Scalar/ADCE.h>
#include <llvm/Transforms/Scalar/AnnotationRemarks.h>
#include <llvm/Transforms/Scalar/BDCE.h>
#include <llvm/Transforms/Scalar/CorrelatedValuePropagation.h>
#include <llvm/Transforms/Scalar/DCE.h>
#include <llvm/Transforms/Scalar/DeadStoreElimination.h>
Expand Down Expand Up @@ -75,7 +77,9 @@
#include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
#include <llvm/Transforms/Utils/LibCallsShrinkWrap.h>
#include <llvm/Transforms/Utils/InjectTLIMappings.h>
#include <llvm/Transforms/Utils/RelLookupTableConverter.h>
#include <llvm/Transforms/Vectorize/LoopVectorize.h>
#include <llvm/Transforms/Vectorize/SLPVectorizer.h>
#include <llvm/Transforms/Vectorize/VectorCombine.h>
Expand Down Expand Up @@ -209,10 +213,10 @@ namespace {
.convertSwitchRangeToICmp(true)
.convertSwitchToLookupTable(true)
.forwardSwitchCondToPhi(true)
.needCanonicalLoops(false)
// These interfere with loop rotation, so only enable them after loop rotation has run
.hoistCommonInsts(true)
// Causes an SRET assertion error in late-gc-lowering
// .sinkCommonInsts(true)
.sinkCommonInsts(true)
;
}
#if JL_LLVM_VERSION < 150000
Expand Down Expand Up @@ -357,7 +361,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
if (O.getSpeedupLevel() >= 1) {
#if JL_LLVM_VERSION >= 160000
// TODO check the LLVM 15 default.
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
FPM.addPass(SROAPass());
#endif
Expand Down Expand Up @@ -397,20 +401,23 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
if (O.getSpeedupLevel() >= 2) {
#if JL_LLVM_VERSION >= 160000
// TODO check the LLVM 15 default.
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
FPM.addPass(SROAPass());
#endif
// SROA can duplicate PHI nodes which can block LowerSIMD
FPM.addPass(InstCombinePass());
FPM.addPass(EarlyCSEPass());
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(InstCombinePass());
FPM.addPass(AggressiveInstCombinePass());
FPM.addPass(LibCallsShrinkWrapPass());

FPM.addPass(ReassociatePass());
FPM.addPass(EarlyCSEPass());
JULIA_PASS(FPM.addPass(AllocOptPass()));
} else { // if (O.getSpeedupLevel() >= 1) (exactly)
FPM.addPass(InstCombinePass());
FPM.addPass(EarlyCSEPass());
FPM.addPass(InstCombinePass());
}
invokePeepholeEPCallbacks(FPM, PB, O);
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
Expand Down Expand Up @@ -472,16 +479,18 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
JULIA_PASS(FPM.addPass(AllocOptPass()));
#if JL_LLVM_VERSION >= 160000
// TODO check the LLVM 15 default.
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
FPM.addPass(SROAPass());
#endif
FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
FPM.addPass(InstSimplifyPass());
FPM.addPass(GVNPass());
FPM.addPass(MemCpyOptPass());
FPM.addPass(SCCPPass());
FPM.addPass(BDCEPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(DCEPass());
FPM.addPass(ADCEPass());
FPM.addPass(IRCEPass());
FPM.addPass(InstCombinePass());
FPM.addPass(JumpThreadingPass());
Expand All @@ -496,11 +505,12 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
JULIA_PASS(FPM.addPass(AllocOptPass()));
{
LoopPassManager LPM;
LPM.addPass(LoopDeletionPass());
LPM.addPass(LoopInstSimplifyPass());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
LPM.addPass(LICMPass(LICMOptions()));
LPM.addPass(JuliaLICMPass());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
}
FPM.addPass(LoopDistributePass());
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
FPM.addPass(InstCombinePass());
}
invokeScalarOptimizerCallbacks(FPM, PB, O);
FPM.addPass(AfterScalarOptimizationMarkerPass());
Expand All @@ -509,6 +519,13 @@ static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *
static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
FPM.addPass(BeforeVectorizationMarkerPass());
//TODO look into loop vectorize options
// Re-rotate loops that might have been un-rotated during simplification
LoopPassManager LPM;
LPM.addPass(LoopRotatePass());
LPM.addPass(LoopDeletionPass());
FPM.addPass(createFunctionToLoopPassAdaptor(
std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
FPM.addPass(LoopDistributePass());
FPM.addPass(InjectTLIMappings());
FPM.addPass(LoopVectorizePass());
FPM.addPass(LoopLoadEliminationPass());
Expand All @@ -517,11 +534,13 @@ static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, Optim
FPM.addPass(SLPVectorizerPass());
invokeVectorizerCallbacks(FPM, PB, O);
FPM.addPass(VectorCombinePass());
FPM.addPass(ADCEPass());
FPM.addPass(InstCombinePass());
//TODO add BDCEPass here?
// This pass unrolls vectorized loops
// as well as loops that we tried but failed to vectorize
FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(LICMOptions()), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
FPM.addPass(AfterVectorizationMarkerPass());
}

Expand Down

0 comments on commit 30ed1f0

Please sign in to comment.