Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
wsmoses committed Dec 18, 2023
1 parent 69d2b54 commit d3a2dde
Show file tree
Hide file tree
Showing 2 changed files with 159 additions and 108 deletions.
16 changes: 14 additions & 2 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,27 @@ struct OptimizationOptions {
bool dump_native;
bool external_use;
bool llvm_only;
bool always_inline;
bool enable_early_simplifications;
bool enable_early_optimizations;
bool enable_scalar_optimizations;
bool enable_vector_pipeline;
bool remove_ni;
bool cleanup;

static constexpr OptimizationOptions defaults(
bool lower_intrinsics=true,
bool dump_native=false,
bool external_use=false,
bool llvm_only=false,
bool enable_vector_pipeline=true) {
return {lower_intrinsics, dump_native, external_use, llvm_only, enable_vector_pipeline};
bool always_inline=true,
bool enable_early_simplifications=true,
bool enable_early_optimizations=true,
bool enable_scalar_optimizations=true,
bool enable_vector_pipeline=true,
bool remove_ni=true,
bool cleanup=true) {
return {lower_intrinsics, dump_native, external_use, llvm_only, always_inline, enable_early_simplifications, enable_early_optimizations, enable_scalar_optimizations, enable_vector_pipeline, remove_ni, cleanup};
}
};

Expand Down
251 changes: 145 additions & 106 deletions src/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
#endif
if (options.enable_early_simplifications) {
// Place after verification in case we want to force it anyways
MPM.addPass(ForceFunctionAttrsPass());
invokePipelineStartCallbacks(MPM, PB, O);
Expand Down Expand Up @@ -360,11 +361,13 @@ static void buildEarlySimplificationPipeline(ModulePassManager &MPM, PassBuilder
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
invokeEarlySimplificationCallbacks(MPM, PB, O);
}
MPM.addPass(AfterEarlySimplificationMarkerPass());
}

static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
MPM.addPass(BeforeEarlyOptimizationMarkerPass());
if (options.enable_early_optimizations) {
invokeOptimizerEarlyCallbacks(MPM, PB, O);
{
CGSCCPassManager CGPM;
Expand Down Expand Up @@ -411,112 +414,119 @@ static void buildEarlyOptimizerPipeline(ModulePassManager &MPM, PassBuilder *PB,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
MPM.addPass(GlobalDCEPass());
}
MPM.addPass(AfterEarlyOptimizationMarkerPass());
}

// Appends the loop-optimization stage to FPM.
//
// The Before/After loop-optimization marker passes are always added, so the
// stage boundaries stay visible in the pipeline even when the stage itself is
// switched off. Everything in between is added only when
// options.enable_loop_optimizations is set.
//
// NOTE(review): `enable_loop_optimizations` does not appear among the
// OptimizationOptions fields initialised by OptimizationOptions::defaults(),
// nor in PipelineConfig, as far as this change shows -- confirm the field is
// declared and plumbed through, otherwise this will not compile.
static void buildLoopOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
    FPM.addPass(BeforeLoopOptimizationMarkerPass());
    if (options.enable_loop_optimizations) {
        // Stage 1: SIMD-loop lowering, plus rotation at speedup level >= 2.
        {
            LoopPassManager LPM;
            LPM.addPass(LowerSIMDLoopPass());
            if (O.getSpeedupLevel() >= 2) {
                LPM.addPass(LoopRotatePass());
            }
            invokeLateLoopOptimizationCallbacks(LPM, PB, O);
            //We don't know if the loop callbacks support MSSA
            FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
        }
        // Stage 2: LICM / unswitch / LICM, bracketed by its own marker passes.
        if (O.getSpeedupLevel() >= 2) {
            LoopPassManager LPM;
            LPM.addPass(BeforeLICMMarkerPass());
            LPM.addPass(LICMPass(LICMOptions()));
            LPM.addPass(JuliaLICMPass());
            LPM.addPass(SimpleLoopUnswitchPass(/*NonTrivial*/true, true));
            LPM.addPass(LICMPass(LICMOptions()));
            LPM.addPass(JuliaLICMPass());
            LPM.addPass(AfterLICMMarkerPass());
            //LICM needs MemorySSA now, so we must use it
            FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */true));
        }
        if (O.getSpeedupLevel() >= 2) {
            FPM.addPass(IRCEPass());
        }
        // Stage 3: loop simplification; the marker passes and end-of-loop
        // callbacks run at every speedup level, the passes only at >= 2.
        {
            LoopPassManager LPM;
            LPM.addPass(BeforeLoopSimplificationMarkerPass());
            if (O.getSpeedupLevel() >= 2) {
                LPM.addPass(LoopInstSimplifyPass());
                LPM.addPass(LoopIdiomRecognizePass());
                LPM.addPass(IndVarSimplifyPass());
                LPM.addPass(LoopDeletionPass());
                // This unroll will only unroll loops when the trip count is known and small,
                // so that no loop remains
                LPM.addPass(LoopFullUnrollPass());
            }
            invokeLoopOptimizerEndCallbacks(LPM, PB, O);
            LPM.addPass(AfterLoopSimplificationMarkerPass());
            //We don't know if the loop end callbacks support MSSA
            FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
        }
    }
    FPM.addPass(AfterLoopOptimizationMarkerPass());
}

// Appends the scalar-optimization stage to FPM.
//
// The Before/After scalar-optimization marker passes are always added. The
// passes between them -- including the scalar-optimizer callbacks -- are added
// only when options.enable_scalar_optimizations is set.
static void buildScalarOptimizerPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
    FPM.addPass(BeforeScalarOptimizationMarkerPass());
    if (options.enable_scalar_optimizations) {
        if (O.getSpeedupLevel() >= 2) {
            JULIA_PASS(FPM.addPass(AllocOptPass()));
#if JL_LLVM_VERSION >= 160000
            // TODO check the LLVM 15 default.
            FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
#else
            FPM.addPass(SROAPass());
#endif
            FPM.addPass(InstSimplifyPass());
            FPM.addPass(GVNPass());
            FPM.addPass(MemCpyOptPass());
            FPM.addPass(SCCPPass());
            FPM.addPass(CorrelatedValuePropagationPass());
            FPM.addPass(DCEPass());
            FPM.addPass(IRCEPass());
            FPM.addPass(InstCombinePass());
            FPM.addPass(JumpThreadingPass());
        }
        // A second GVN run is added only at speedup level >= 3.
        if (O.getSpeedupLevel() >= 3) {
            FPM.addPass(GVNPass());
        }
        if (O.getSpeedupLevel() >= 2) {
            FPM.addPass(DSEPass());
            invokePeepholeEPCallbacks(FPM, PB, O);
            FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
            JULIA_PASS(FPM.addPass(AllocOptPass()));
            {
                LoopPassManager LPM;
                LPM.addPass(LoopDeletionPass());
                LPM.addPass(LoopInstSimplifyPass());
                FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
            }
            FPM.addPass(LoopDistributePass());
        }
        // Callbacks sit inside the guard: disabling the stage skips them too.
        invokeScalarOptimizerCallbacks(FPM, PB, O);
    }
    FPM.addPass(AfterScalarOptimizationMarkerPass());
}

// Appends the vectorization stage to FPM.
//
// The Before/After vectorization marker passes are always added. The
// vectorizer passes and vectorizer callbacks are added only when
// options.enable_vector_pipeline is set. This function does not check the
// speedup level itself.
static void buildVectorPipeline(FunctionPassManager &FPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
    FPM.addPass(BeforeVectorizationMarkerPass());
    if (options.enable_vector_pipeline) {
        //TODO look into loop vectorize options
        FPM.addPass(InjectTLIMappings());
        FPM.addPass(LoopVectorizePass());
        FPM.addPass(LoopLoadEliminationPass());
        FPM.addPass(InstCombinePass());
        FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
        FPM.addPass(SLPVectorizerPass());
        invokeVectorizerCallbacks(FPM, PB, O);
        FPM.addPass(VectorCombinePass());
        FPM.addPass(ADCEPass());
        //TODO add BDCEPass here?
        // This unroll will unroll vectorized loops
        // as well as loops that we tried but failed to vectorize
        FPM.addPass(LoopUnrollPass(LoopUnrollOptions(O.getSpeedupLevel(), /*OnlyWhenForced = */ false, /*ForgetSCEV = */false)));
    }
    FPM.addPass(AfterVectorizationMarkerPass());
}

Expand All @@ -532,6 +542,7 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
}
// Needed **before** LateLowerGCFrame on LLVM < 12
// due to bug in `CreateAlignmentAssumption`.
assert(options.remove_ni);
JULIA_PASS(MPM.addPass(RemoveNIPass()));
{
FunctionPassManager FPM;
Expand All @@ -551,44 +562,47 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
FPM.addPass(SimplifyCFGPass(aggressiveSimplifyCFGOptions()));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
} else {
} else if (!options.remove_ni) {
JULIA_PASS(MPM.addPass(RemoveNIPass()));
}
MPM.addPass(AfterIntrinsicLoweringMarkerPass());
}

// Appends the cleanup stage to MPM.
//
// The Before/After cleanup marker passes are always added. Everything between
// them is added only when options.cleanup is set; note that this includes the
// optimizer-last callbacks, the annotation-remarks pass, the sanitizer passes
// and DemoteFloat16Pass, not just the optional clean-up optimizations.
static void buildCleanupPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
    MPM.addPass(BeforeCleanupMarkerPass());
    if (options.cleanup) {
        if (O.getSpeedupLevel() >= 2) {
            FunctionPassManager FPM;
            JULIA_PASS(FPM.addPass(CombineMulAddPass()));
            FPM.addPass(DivRemPairsPass());
            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
        }
        invokeOptimizerLastCallbacks(MPM, PB, O);
        MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
        addSanitizerPasses(MPM, O);
        {
            FunctionPassManager FPM;
            JULIA_PASS(FPM.addPass(DemoteFloat16Pass()));
            // GVN after the Float16 demotion is added only at speedup level >= 2.
            if (O.getSpeedupLevel() >= 2) {
                FPM.addPass(GVNPass());
            }
            MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
        }
    }
    MPM.addPass(AfterCleanupMarkerPass());
}

static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, const OptimizationOptions &options) JL_NOTSAFEPOINT {
MPM.addPass(BeforeOptimizationMarkerPass());
buildEarlySimplificationPipeline(MPM, PB, O, options);
MPM.addPass(AlwaysInlinerPass());
if (options.always_inline)
MPM.addPass(AlwaysInlinerPass());
buildEarlyOptimizerPipeline(MPM, PB, O, options);
{
FunctionPassManager FPM;
buildLoopOptimizerPipeline(FPM, PB, O, options);
buildScalarOptimizerPipeline(FPM, PB, O, options);
if (O.getSpeedupLevel() >= 2 && options.enable_vector_pipeline) {
if (O.getSpeedupLevel() >= 2) {
buildVectorPipeline(FPM, PB, O, options);
}
FPM.addPass(WarnMissedTransformationsPass());
Expand All @@ -599,19 +613,34 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
MPM.addPass(AfterOptimizationMarkerPass());
}

extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, int Speedup, int Size,
int lower_intrinsics, int dump_native, int external_use, int llvm_only) JL_NOTSAFEPOINT
// Plain-data configuration record handed by pointer to the extern "C" entry
// point jl_build_newpm_pipeline_impl. Every member is an `int`; the entry
// point switches on Size/Speedup to pick an OptimizationLevel and converts
// each remaining member to bool with `!!` when building OptimizationOptions.
// Member order matches the order in which that aggregate is initialised.
// NOTE(review): there is no member for `enable_loop_optimizations`, which
// buildLoopOptimizerPipeline reads from OptimizationOptions -- confirm.
struct PipelineConfig {
// Speed level; only consulted when Size == 0 (0 selects OptimizationLevel::O0).
int Speedup;
// Size level: 0 defers to Speedup, 1 selects Os, any other value selects Oz.
int Size;
// The members below are treated as booleans (zero = off, non-zero = on).
int lower_intrinsics;
int dump_native;
int external_use;
int llvm_only;
int always_inline;
int enable_early_simplifications;
int enable_early_optimizations;
int enable_scalar_optimizations;
int enable_vector_pipeline;
int remove_ni;
int cleanup;
};

extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, PipelineConfig* config) JL_NOTSAFEPOINT
{
OptimizationLevel O;
switch (Size) {
switch (config->Size) {
case 1:
O = OptimizationLevel::Os;
break;
default:
O = OptimizationLevel::Oz;
break;
case 0:
switch (Speedup) {
switch (config->Speedup) {
case 0:
O = OptimizationLevel::O0;
break;
Expand All @@ -627,7 +656,17 @@ extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, voi
}
}
buildPipeline(*reinterpret_cast<ModulePassManager*>(MPM), reinterpret_cast<PassBuilder*>(PB), O,
OptimizationOptions{!!lower_intrinsics, !!dump_native, !!external_use, !!llvm_only});
OptimizationOptions{!!config->lower_intrinsics,
!!config->dump_native,
!!config->external_use,
!!config->llvm_only,
!!config->always_inline,
!!config->enable_early_simplifications,
!!config->enable_early_optimizations,
!!config->enable_scalar_optimizations,
!!config->enable_vector_pipeline,
!!config->remove_ni,
!!config->cleanup});
}

#undef JULIA_PASS
Expand Down

0 comments on commit d3a2dde

Please sign in to comment.