Skip to content

Commit

Permalink
Bring in newpm (new pass manager) updates to master (#47038)
Browse files Browse the repository at this point in the history
* Workaround missing ASAN global
* Add alias analysis at O2 instead of O3
* Disable runtime unrolling
* Make SimpleLoopUnswitch act like LoopUnswitch
* Add --time-passes support
* Only add verification passes in debug mode
* Hide assertion function
  • Loading branch information
pchintalapudi authored Apr 17, 2023
1 parent 32003af commit e08e144
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 28 deletions.
11 changes: 10 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8838,6 +8838,15 @@ extern "C" void jl_init_llvm(void)
clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "0", 1);
#ifdef JL_USE_NEW_PM
// For parity with LoopUnswitch
clopt = llvmopts.lookup("unswitch-threshold");
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "100", 1);
clopt = llvmopts.lookup("enable-unswitch-cost-multiplier");
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "false", 1);
#endif
// if the patch adding this option has been applied, lower its limit to provide
// better DAGCombiner performance.
clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
Expand Down Expand Up @@ -8916,7 +8925,7 @@ extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void)
// Codegen teardown hook: dumps LLVM pass timings and statistics.
extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
{
    // output LLVM timings and statistics
    // JuliaOJIT::printTimers() finishes by calling reportAndResetTimings()
    // itself, so the timings are not reported separately here.
    jl_ExecutionEngine->printTimers();
    PrintStatistics();
}

Expand Down
49 changes: 38 additions & 11 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,9 @@ namespace {
int optlevel;
PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
: TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
// overload for newpm compatibility
PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &) JL_NOTSAFEPOINT
: PMCreator(TM, optlevel) {}
PMCreator(const PMCreator &other) JL_NOTSAFEPOINT
: PMCreator(*other.TM, other.optlevel) {}
PMCreator(PMCreator &&other) JL_NOTSAFEPOINT
Expand All @@ -1128,18 +1131,23 @@ namespace {
// New-pass-manager flavour of PMCreator: a callable factory that builds a
// fresh NewPM (with its own TargetMachine) every time it is invoked.
struct PMCreator {
    orc::JITTargetMachineBuilder JTMB;
    OptimizationLevel O;
    // Non-owning reference to the caller's list of timer-printing callbacks.
    // Because this is a reference member, every constructor must bind it and
    // the referenced vector must outlive this creator.
    std::vector<std::function<void()>> &printers;

    PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
        : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {}

    // Creates a new pass manager and registers a callback that prints its
    // pass timers.
    auto operator()() JL_NOTSAFEPOINT {
        auto NPM = std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
        // The callback captures a raw, non-owning pointer: ownership of the
        // NewPM is handed to the caller through the returned unique_ptr.
        // NOTE(review): the callback must not run after that NewPM has been
        // destroyed, and push_back is unsynchronized here -- confirm that
        // callers serialize invocations of this factory.
        printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
            NPM->printTimers();
        });
        return NPM;
    }
};
#endif

struct OptimizerT {
OptimizerT(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
: optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}
OptimizerT(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
: optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {}
OptimizerT(OptimizerT&) JL_NOTSAFEPOINT = delete;
OptimizerT(OptimizerT&&) JL_NOTSAFEPOINT = default;

Expand Down Expand Up @@ -1247,11 +1255,15 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
return jl_data_layout;
}

// Builds the layer stack for a single optimization level: a compile layer on
// top of BaseLayer, and an optimize layer on top of that compile layer.
// PrintLLVMTimers is forwarded to the OptimizerT that backs the optimize
// layer's transform function.
JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers)
    : CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
                   std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
      OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer,
                    llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel, PrintLLVMTimers))) {}

#ifdef _COMPILER_ASAN_ENABLED_
// Storage for the `___asan_globals_registered` symbol (workaround for the
// missing ASAN global). When ASAN is enabled, the JuliaOJIT constructor
// registers the address of this variable as an exported absolute symbol so
// that JIT-compiled code can resolve it.
int64_t ___asan_globals_registered;
#endif

JuliaOJIT::JuliaOJIT()
: TM(createTargetMachine()),
Expand Down Expand Up @@ -1285,10 +1297,10 @@ JuliaOJIT::JuliaOJIT()
#endif
LockLayer(ObjectLayer),
Pipelines{
std::make_unique<PipelineT>(LockLayer, *TM, 0),
std::make_unique<PipelineT>(LockLayer, *TM, 1),
std::make_unique<PipelineT>(LockLayer, *TM, 2),
std::make_unique<PipelineT>(LockLayer, *TM, 3),
std::make_unique<PipelineT>(LockLayer, *TM, 0, PrintLLVMTimers),
std::make_unique<PipelineT>(LockLayer, *TM, 1, PrintLLVMTimers),
std::make_unique<PipelineT>(LockLayer, *TM, 2, PrintLLVMTimers),
std::make_unique<PipelineT>(LockLayer, *TM, 3, PrintLLVMTimers),
},
OptSelLayer(Pipelines)
{
Expand Down Expand Up @@ -1393,6 +1405,11 @@ JuliaOJIT::JuliaOJIT()
reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported);
cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt)));
#endif
#ifdef _COMPILER_ASAN_ENABLED_
orc::SymbolMap asan_crt;
asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported);
cantFail(JD.define(orc::absoluteSymbols(asan_crt)));
#endif
}

JuliaOJIT::~JuliaOJIT() = default;
Expand Down Expand Up @@ -1583,6 +1600,16 @@ size_t JuliaOJIT::getTotalBytes() const
}
#endif

// Prints LLVM pass timing information: first every registered new-pass-manager
// timer callback (only when built with JL_USE_NEW_PM), then LLVM's own
// timing report, which is also reset.
void JuliaOJIT::printTimers()
{
#ifdef JL_USE_NEW_PM
    // Callbacks run in registration order.
    for (const auto &print_timer : PrintLLVMTimers)
        print_timer();
#endif
    reportAndResetTimings();
}

JuliaOJIT *jl_ExecutionEngine;

// destructively move the contents of src into dest
Expand Down
17 changes: 13 additions & 4 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@
// and feature support (e.g. Windows, JITEventListeners for various profilers,
// etc.). Thus, we currently only use JITLink where absolutely required, that is,
// for Mac/aarch64.
// #define JL_FORCE_JITLINK

#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK)
#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(_COMPILER_ASAN_ENABLED_) || defined(JL_FORCE_JITLINK)
# if JL_LLVM_VERSION < 130000
# pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults")
# endif
Expand Down Expand Up @@ -91,6 +89,12 @@ struct OptimizationOptions {
}
};

// LLVM's new pass manager is scheduled to replace the legacy pass manager
// for middle-end IR optimizations. However, we have not yet qualified the new
// pass manager on our optimization pipeline, so it remains an opt-in
// define.
// #define JL_USE_NEW_PM

struct NewPM {
std::unique_ptr<TargetMachine> TM;
StandardInstrumentations SI;
Expand All @@ -103,6 +107,8 @@ struct NewPM {
~NewPM() JL_NOTSAFEPOINT;

void run(Module &M) JL_NOTSAFEPOINT;

void printTimers() JL_NOTSAFEPOINT;
};

struct AnalysisManagers {
Expand Down Expand Up @@ -420,7 +426,7 @@ class JuliaOJIT {
std::unique_ptr<WNMutex> mutex;
};
// One compile + optimize layer pair, built for a single optimization level.
struct PipelineT {
    // PrintLLVMTimers is forwarded to the optimizer backing OptimizeLayer.
    PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers);
    CompileLayerT CompileLayer;
    OptimizeLayerT OptimizeLayer;
};
Expand Down Expand Up @@ -490,6 +496,7 @@ class JuliaOJIT {
TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT;

size_t getTotalBytes() const JL_NOTSAFEPOINT;
void printTimers() JL_NOTSAFEPOINT;

jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
return dump_emitted_mi_name_stream;
Expand Down Expand Up @@ -522,6 +529,8 @@ class JuliaOJIT {
jl_locked_stream dump_compiles_stream;
jl_locked_stream dump_llvm_opt_stream;

std::vector<std::function<void()>> PrintLLVMTimers;

ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;

#ifndef JL_USE_JITLINK
Expand Down
27 changes: 15 additions & 12 deletions src/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ namespace {
// Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
// Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
// Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
// MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
// MPM.addPass(ModuleAddressSanitizerPass(
// Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
//Let's assume the defaults are actually fine for our purposes
Expand All @@ -173,11 +173,13 @@ namespace {
// }
}

#ifdef JL_DEBUG_BUILD
// Appends IR verification passes to MPM. Only compiled in debug builds; the
// call sites are guarded by the same JL_DEBUG_BUILD check.
// llvm_only: when true, the Julia-specific GC invariant verifier is skipped
// and only LLVM's own VerifierPass is added.
static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
    if (!llvm_only)
        MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass()));
    MPM.addPass(VerifierPass());
}
#endif

auto basicSimplifyCFGOptions() JL_NOTSAFEPOINT {
return SimplifyCFGOptions()
Expand Down Expand Up @@ -244,9 +246,9 @@ namespace {

//Use for O1 and below
static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT {
// #ifdef JL_DEBUG_BUILD
#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
// #endif
#endif
invokePipelineStartCallbacks(MPM, PB, O);
MPM.addPass(ConstantMergePass());
if (!options.dump_native) {
Expand Down Expand Up @@ -320,9 +322,9 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza

//Use for O2 and above
static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT {
// #ifdef JL_DEBUG_BUILD
#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
// #endif
#endif
invokePipelineStartCallbacks(MPM, PB, O);
MPM.addPass(ConstantMergePass());
{
Expand Down Expand Up @@ -382,7 +384,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
#endif
LPM2.addPass(LICMPass(LICMOptions()));
JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
LPM2.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(SimpleLoopUnswitchPass(true, true));
LPM2.addPass(LICMPass(LICMOptions()));
JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
//LICM needs MemorySSA now, so we must use it
Expand All @@ -399,7 +401,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
//We don't know if the loop end callbacks support MSSA
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
}
FPM.addPass(LoopUnrollPass());
FPM.addPass(LoopUnrollPass(LoopUnrollOptions().setRuntime(false)));
JULIA_PASS(FPM.addPass(AllocOptPass()));
FPM.addPass(SROAPass());
FPM.addPass(InstSimplifyPass());
Expand Down Expand Up @@ -541,11 +543,8 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] JL_NOTSAFEPOINT {
AAManager AA;
// TODO: Why are we only doing this for -O3?
if (O.getSpeedupLevel() >= 3) {
AA.registerFunctionAnalysis<BasicAA>();
}
if (O.getSpeedupLevel() >= 2) {
AA.registerFunctionAnalysis<BasicAA>();
AA.registerFunctionAnalysis<ScopedNoAliasAA>();
AA.registerFunctionAnalysis<TypeBasedAA>();
}
Expand Down Expand Up @@ -603,6 +602,10 @@ void NewPM::run(Module &M) {
#endif
}

// Prints the time-passes handler held by this pass manager's
// StandardInstrumentations (SI).
void NewPM::printTimers() {
    auto &pass_timers = SI.getTimePasses();
    pass_timers.print();
}

OptimizationLevel getOptLevel(int optlevel) {
switch (std::min(std::max(optlevel, 0), 3)) {
case 0:
Expand Down

0 comments on commit e08e144

Please sign in to comment.