Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring in newpm (new pass manager) updates to master #47038

Merged
merged 9 commits into from
Apr 17, 2023
Merged
11 changes: 10 additions & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8838,6 +8838,15 @@ extern "C" void jl_init_llvm(void)
clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "0", 1);
#ifdef JL_USE_NEW_PM
// For parity with LoopUnswitch
clopt = llvmopts.lookup("unswitch-threshold");
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "100", 1);
clopt = llvmopts.lookup("enable-unswitch-cost-multiplier");
if (clopt->getNumOccurrences() == 0)
cl::ProvidePositionalOption(clopt, "false", 1);
#endif
// if the patch adding this option has been applied, lower its limit to provide
// better DAGCombiner performance.
clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
Expand Down Expand Up @@ -8916,7 +8925,7 @@ extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void)
/// Codegen teardown hook: prints LLVM timing information and statistics.
///
/// Timing output is delegated to JuliaOJIT::printTimers(), which itself ends
/// by calling reportAndResetTimings(). Calling reportAndResetTimings() here as
/// well would report (and reset) the timings a second time, so only the JIT's
/// entry point is used.
extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl() JL_NOTSAFEPOINT
{
    // output LLVM timings and statistics
    jl_ExecutionEngine->printTimers();
    PrintStatistics();
}

Expand Down
49 changes: 38 additions & 11 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,9 @@ namespace {
int optlevel;
PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
: TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
// overload for newpm compatibility
PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &) JL_NOTSAFEPOINT
: PMCreator(TM, optlevel) {}
PMCreator(const PMCreator &other) JL_NOTSAFEPOINT
: PMCreator(*other.TM, other.optlevel) {}
PMCreator(PMCreator &&other) JL_NOTSAFEPOINT
Expand All @@ -1128,18 +1131,23 @@ namespace {
// Callable factory for NewPM instances: operator() builds a TargetMachine from
// JTMB, wraps it in a newly allocated NewPM for optimization level O, and
// appends a callback that prints that NewPM's timers to `printers`.
struct PMCreator {
orc::JITTargetMachineBuilder JTMB;
OptimizationLevel O;
// NOTE(review): this two-argument overload does not bind the `printers`
// reference member declared below. It looks like the pre-change form that the
// three-argument constructor supersedes -- confirm before relying on it.
PMCreator(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
: JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)) {}
// Timer-printing callbacks, one appended per NewPM created by operator().
// Held by reference: the vector itself is supplied by whoever constructs
// this PMCreator.
std::vector<std::function<void()>> &printers;
PMCreator(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
: JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers) {}

// Creates a fresh NewPM backed by a TargetMachine built from JTMB.
auto operator()() JL_NOTSAFEPOINT {
// NOTE(review): returning here makes the timer registration below
// unreachable; presumably this is the pre-change line -- confirm.
return std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
auto NPM = std::make_unique<NewPM>(cantFail(JTMB.createTargetMachine()), O);
// The callback captures a raw, non-owning pointer to the NewPM.
// NOTE(review): no synchronization is visible around this push_back, and
// nothing here removes the callback if the NewPM is destroyed -- confirm
// that operator() is never run concurrently on the same vector and that
// every NewPM outlives the last call through `printers`.
printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
NPM->printTimers();
});
return NPM;
}
};
#endif

struct OptimizerT {
OptimizerT(TargetMachine &TM, int optlevel) JL_NOTSAFEPOINT
: optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}
OptimizerT(TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &printers) JL_NOTSAFEPOINT
: optlevel(optlevel), PMs(PMCreator(TM, optlevel, printers)) {}
OptimizerT(OptimizerT&) JL_NOTSAFEPOINT = delete;
OptimizerT(OptimizerT&&) JL_NOTSAFEPOINT = default;

Expand Down Expand Up @@ -1247,11 +1255,15 @@ llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
return jl_data_layout;
}

// Builds one compile/optimize pipeline on top of BaseLayer:
//  - CompileLayer wraps BaseLayer with a CompilerT created for TM/optlevel;
//  - OptimizeLayer wraps CompileLayer with an OptimizerT transform.
// PrintLLVMTimers is forwarded to the OptimizerT constructor.
// NOTE(review): the signature and the OptimizeLayer transform line each appear
// twice below (without and with PrintLLVMTimers); presumably the first of each
// pair is the pre-change line -- confirm.
JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel)
JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers)
: CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer,
llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel))) {}
llvm::orc::IRTransformLayer::TransformFunction(OptimizerT(TM, optlevel, PrintLLVMTimers))) {}

#ifdef _COMPILER_ASAN_ENABLED_
// Backing storage for the `___asan_globals_registered` symbol. Its address is
// exported to JIT-compiled code as an absolute symbol in the ASan section of
// the JuliaOJIT constructor.
int64_t ___asan_globals_registered;
#endif

JuliaOJIT::JuliaOJIT()
: TM(createTargetMachine()),
Expand Down Expand Up @@ -1285,10 +1297,10 @@ JuliaOJIT::JuliaOJIT()
#endif
LockLayer(ObjectLayer),
Pipelines{
std::make_unique<PipelineT>(LockLayer, *TM, 0),
std::make_unique<PipelineT>(LockLayer, *TM, 1),
std::make_unique<PipelineT>(LockLayer, *TM, 2),
std::make_unique<PipelineT>(LockLayer, *TM, 3),
std::make_unique<PipelineT>(LockLayer, *TM, 0, PrintLLVMTimers),
std::make_unique<PipelineT>(LockLayer, *TM, 1, PrintLLVMTimers),
std::make_unique<PipelineT>(LockLayer, *TM, 2, PrintLLVMTimers),
std::make_unique<PipelineT>(LockLayer, *TM, 3, PrintLLVMTimers),
},
OptSelLayer(Pipelines)
{
Expand Down Expand Up @@ -1393,6 +1405,11 @@ JuliaOJIT::JuliaOJIT()
reinterpret_cast<void *>(static_cast<uintptr_t>(msan_workaround::MSanTLS::origin)), JITSymbolFlags::Exported);
cantFail(GlobalJD.define(orc::absoluteSymbols(msan_crt)));
#endif
#ifdef _COMPILER_ASAN_ENABLED_
orc::SymbolMap asan_crt;
asan_crt[mangle("___asan_globals_registered")] = JITEvaluatedSymbol::fromPointer(&___asan_globals_registered, JITSymbolFlags::Exported);
cantFail(JD.define(orc::absoluteSymbols(asan_crt)));
#endif
}

JuliaOJIT::~JuliaOJIT() = default;
Expand Down Expand Up @@ -1583,6 +1600,16 @@ size_t JuliaOJIT::getTotalBytes() const
}
#endif

/// Prints the LLVM timing information gathered by this JIT.
///
/// When JL_USE_NEW_PM is defined, every callback stored in PrintLLVMTimers is
/// invoked first, in the order it was added. reportAndResetTimings() is then
/// called unconditionally.
void JuliaOJIT::printTimers()
{
#ifdef JL_USE_NEW_PM
    for (auto &emit_timers : PrintLLVMTimers)
        emit_timers();
#endif
    reportAndResetTimings();
}

JuliaOJIT *jl_ExecutionEngine;

// destructively move the contents of src into dest
Expand Down
17 changes: 13 additions & 4 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@
// and feature support (e.g. Windows, JITEventListeners for various profilers,
// etc.). Thus, we currently only use JITLink where absolutely required, that is,
// for Mac/aarch64.
// #define JL_FORCE_JITLINK

#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(JL_FORCE_JITLINK)
#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) || defined(_COMPILER_ASAN_ENABLED_) || defined(JL_FORCE_JITLINK)
# if JL_LLVM_VERSION < 130000
# pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults")
# endif
Expand Down Expand Up @@ -91,6 +89,12 @@ struct OptimizationOptions {
}
};

// LLVM's new pass manager is scheduled to replace the legacy pass manager
// for middle-end IR optimizations. However, we have not qualified the new
// pass manager on our optimization pipeline yet, so this remains an optional
// define
// #define JL_USE_NEW_PM

struct NewPM {
std::unique_ptr<TargetMachine> TM;
StandardInstrumentations SI;
Expand All @@ -103,6 +107,8 @@ struct NewPM {
~NewPM() JL_NOTSAFEPOINT;

void run(Module &M) JL_NOTSAFEPOINT;

void printTimers() JL_NOTSAFEPOINT;
};

struct AnalysisManagers {
Expand Down Expand Up @@ -420,7 +426,7 @@ class JuliaOJIT {
std::unique_ptr<WNMutex> mutex;
};
struct PipelineT {
PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel);
PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel, std::vector<std::function<void()>> &PrintLLVMTimers);
CompileLayerT CompileLayer;
OptimizeLayerT OptimizeLayer;
};
Expand Down Expand Up @@ -490,6 +496,7 @@ class JuliaOJIT {
TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT;

size_t getTotalBytes() const JL_NOTSAFEPOINT;
void printTimers() JL_NOTSAFEPOINT;

jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
return dump_emitted_mi_name_stream;
Expand Down Expand Up @@ -522,6 +529,8 @@ class JuliaOJIT {
jl_locked_stream dump_compiles_stream;
jl_locked_stream dump_llvm_opt_stream;

std::vector<std::function<void()>> PrintLLVMTimers;

ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;

#ifndef JL_USE_JITLINK
Expand Down
27 changes: 15 additions & 12 deletions src/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ namespace {
// Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask);
// Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope;
// Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn();
MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
// MPM.addPass(RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
// MPM.addPass(ModuleAddressSanitizerPass(
// Opts, UseGlobalGC, UseOdrIndicator, DestructorKind));
//Let's assume the defaults are actually fine for our purposes
Expand All @@ -173,11 +173,13 @@ namespace {
// }
}

// NOTE(review): the next line repeats, without `static inline`, the signature
// that follows the #ifdef; presumably it is the pre-change line -- confirm.
void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
#ifdef JL_DEBUG_BUILD
// Appends verification passes to MPM: the GC invariant verifier (adapted to
// run per function) unless llvm_only is set, followed by LLVM's VerifierPass.
// Only compiled when JL_DEBUG_BUILD is defined.
static inline void addVerificationPasses(ModulePassManager &MPM, bool llvm_only) JL_NOTSAFEPOINT {
if (!llvm_only)
MPM.addPass(llvm::createModuleToFunctionPassAdaptor(GCInvariantVerifierPass()));
MPM.addPass(VerifierPass());
}
#endif

auto basicSimplifyCFGOptions() JL_NOTSAFEPOINT {
return SimplifyCFGOptions()
Expand Down Expand Up @@ -244,9 +246,9 @@ namespace {

//Use for O1 and below
static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT {
// #ifdef JL_DEBUG_BUILD
#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
// #endif
#endif
invokePipelineStartCallbacks(MPM, PB, O);
MPM.addPass(ConstantMergePass());
if (!options.dump_native) {
Expand Down Expand Up @@ -320,9 +322,9 @@ static void buildBasicPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimiza

//Use for O2 and above
static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationLevel O, OptimizationOptions options) JL_NOTSAFEPOINT {
// #ifdef JL_DEBUG_BUILD
#ifdef JL_DEBUG_BUILD
addVerificationPasses(MPM, options.llvm_only);
// #endif
#endif
invokePipelineStartCallbacks(MPM, PB, O);
MPM.addPass(ConstantMergePass());
{
Expand Down Expand Up @@ -382,7 +384,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
#endif
LPM2.addPass(LICMPass(LICMOptions()));
JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
LPM2.addPass(SimpleLoopUnswitchPass());
LPM2.addPass(SimpleLoopUnswitchPass(true, true));
LPM2.addPass(LICMPass(LICMOptions()));
JULIA_PASS(LPM2.addPass(JuliaLICMPass()));
//LICM needs MemorySSA now, so we must use it
Expand All @@ -399,7 +401,7 @@ static void buildFullPipeline(ModulePassManager &MPM, PassBuilder *PB, Optimizat
//We don't know if the loop end callbacks support MSSA
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA = */false));
}
FPM.addPass(LoopUnrollPass());
FPM.addPass(LoopUnrollPass(LoopUnrollOptions().setRuntime(false)));
JULIA_PASS(FPM.addPass(AllocOptPass()));
FPM.addPass(SROAPass());
FPM.addPass(InstSimplifyPass());
Expand Down Expand Up @@ -541,11 +543,8 @@ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
// Register the AA manager first so that our version is the one used.
FAM.registerPass([&] JL_NOTSAFEPOINT {
AAManager AA;
// TODO: Why are we only doing this for -O3?
if (O.getSpeedupLevel() >= 3) {
AA.registerFunctionAnalysis<BasicAA>();
}
if (O.getSpeedupLevel() >= 2) {
AA.registerFunctionAnalysis<BasicAA>();
AA.registerFunctionAnalysis<ScopedNoAliasAA>();
AA.registerFunctionAnalysis<TypeBasedAA>();
}
Expand Down Expand Up @@ -603,6 +602,10 @@ void NewPM::run(Module &M) {
#endif
}

// Prints the timing data held by the time-passes handler of this pass
// manager's StandardInstrumentations member (SI).
void NewPM::printTimers() {
    auto &timePasses = SI.getTimePasses();
    timePasses.print();
}

OptimizationLevel getOptLevel(int optlevel) {
switch (std::min(std::max(optlevel, 0), 3)) {
case 0:
Expand Down