Skip to content

Commit

Permalink
Don't reuse passmanagers across compilations (JuliaLang#52054)
Browse files Browse the repository at this point in the history
  • Loading branch information
gbaraldi authored and mkitti committed Dec 9, 2023
1 parent 4262c52 commit 90f398a
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 45 deletions.
1 change: 1 addition & 0 deletions doc/src/devdocs/locks.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The following are definitely leaf locks (level 1), and must not try to acquire a
> * jl_in_stackwalk (Win32)
> * ResourcePool<?>::mutex
> * RLST_mutex
> * llvm_printing_mutex
> * jl_locked_stream::mutex
> * debuginfo_asyncsafe
> * inference_timing_mutex
Expand Down
3 changes: 3 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1414,12 +1414,15 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
return outputs;
}

// Flag defined outside this translation unit. compute_image_thread_count()
// reads it and falls back to a single thread whenever it is non-zero.
extern int jl_is_timing_passes;
static unsigned compute_image_thread_count(const ModuleInfo &info) {
// 32-bit systems are very memory-constrained
#ifdef _P32
LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n");
return 1;
#endif
if (jl_is_timing_passes) // LLVM isn't thread safe when timing the passes https://github.com/llvm/llvm-project/issues/44417
return 1;
// COFF has limits on external symbols (even hidden) up to 65536. We reserve the last few
// for any of our other symbols that we insert during compilation.
if (info.triple.isOSBinFormatCOFF() && info.globals > 64000) {
Expand Down
7 changes: 7 additions & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9483,6 +9483,8 @@ char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile
char jl_using_perf_jitevents = 0;
#endif

// Non-zero when LLVM pass timing was requested. jl_init_llvm() sets this to 1
// if the "time-passes" LLVM option occurred at least once; it stays 0 otherwise.
int jl_is_timing_passes = 0;

extern "C" void jl_init_llvm(void)
{
jl_page_size = jl_getpagesize();
Expand Down Expand Up @@ -9541,6 +9543,11 @@ extern "C" void jl_init_llvm(void)
if (clopt && clopt->getNumOccurrences() == 0) {
clopt->addOccurrence(1, clopt->ArgStr, "false", true);
}

clopt = llvmopts.lookup("time-passes");
if (clopt && clopt->getNumOccurrences() > 0)
jl_is_timing_passes = 1;

jl_ExecutionEngine = new JuliaOJIT();

bool jl_using_gdb_jitevents = false;
Expand Down
20 changes: 12 additions & 8 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1249,26 +1249,30 @@ namespace {
orc::JITTargetMachineBuilder JTMB;
OptimizationLevel O;
SmallVector<std::function<void()>, 0> &printers;
// Mutex held while appending to `printers` (see operator()).
std::mutex &llvm_printing_mutex;

// Records everything needed to build pass managers on demand:
//   TM                  - target machine used to derive the JITTargetMachineBuilder
//   optlevel            - integer optimization level, mapped through getOptLevel()
//   printers            - vector that receives one timer-printing callback per
//                         pass manager created by operator()
//   llvm_printing_mutex - lock protecting `printers`
// Both reference members must be bound here, so this is the only constructor.
PMCreator(TargetMachine &TM, int optlevel, SmallVector<std::function<void()>, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT
    : JTMB(createJTMBFromTM(TM, optlevel)), O(getOptLevel(optlevel)), printers(printers), llvm_printing_mutex(llvm_printing_mutex) {}

// Creates a new NewPM for this creator's optimization level.
//
// A fresh TargetMachine is built from JTMB and passed through fixupTM()
// before being handed to the NewPM. A callback that prints the new pass
// manager's timers is appended to `printers` exactly once.
// Returns the owning std::unique_ptr<NewPM>.
auto operator()() JL_NOTSAFEPOINT {
    auto TM = cantFail(JTMB.createTargetMachine());
    fixupTM(*TM);
    auto NPM = std::make_unique<NewPM>(std::move(TM), O);
    {
        // Different resource pools may add to the printer vector at the same
        // time, so the append must happen under the shared mutex.
        std::lock_guard<std::mutex> lock(llvm_printing_mutex);
        // The callback holds a raw, non-owning pointer.
        // NOTE(review): assumes the returned NewPM outlives every call of the
        // registered printers - confirm against the owner of `printers`.
        printers.push_back([NPM = NPM.get()]() JL_NOTSAFEPOINT {
            NPM->printTimers();
        });
    }
    return NPM;
}
};

template<size_t N>
struct OptimizerT {
// Builds one pass-manager resource pool per optimization level 0..N-1.
//   TM                  - target machine forwarded to each PMCreator
//   printers            - vector the creators append timer-printing callbacks to
//   llvm_printing_mutex - lock forwarded to each PMCreator to guard `printers`
OptimizerT(TargetMachine &TM, SmallVector<std::function<void()>, 0> &printers, std::mutex &llvm_printing_mutex) JL_NOTSAFEPOINT {
    for (size_t i = 0; i < N; i++) {
        // Each pool is created exactly once, with the mutex-aware creator.
        PMs[i] = std::make_unique<JuliaOJIT::ResourcePool<std::unique_ptr<PassManager>>>(PMCreator(TM, i, printers, llvm_printing_mutex));
    }
}

Expand Down Expand Up @@ -1706,7 +1710,7 @@ JuliaOJIT::JuliaOJIT()
LockLayer(ObjectLayer),
CompileLayer(ES, LockLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))),
OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT<N_optlevels>(*TM, PrintLLVMTimers))),
OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT<N_optlevels>(*TM, PrintLLVMTimers, llvm_printing_mutex))),
OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)),
DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)),
ExternalCompileLayer(ES, LockLayer,
Expand Down
11 changes: 4 additions & 7 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <llvm/IR/Value.h>
#include <llvm/IR/PassManager.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/PassTimingInfo.h>

#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
#include <llvm/ExecutionEngine/Orc/IRTransformLayer.h>
Expand Down Expand Up @@ -101,14 +102,9 @@ struct OptimizationOptions {

struct NewPM {
std::unique_ptr<TargetMachine> TM;
#if JL_LLVM_VERSION < 160000
StandardInstrumentations SI;
#endif
std::unique_ptr<PassInstrumentationCallbacks> PIC;
PassBuilder PB;
ModulePassManager MPM;
OptimizationLevel O;

OptimizationOptions options;
TimePassesHandler TimePasses;
NewPM(std::unique_ptr<TargetMachine> TM, OptimizationLevel O, OptimizationOptions options = OptimizationOptions::defaults()) JL_NOTSAFEPOINT;
~NewPM() JL_NOTSAFEPOINT;

Expand Down Expand Up @@ -582,6 +578,7 @@ class JuliaOJIT {
jl_locked_stream dump_compiles_stream;
jl_locked_stream dump_llvm_opt_stream;

std::mutex llvm_printing_mutex{};
SmallVector<std::function<void()>, 0> PrintLLVMTimers;

ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
Expand Down
55 changes: 25 additions & 30 deletions src/pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -701,21 +701,6 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
PIC.addClassToPassName("AfterOptimizationMarkerPass", "AfterOptimization");
}

#if JL_LLVM_VERSION < 160000
// Older-LLVM variant: allocates the instrumentation callbacks, applies
// adjustPIC() to them, then lets the supplied StandardInstrumentations
// register its own callbacks on the same object.
auto createPIC(StandardInstrumentations &SI) JL_NOTSAFEPOINT {
    auto Callbacks = std::make_unique<PassInstrumentationCallbacks>();
    adjustPIC(*Callbacks);
    SI.registerCallbacks(*Callbacks);
    return Callbacks;
}
#else
// LLVM 16+ variant: allocates the instrumentation callbacks and applies
// adjustPIC() to them; no StandardInstrumentations is involved here.
auto createPIC() JL_NOTSAFEPOINT {
    auto Callbacks = std::make_unique<PassInstrumentationCallbacks>();
    adjustPIC(*Callbacks);
    return Callbacks;
}
#endif

FunctionAnalysisManager createFAM(OptimizationLevel O, TargetMachine &TM) JL_NOTSAFEPOINT {

FunctionAnalysisManager FAM;
Expand Down Expand Up @@ -744,15 +729,8 @@ PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
}

// Stores the target machine, optimization level and options, and
// default-constructs the pass-timing handler. The PassBuilder and
// ModulePassManager are built locally inside run(), not here.
NewPM::NewPM(std::unique_ptr<TargetMachine> TM, OptimizationLevel O, OptimizationOptions options) :
    TM(std::move(TM)), O(O), options(options), TimePasses() {}


// Defaulted out of line; the in-struct declaration carries JL_NOTSAFEPOINT.
NewPM::~NewPM() = default;

Expand All @@ -778,17 +756,34 @@ void NewPM::run(Module &M) {
//We must recreate the analysis managers every time
//so that analyses from previous runs of the pass manager
//do not hang around for the next run
AnalysisManagers AM{*TM, PB, O};

#if JL_LLVM_VERSION >= 160000
StandardInstrumentations SI(M.getContext(),false);
#else
StandardInstrumentations SI(false);
#endif
FunctionAnalysisManager FAM(createFAM(O, *TM.get()));
PassInstrumentationCallbacks PIC;
adjustPIC(PIC);
TimePasses.registerCallbacks(PIC);
SI.registerCallbacks(PIC, &FAM);
SI.getTimePasses().setOutStream(nulls()); //TODO: figure out a better way of doing this
LoopAnalysisManager LAM;
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
PassBuilder PB(TM.get(), PipelineTuningOptions(), None, &PIC);
PB.registerLoopAnalyses(LAM);
PB.registerFunctionAnalyses(FAM);
PB.registerCGSCCAnalyses(CGAM);
PB.registerModuleAnalyses(MAM);
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
ModulePassManager MPM = createMPM(PB, O, options);
#ifndef __clang_gcanalyzer__ /* the analyzer cannot prove we have not added instrumentation callbacks with safepoints */
MPM.run(M, AM.MAM);
MPM.run(M, MAM);
#endif
}

// Prints the pass timings collected by this NewPM's TimePassesHandler.
// run() registers TimePasses on the instrumentation callbacks of every
// pipeline it builds. The timer owned by run()'s local
// StandardInstrumentations has its output sent to nulls(), so it is
// deliberately not printed here.
void NewPM::printTimers() {
    TimePasses.print();
}

OptimizationLevel getOptLevel(int optlevel) {
Expand Down

0 comments on commit 90f398a

Please sign in to comment.