From 800104cfc797fa28184c3863c529201138eb247d Mon Sep 17 00:00:00 2001 From: Prem Chintalapudi Date: Mon, 18 Apr 2022 15:05:34 -0400 Subject: [PATCH] Create new pass manager struct --- src/aotcompile.cpp | 123 +++++++++++++++++++---------------- src/codegen.cpp | 49 +++++++------- src/jitlayers.cpp | 19 ++++-- src/jitlayers.h | 21 +++++- src/llvm-multiversioning.cpp | 2 +- src/passes.h | 2 + 6 files changed, 130 insertions(+), 86 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 47570274380e4..2b8e5d59cea06 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -556,7 +556,8 @@ void jl_dump_native_impl(void *native_code, PM.add(createBitcodeWriterPass(unopt_bc_OS)); PM.run(M); } - optimizeModule(M, TM.get(), jl_options.opt_level, true, true); + NewPassManager NPM(*TM, jl_options.opt_level, true, true); + NPM.run(M); legacy::PassManager PM; if (bc_fname) PM.add(createBitcodeWriterPass(bc_OS)); @@ -1049,7 +1050,7 @@ static void addSanitizers(ModulePassManager &MPM, int optlevel) { // } } -static void addPipeline(ModulePassManager &MPM, int opt_level, bool lower_intrinsics, bool dump_native) +static void addPipeline(ModulePassManager &MPM, int opt_level, bool lower_intrinsics, bool dump_native, bool external_use) { // TODO: CommonInstruction hoisting/sinking enables AllocOpt // to merge allocations and sometimes eliminate them, @@ -1115,7 +1116,7 @@ static void addPipeline(ModulePassManager &MPM, int opt_level, bool lower_intrin } MPM.addPass(LowerSIMDLoop()); // Annotate loop marked with "loopinfo" as LLVM parallel loop if (dump_native) { - MPM.addPass(MultiVersioning()); + MPM.addPass(MultiVersioning(external_use)); MPM.addPass(CPUFeatures()); // minimal clean-up to get rid of CPU feature checks if (opt_level == 1) { @@ -1150,7 +1151,7 @@ static void addPipeline(ModulePassManager &MPM, int opt_level, bool lower_intrin } if (dump_native) - MPM.addPass(MultiVersioning()); + MPM.addPass(MultiVersioning(external_use)); MPM.addPass(CPUFeatures()); { FunctionPassManager FPM; @@ -1314,80 +1315,88 @@ static void addPipeline(ModulePassManager &MPM, int opt_level, bool lower_intrin // TargetMachine::registerPassBuilderCallbacks. We need to find a solution either in working with upstream // or adapting PassBuilder (or subclassing it) to suite our needs. This is in particular important for // BPF, NVPTX, and AMDGPU. - -void optimizeModule(Module &M, TargetMachine *TM, int opt_level, bool lower_intrinsics, bool dump_native) -{ - // llvm::PassBuilder pb(targetMachine->LLVM, llvm::PipelineTuningOptions(), llvm::None, &passInstrumentationCallbacks); - PassInstrumentationCallbacks PIC; - StandardInstrumentations SI(false); - SI.registerCallbacks(PIC); +namespace { // NewPM + auto createPIC(StandardInstrumentations &SI) { + auto PIC = std::make_unique(); + SI.registerCallbacks(*PIC); //Borrowed from LLVM PassBuilder.cpp:386 #define MODULE_PASS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC.addClassToPassName(CLASS, NAME); +PIC->addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC.addClassToPassName(CLASS, NAME); +PIC->addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC.addClassToPassName(CLASS, NAME); +PIC->addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ -PIC.addClassToPassName(CLASS, NAME); +PIC->addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ -PIC.addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "llvm-julia-passes.inc" - PassBuilder PB(TM, PipelineTuningOptions(), None, &PIC); - // Create the analysis managers. - LoopAnalysisManager LAM; - PB.registerLoopAnalyses(LAM); - - AAManager AA; - // TODO: Why are we only doing this for -O3? - if (opt_level >= 3) { - AA.registerFunctionAnalysis(); + return PIC; } - if (opt_level >= 2) { - AA.registerFunctionAnalysis(); - AA.registerFunctionAnalysis(); - } - // TM->registerDefaultAliasAnalyses(AA); - - FunctionAnalysisManager FAM; - // Register the AA manager first so that our version is the one used. - FAM.registerPass([&] { return std::move(AA); }); - // Register our TargetLibraryInfoImpl. - FAM.registerPass([&] { return llvm::TargetIRAnalysis(TM->getTargetIRAnalysis()); }); - FAM.registerPass([&] { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(TM->getTargetTriple())); }); - PB.registerFunctionAnalyses(FAM); - - CGSCCAnalysisManager CGAM; - PB.registerCGSCCAnalyses(CGAM); + auto createFAM(TargetMachine &TM, int opt_level) { + AAManager AA; + // TODO: Why are we only doing this for -O3? + if (opt_level >= 3) { + AA.registerFunctionAnalysis(); + } + if (opt_level >= 2) { + AA.registerFunctionAnalysis(); + AA.registerFunctionAnalysis(); + } + // TM->registerDefaultAliasAnalyses(AA); + + FunctionAnalysisManager FAM; + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { return std::move(AA); }); + // Register our TargetLibraryInfoImpl. + FAM.registerPass([&] { return llvm::TargetIRAnalysis(TM.getTargetIRAnalysis()); }); + FAM.registerPass([&] { return llvm::TargetLibraryAnalysis(llvm::TargetLibraryInfoImpl(TM.getTargetTriple())); }); + return FAM; + } - ModuleAnalysisManager MAM; - PB.registerModuleAnalyses(MAM); + auto createPB(TargetMachine &TM, PassInstrumentationCallbacks &PIC, LoopAnalysisManager &LAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM) { + PassBuilder PB(&TM, PipelineTuningOptions(), None, &PIC); + PB.registerLoopAnalyses(LAM); + PB.registerFunctionAnalyses(FAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerModuleAnalyses(MAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + return PB; + } - PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + auto createMPM(int opt_level, bool lower_intrinsics, bool dump_native, bool external_use) { + ModulePassManager MPM; + addPipeline(MPM, opt_level, lower_intrinsics, dump_native, external_use); + return MPM; + } +} - ModulePassManager MPM; - addPipeline(MPM, opt_level, lower_intrinsics, dump_native); +NewPassManager::NewPassManager(TargetMachine &TM, int opt_level, bool lower_intrinsics, bool dump_native, bool external_use) +: SI(false), PIC(createPIC(SI)), LAM(), FAM(createFAM(TM, opt_level)), CGAM(), MAM(), + PB(createPB(TM, *PIC, LAM, FAM, CGAM, MAM)), + MPM(createMPM(opt_level, lower_intrinsics, dump_native, external_use)) {} +void NewPassManager::run(Module &M) { MPM.run(M, MAM); } @@ -1550,10 +1559,12 @@ void *jl_get_llvmf_defn_impl(jl_method_instance_t *mi, size_t world, char getwra // and will better match what's actually in sysimg. for (auto &global : output.globals) global.second->setLinkage(GlobalValue::ExternalLinkage); - if (optimize) + if (optimize) { //Safe b/c context lock is held by output // PM->run(*m.getModuleUnlocked()); - optimizeModule(*m.getModuleUnlocked(), &jl_ExecutionEngine->getTargetMachine(), jl_options.opt_level); + NewPassManager NPM(jl_ExecutionEngine->getTargetMachine(), jl_options.opt_level); + NPM.run(*m.getModuleUnlocked()); + } const std::string *fname; if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam") getwrapper = false; diff --git a/src/codegen.cpp b/src/codegen.cpp index 218c58c081f23..f42c6183c432a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -86,32 +86,8 @@ #include #include -#define DEBUG_TYPE "julia_irgen_codegen" - using namespace llvm; -STATISTIC(EmittedAllocas, "Number of allocas emitted"); -STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted"); -STATISTIC(ModulesCreated, "Number of LLVM Modules created"); -STATISTIC(EmittedBoxCompares, "Number of box compares emitted"); -STATISTIC(EmittedBitsUnionCompares, "Number of bitsunion compares emitted"); -STATISTIC(EmittedBitsCompares, "Number of bits compares emitted"); -STATISTIC(EmittedEgals, "Number of egals emitted"); -STATISTIC(EmittedOpfields, "Number of opfields emitted"); -STATISTIC(EmittedBuiltinCalls, "Number of builtin calls emitted"); -STATISTIC(EmittedJLCalls, "Number of jlcalls emitted"); -STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted"); -STATISTIC(EmittedInvokes, "Number of invokes emitted"); -STATISTIC(EmittedCalls, "Number of calls emitted"); -STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted"); -STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted"); -STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted"); -STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted"); -STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated"); -STATISTIC(GeneratedCCallables, "Number of C-callable functions generated"); -STATISTIC(GeneratedInvokeWrappers, "Number of invoke wrappers generated"); -STATISTIC(EmittedFunctions, "Number of functions emitted"); - //Drag some useful type functions into our namespace //to reduce verbosity of our code auto getInt1Ty(LLVMContext &ctxt) { @@ -186,6 +162,31 @@ typedef Instruction TerminatorInst; #include "processor.h" #include "julia_assert.h" +#undef DEBUG_TYPE +#define DEBUG_TYPE "julia_irgen_codegen" + +STATISTIC(EmittedAllocas, "Number of allocas emitted"); +STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted"); +STATISTIC(ModulesCreated, "Number of LLVM Modules created"); +STATISTIC(EmittedBoxCompares, "Number of box compares emitted"); +STATISTIC(EmittedBitsUnionCompares, "Number of bitsunion compares emitted"); +STATISTIC(EmittedBitsCompares, "Number of bits compares emitted"); +STATISTIC(EmittedEgals, "Number of egals emitted"); +STATISTIC(EmittedOpfields, "Number of opfields emitted"); +STATISTIC(EmittedBuiltinCalls, "Number of builtin calls emitted"); +STATISTIC(EmittedJLCalls, "Number of jlcalls emitted"); +STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted"); +STATISTIC(EmittedInvokes, "Number of invokes emitted"); +STATISTIC(EmittedCalls, "Number of calls emitted"); +STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted"); +STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted"); +STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted"); +STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted"); +STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated"); +STATISTIC(GeneratedCCallables, "Number of C-callable functions generated"); +STATISTIC(GeneratedInvokeWrappers, "Number of invoke wrappers generated"); +STATISTIC(EmittedFunctions, "Number of functions emitted"); + JL_STREAM *dump_emitted_mi_name_stream = NULL; extern "C" JL_DLLEXPORT void jl_dump_emitted_mi_name_impl(void *s) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 2ebc00ad5fba8..4a28568a7d540 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -480,12 +480,14 @@ CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) #endif } +#ifndef JL_USE_NEW_PM static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, int optlevel) { addTargetPasses(&PM, &TM); addOptimizationPasses(&PM, optlevel); addMachinePasses(&PM, &TM, optlevel); } +#endif static auto countBasicBlocks(const Function &F) { @@ -882,6 +884,12 @@ namespace { } }; +#ifdef JL_USE_NEW_PM + typedef NewPassManager PassManager; +#else + typedef legacy::PassManager PassManager; +#endif + struct PMCreator { std::unique_ptr TM; int optlevel; @@ -897,10 +905,14 @@ namespace { swap(*this, other); return *this; } - std::unique_ptr operator()() { + std::unique_ptr operator()() { +#ifdef JL_USE_NEW_PM + return std::make_unique(*TM, optlevel); +#else auto PM = std::make_unique(); addPassesForOptLevel(*PM, *TM, optlevel); return PM; +#endif } }; @@ -933,8 +945,7 @@ namespace { JL_TIMING(LLVM_OPT); //Run the optimization - // (***PMs).run(M); - optimizeModule(M, &jl_ExecutionEngine->getTargetMachine(), optlevel); + (***PMs).run(M); uint64_t end_time = 0; if (dump_llvm_opt_stream != NULL) { @@ -958,7 +969,7 @@ namespace { } private: int optlevel; - JuliaOJIT::ResourcePool> PMs; + JuliaOJIT::ResourcePool> PMs; }; struct CompilerT : orc::IRCompileLayer::IRCompiler { diff --git a/src/jitlayers.h b/src/jitlayers.h index 176c1a2686e4c..ccd3f066abb87 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include #include #include @@ -44,16 +47,32 @@ # include #endif +#define JL_USE_NEW_PM + using namespace llvm; extern "C" jl_cgparams_t jl_default_cgparams; +struct NewPassManager { + StandardInstrumentations SI; + std::unique_ptr PIC; + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + PassBuilder PB; + ModulePassManager MPM; + + NewPassManager(TargetMachine &TM, int opt_level, bool lower_intrinsics = true, bool dump_native = false, bool external_use = false); + + void run(Module &M); +}; + void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM); void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false); void addMachinePasses(legacy::PassManagerBase *PM, TargetMachine *TM, int optlevel); void jl_finalize_module(orc::ThreadSafeModule m); void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src); -void optimizeModule(Module &M, TargetMachine *TM, int opt_level, bool lower_intrinsics=true, bool dump_native=false); GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M); DataLayout jl_create_datalayout(TargetMachine &TM); diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp index 1c0818c5e9f07..a0ae03eba18c5 100644 --- a/src/llvm-multiversioning.cpp +++ b/src/llvm-multiversioning.cpp @@ -1184,7 +1184,7 @@ PreservedAnalyses MultiVersioning::run(Module &M, ModuleAnalysisManager &AM) auto GetCG = [&]() -> CallGraph & { return AM.getResult(M); }; - if (runMultiVersioning(M, GetLI, GetCG, false)) { + if (runMultiVersioning(M, GetLI, GetCG, external_use)) { auto preserved = PreservedAnalyses::allInSet(); preserved.preserve(); return preserved; diff --git a/src/passes.h b/src/passes.h index 307f4a74d192b..82922a95db565 100644 --- a/src/passes.h +++ b/src/passes.h @@ -65,6 +65,8 @@ struct FinalLowerGCPass : PassInfoMixin { }; struct MultiVersioning : PassInfoMixin { + bool external_use; + MultiVersioning(bool external_use = false) : external_use(external_use) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } };