From f4b690b68c41cd198af19afd88596873442678d0 Mon Sep 17 00:00:00 2001 From: Bharadwaj Yadavalli Date: Sun, 9 Dec 2018 17:49:44 -0500 Subject: [PATCH] [NFC] Decouple the dependency on both X86 and ARM target builds to allow for single target build (either X86 or ARM) of llvm-mctoll --- ARM/ARMMachineInstructionRaiser.cpp | 25 +++--- ARM/ARMModuleRaiser.cpp | 85 +++++++++++++++++++ ARM/ARMModuleRaiser.h | 34 ++++++++ ARM/CMakeLists.txt | 1 + CMakeLists.txt | 14 ++++ MachineFunctionRaiser.cpp | 124 ++++------------------------ MachineFunctionRaiser.h | 3 + ModuleRaiser.h | 62 +++++++------- Raisers.def.in | 29 +++++++ X86/CMakeLists.txt | 1 + X86/X86MachineInstructionRaiser.cpp | 70 +++++++++------- X86/X86ModuleRaiser.cpp | 89 ++++++++++++++++++++ X86/X86ModuleRaiser.h | 32 +++++++ llvm-mctoll.cpp | 52 ++++++++---- 14 files changed, 426 insertions(+), 195 deletions(-) create mode 100644 ARM/ARMModuleRaiser.cpp create mode 100644 ARM/ARMModuleRaiser.h create mode 100644 Raisers.def.in create mode 100644 X86/X86ModuleRaiser.cpp create mode 100644 X86/X86ModuleRaiser.h diff --git a/ARM/ARMMachineInstructionRaiser.cpp b/ARM/ARMMachineInstructionRaiser.cpp index 6398c8b8..66335186 100644 --- a/ARM/ARMMachineInstructionRaiser.cpp +++ b/ARM/ARMMachineInstructionRaiser.cpp @@ -15,6 +15,8 @@ #include "ARMMachineInstructionRaiser.h" #include "ARMEliminatePrologEpilog.h" #include "ARMFunctionPrototype.h" +#include "ARMModuleRaiser.h" +#include "MachineFunctionRaiser.h" using namespace llvm; @@ -35,7 +37,6 @@ bool ARMMachineInstructionRaiser::raiseMachineFunction() { bool ARMMachineInstructionRaiser::raise() { raiseMachineFunction(); - return true; } @@ -71,15 +72,15 @@ FunctionType *ARMMachineInstructionRaiser::getRaisedFunctionPrototype() { return raisedFunction->getFunctionType(); } -#ifdef __cplusplus -extern "C" { -#endif -MachineInstructionRaiser * -InitializeARMMachineInstructionRaiser(MachineFunction &machFunc, Module &m, - const ModuleRaiser *mr, - MCInstRaiser *mcir) 
{ - return new ARMMachineInstructionRaiser(machFunc, mr, mcir); -} -#ifdef __cplusplus +// Create a new MachineFunctionRaiser object and add it to the list of +// MachineFunction raiser objects of this module. +MachineFunctionRaiser *ARMModuleRaiser::CreateAndAddMachineFunctionRaiser( + Function *f, const ModuleRaiser *mr, uint64_t start, uint64_t end) { + MachineFunctionRaiser *mfRaiser = new MachineFunctionRaiser( + *M, mr->getMachineModuleInfo()->getOrCreateMachineFunction(*f), mr, start, + end); + mfRaiser->setMachineInstrRaiser(new ARMMachineInstructionRaiser( + mfRaiser->getMachineFunction(), mr, mfRaiser->getMCInstRaiser())); + mfRaiserVector.push_back(mfRaiser); + return mfRaiser; } -#endif diff --git a/ARM/ARMModuleRaiser.cpp b/ARM/ARMModuleRaiser.cpp new file mode 100644 index 00000000..45c82aea --- /dev/null +++ b/ARM/ARMModuleRaiser.cpp @@ -0,0 +1,85 @@ +//===- ARMModuleRaiser.cpp - Binary raiser utility llvm-mctoll ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of ARMModuleRaiser class for use by +// llvm-mctoll.
+// +//===----------------------------------------------------------------------===// + +#include "ARMModuleRaiser.h" +#include "llvm/Object/ELFObjectFile.h" + +using namespace llvm; + +namespace RaiserContext { +extern SmallVector ModuleRaiserRegistry; +} + +bool ARMModuleRaiser::collectDynamicRelocations() { + if (!Obj->isELF()) { + return false; + } + + const ELF32LEObjectFile *Elf32LEObjFile = dyn_cast(Obj); + if (!Elf32LEObjFile) { + return false; + } + + std::vector DynRelSec = Obj->dynamic_relocation_sections(); + + for (const SectionRef &Section : DynRelSec) { + for (const RelocationRef &Reloc : Section.relocations()) { + DynRelocs.push_back(Reloc); + } + } + + // Get relocations of the .got section from .rel.plt if it exists. I do not + // see an API in ObjectFile class to get at these. + + // Find the .got and .rel.plt sections. Note: A lot of verification and double + // checking is done in the following code. + const ELFFile *ElfFile = Elf32LEObjFile->getELFFile(); + // Find .rel.plt + SectionRef DotGotDotPltSec, DotRelaDotPltSec; + for (const SectionRef Section : Obj->sections()) { + StringRef SecName; + Section.getName(SecName); + if (SecName.equals(".rel.plt")) { + DotRelaDotPltSec = Section; + } else if (SecName.equals(".got")) { + DotGotDotPltSec = Section; + } + } + + if (DotRelaDotPltSec.getObject() != nullptr) { + // Do some additional sanity checks + assert((DotGotDotPltSec.getObject() != nullptr) && + "Failed to find .got section"); + auto DotRelaDotPltShdr = ElfFile->getSection(DotRelaDotPltSec.getIndex()); + assert(DotRelaDotPltShdr && "Failed to find .rel.plt section"); + for (const RelocationRef &Reloc : DotRelaDotPltSec.relocations()) { + DynRelocs.push_back(Reloc); + } + } + return true; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void InitializeARMModuleRaiser() { + ModuleRaiser *m = new ARMModuleRaiser(); + RaiserContext::ModuleRaiserRegistry.push_back(m); + return; +} + +#ifdef __cplusplus +} +#endif diff --git
a/ARM/ARMModuleRaiser.h b/ARM/ARMModuleRaiser.h new file mode 100644 index 00000000..eaf14e30 --- /dev/null +++ b/ARM/ARMModuleRaiser.h @@ -0,0 +1,34 @@ +//===- ARMModuleRaiser.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of ARMModuleRaiser class for use by +// llvm-mctoll. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCTOLL_ARM_ARMMODULERAISER_H +#define LLVM_TOOLS_LLVM_MCTOLL_ARM_ARMMODULERAISER_H + +#include "ModuleRaiser.h" + +using namespace llvm; + +class ARMModuleRaiser : public ModuleRaiser { +public: + ARMModuleRaiser() : ModuleRaiser() { Arch = Triple::arm; } + + // Create a new MachineFunctionRaiser object and add it to the list of + // MachineFunction raiser objects of this module. 
+ MachineFunctionRaiser * + CreateAndAddMachineFunctionRaiser(Function *f, const ModuleRaiser *mr, + uint64_t start, uint64_t end); + bool collectDynamicRelocations(); +}; + +#endif // LLVM_TOOLS_LLVM_MCTOLL_ARM_ARMMODULERAISER_H diff --git a/ARM/CMakeLists.txt b/ARM/CMakeLists.txt index d90c06d9..d61d28f9 100644 --- a/ARM/CMakeLists.txt +++ b/ARM/CMakeLists.txt @@ -9,6 +9,7 @@ if(NOT LLVM_MCTOLL_BUILT_STANDALONE) endif() add_llvm_library(ARMRaiser + ARMModuleRaiser.cpp ARMFunctionPrototype.cpp ARMEliminatePrologEpilog.cpp ARMMachineInstructionRaiser.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 89f38127..ac7ed9c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,3 +1,7 @@ +if(NOT (LLVM_TARGETS_TO_BUILD MATCHES "X86" OR LLVM_TARGETS_TO_BUILD MATCHES "ARM")) + return() +endif() + set(LLVM_MCTOLL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LLVM_MCTOLL_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -9,17 +13,27 @@ llvm_map_components_to_libnames(llvm_libs ) set(LLVM_MCTOLL_LIB_DEPS ${llvm_libs}) +set(LLVM_MCTOLL_SUPPORTED_ARCHS "") if(LLVM_TARGETS_TO_BUILD MATCHES "X86") + set(LLVM_MCTOLL_SUPPORTED_ARCHS + "${LLVM_MCTOLL_SUPPORTED_ARCHS}MODULE_RAISER(X86)\n") add_subdirectory(X86) list(APPEND LLVM_MCTOLL_LIB_DEPS X86Raiser) endif() if(LLVM_TARGETS_TO_BUILD MATCHES "ARM") + set(LLVM_MCTOLL_SUPPORTED_ARCHS + "${LLVM_MCTOLL_SUPPORTED_ARCHS}MODULE_RAISER(ARM)\n") add_subdirectory(ARM) list(APPEND LLVM_MCTOLL_LIB_DEPS ARMRaiser) endif() +configure_file( + ${LLVM_MCTOLL_SOURCE_DIR}/Raisers.def.in + ${LLVM_INCLUDE_DIR}/Raisers.def + ) + add_subdirectory(test) add_llvm_tool(llvm-mctoll diff --git a/MachineFunctionRaiser.cpp b/MachineFunctionRaiser.cpp index 31ae4e4b..7618ecc5 100644 --- a/MachineFunctionRaiser.cpp +++ b/MachineFunctionRaiser.cpp @@ -13,60 +13,15 @@ //===----------------------------------------------------------------------===// #include "MachineFunctionRaiser.h" -#include "llvm/Object/ELFObjectFile.h" #include "llvm/Target/TargetMachine.h" -#ifdef 
__cplusplus -extern "C" { -#endif - -// ARM Raiser passes -MachineInstructionRaiser * -InitializeARMMachineInstructionRaiser(MachineFunction &machFunc, Module &m, - const ModuleRaiser *mr, - MCInstRaiser *mcir); - -// X86 Raiser passes -MachineInstructionRaiser * -InitializeX86MachineInstructionRaiser(MachineFunction &machFunc, Module &m, - const ModuleRaiser *mr, - MCInstRaiser *mcir); -#ifdef __cplusplus -} -#endif - void MachineFunctionRaiser::init(uint64_t start, uint64_t end) { mcInstRaiser = new MCInstRaiser(start, end); machineInstRaiser = nullptr; - auto arch = MR->getTargetMachine()->getTargetTriple().getArch(); - - // Double check supported architecture. - if (!MR->isSupportedArch()) { - outs() << arch << "Unsupported architecture\n"; - return; - } - - switch (arch) { - case Triple::x86_64: - machineInstRaiser = - InitializeX86MachineInstructionRaiser(MF, module, MR, mcInstRaiser); - break; - case Triple::arm: - machineInstRaiser = - InitializeARMMachineInstructionRaiser(MF, module, MR, mcInstRaiser); - break; - // Add default case to pacify the compiler warnings. - default: - outs() << "\n" << arch << " not yet supported for raising\n"; - } } bool MachineFunctionRaiser::runRaiserPasses() { bool success = false; - // Do not run raise binaries of an unsupported architecture. - if (!MR->isSupportedArch()) - return false; - // Raise MCInst to MachineInstr and Build CFG if (machineInstRaiser != nullptr) { // Raise MachineInstr to Instruction @@ -95,16 +50,6 @@ void MachineFunctionRaiser::cleanupRaisedFunction() { * reference MachineFunctionRaiser class that has a forward declaration in * ModuleRaiser.h. */ -// Create a new MachineFunctionRaiser object and add it to the list of -// MachineFunction raiser objects of this module. 
-MachineFunctionRaiser *ModuleRaiser::CreateAndAddMachineFunctionRaiser( - Function *f, const ModuleRaiser *mr, uint64_t start, uint64_t end) { - MachineFunctionRaiser *mfRaiser = new MachineFunctionRaiser( - M, mr->getMachineModuleInfo()->getOrCreateMachineFunction(*f), mr, start, - end); - mfRaiserVector.push_back(mfRaiser); - return mfRaiser; -} Function *ModuleRaiser::getFunctionAt(uint64_t Index) const { int64_t TextSecAddr = getTextSectionAddress(); @@ -234,60 +179,6 @@ bool ModuleRaiser::collectTextSectionRelocs(const SectionRef &TextSec) { return true; } -bool ModuleRaiser::collectDynamicRelocations() { - - if (!Obj->isELF()) { - return false; - } - - const ELF64LEObjectFile *Elf64LEObjFile = dyn_cast(Obj); - if (!Elf64LEObjFile) { - return false; - } - - std::vector DynRelSec = Obj->dynamic_relocation_sections(); - - for (const SectionRef &Section : DynRelSec) { - for (const RelocationRef &Reloc : Section.relocations()) { - DynRelocs.push_back(Reloc); - } - } - - // Get relocations of .got.plt section from .rela.plt if it exists. I do not - // see an API in ObjectFile class to get at these. - - // Find .got.plt and .rela.plt sections Note: A lot of verification and double - // checking done in the following code. - const ELFFile *ElfFile = Elf64LEObjFile->getELFFile(); - // Find .rela.plt - SectionRef DotGotDotPltSec, DotRelaDotPltSec; - for (const SectionRef Section : Obj->sections()) { - StringRef SecName; - Section.getName(SecName); - if (SecName.equals(".rela.plt")) { - DotRelaDotPltSec = Section; - } else if (SecName.equals(".got.plt")) { - DotGotDotPltSec = Section; - } - } - if (DotRelaDotPltSec.getObject() != nullptr) { - // If the binary has .got.plt section, read the dynamic relocations. 
- if (DotGotDotPltSec.getObject() != nullptr) { - auto DotRelaDotPltShdr = ElfFile->getSection(DotRelaDotPltSec.getIndex()); - // Perform some sanity checks - assert(DotRelaDotPltShdr && "Failed to find .rela.plt section"); - assert((DotRelaDotPltShdr.get()->sh_info == DotGotDotPltSec.getIndex()) && - ".rela.plt does not refer .got.plt section"); - assert((DotRelaDotPltShdr.get()->sh_type == ELF::SHT_RELA) && - "Unexpected type of section .rela.plt"); - for (const RelocationRef &Reloc : DotRelaDotPltSec.relocations()) { - DynRelocs.push_back(Reloc); - } - } - } - return true; -} - // Return text section address; or -1 if text section is not found int64_t ModuleRaiser::getTextSectionAddress() const { if (!Obj->isELF()) { @@ -318,3 +209,18 @@ void ModuleRaiser::addRODataValueAt(Value *v, uint64_t offset) const { GlobalRODataValues.emplace(offset, v); return; } + +#ifdef __cplusplus +extern "C" { +#endif + +#define MODULE_RAISER(TargetName) void Initialize##TargetName##ModuleRaiser(); +#include "Raisers.def" +#ifdef __cplusplus +} +#endif + +void ModuleRaiser::InitializeAllModuleRaisers() { +#define MODULE_RAISER(TargetName) Initialize##TargetName##ModuleRaiser(); +#include "Raisers.def" +} diff --git a/MachineFunctionRaiser.h b/MachineFunctionRaiser.h index 6c9bdc01..fbedbcc8 100644 --- a/MachineFunctionRaiser.h +++ b/MachineFunctionRaiser.h @@ -45,6 +45,9 @@ class MachineFunctionRaiser { MachineInstructionRaiser *getMachineInstrRaiser() { return machineInstRaiser; } + void setMachineInstrRaiser(MachineInstructionRaiser *r) { + machineInstRaiser = r; + } Function *getRaisedFunction() { return machineInstRaiser->getRaisedFunction(); } diff --git a/ModuleRaiser.h b/ModuleRaiser.h index f9a12fa8..f1bfe82b 100644 --- a/ModuleRaiser.h +++ b/ModuleRaiser.h @@ -15,6 +15,7 @@ #ifndef LLVM_TOOLS_LLVM_MCTOLL_MODULERAISER_H #define LLVM_TOOLS_LLVM_MCTOLL_MODULERAISER_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include 
"llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstrAnalysis.h" @@ -33,23 +34,28 @@ using namespace object; // module. class ModuleRaiser { public: - ModuleRaiser(Module &m, const TargetMachine *tm, MachineModuleInfo *mmi, - const MCInstrAnalysis *mia, const MCInstrInfo *mii, - const ObjectFile *o, MCDisassembler *dis) - : M(m), TM(tm), MMI(mmi), MIA(mia), MII(mii), Obj(o), DisAsm(dis), - TextSectionIndex(-1) { - supportedArch = false; - auto arch = tm->getTargetTriple().getArch(); - switch (arch) { - case Triple::x86_64: - supportedArch = true; - break; - case Triple::arm: - supportedArch = true; - break; - default: - outs() << "\n" << arch << " not yet supported for raising\n"; - } + ModuleRaiser() + : M(nullptr), TM(nullptr), MMI(nullptr), MIA(nullptr), MII(nullptr), + Obj(nullptr), DisAsm(nullptr), TextSectionIndex(-1), + Arch(Triple::ArchType::UnknownArch), InfoSet(false) {} + + static void InitializeAllModuleRaisers(); + + void setModuleRaiserInfo(Module *m, const TargetMachine *tm, + MachineModuleInfo *mmi, const MCInstrAnalysis *mia, + const MCInstrInfo *mii, const ObjectFile *o, + MCDisassembler *dis) { + assert((InfoSet == false) && + "Module Raiser information can be set only once"); + M = m; + TM = tm; + MMI = mmi; + MIA = mia; + MII = mii; + Obj = o; + DisAsm = dis; + TextSectionIndex = -1; + InfoSet = true; } // Function to create a MachineFunctionRaiser corresponding to Function f. @@ -57,10 +63,9 @@ class ModuleRaiser { // creation of MachineFunction. The Function object representing raising // of MachineFunction is accessible by calling getRaisedFunction() // on the MachineFunctionRaiser object. 
- MachineFunctionRaiser *CreateAndAddMachineFunctionRaiser(Function *f, - const ModuleRaiser *, - uint64_t start, - uint64_t end); + virtual MachineFunctionRaiser * + CreateAndAddMachineFunctionRaiser(Function *f, const ModuleRaiser *, + uint64_t start, uint64_t end) = 0; MachineFunctionRaiser *getCurrentMachineFunctionRaiser() { if (mfRaiserVector.size() > 0) { @@ -78,20 +83,19 @@ class ModuleRaiser { } bool collectTextSectionRelocs(const SectionRef &); - bool collectDynamicRelocations(); + virtual bool collectDynamicRelocations() = 0; MachineFunction *getMachineFunction(Function *); // Member getters - Module &getModule() const { return M; } + Module *getModule() const { return M; } const TargetMachine *getTargetMachine() const { return TM; } MachineModuleInfo *getMachineModuleInfo() const { return MMI; } const MCInstrAnalysis *getMCInstrAnalysis() const { return MIA; } const MCInstrInfo *getMCInstrInfo() const { return MII; } const ObjectFile *getObjectFile() const { return Obj; } const MCDisassembler *getMCDisassembler() const { return DisAsm; } - - bool isSupportedArch() const { return supportedArch; } + Triple::ArchType getArchType() { return Arch; } bool runMachineFunctionPasses(); @@ -115,7 +119,7 @@ class ModuleRaiser { virtual ~ModuleRaiser() {} -private: +protected: // A sequential list of MachineFunctionRaiser objects created // as the instructions of the input binary are parsed. Each of // these correspond to a "machine function". A machine function @@ -138,16 +142,18 @@ class ModuleRaiser { mutable std::map GlobalRODataValues; // Commonly used data structures - Module &M; + Module *M; const TargetMachine *TM; MachineModuleInfo *MMI; const MCInstrAnalysis *MIA; const MCInstrInfo *MII; - bool supportedArch; const ObjectFile *Obj; MCDisassembler *DisAsm; // Index of text section whose instructions are raised int64_t TextSectionIndex; + Triple::ArchType Arch; + // Flag to indicate that fields are set. Resetting is not allowed/expected. 
+ bool InfoSet; }; #endif // LLVM_TOOLS_LLVM_MCTOLL_MODULERAISER_H diff --git a/Raisers.def.in b/Raisers.def.in new file mode 100644 index 00000000..10ac922d --- /dev/null +++ b/Raisers.def.in @@ -0,0 +1,29 @@ +/*===- Raisers.def.in - Supported Raiser Architectures------*- C++ -*-===*| +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file enumerates all of the architectures supported by *| +|* this build of the binary raiser llvm-mctoll. Clients of this file *| +|* should define the MODULE_RAISER macro to be a function-like *| +|* macro with a single parameter (the name of the target); including *| +|* this file will then *| +|* enumerate all of the raise source architectures. *| +|* *| +|* The set of architectures supported by the raiser is generated at *| +|* configuration time, at which point this header is generated. Do not *| +|* modify this header directly. 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef MODULE_RAISER +# error Please define the macro MODULE_RAISER(TargetName) +#endif + +@LLVM_MCTOLL_SUPPORTED_ARCHS@ + +#undef MODULE_RAISER diff --git a/X86/CMakeLists.txt b/X86/CMakeLists.txt index 308e21eb..3fc01e78 100644 --- a/X86/CMakeLists.txt +++ b/X86/CMakeLists.txt @@ -9,6 +9,7 @@ if(NOT LLVM_MCTOLL_BUILT_STANDALONE) endif() add_llvm_library(X86Raiser + X86ModuleRaiser.cpp X86MachineInstructionRaiser.cpp DEPENDS diff --git a/X86/X86MachineInstructionRaiser.cpp b/X86/X86MachineInstructionRaiser.cpp index 6cb12200..4ece31f4 100644 --- a/X86/X86MachineInstructionRaiser.cpp +++ b/X86/X86MachineInstructionRaiser.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "X86MachineInstructionRaiser.h" #include "ExternalFunctions.h" +#include "MachineFunctionRaiser.h" #include "X86InstrBuilder.h" +#include "X86ModuleRaiser.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineInstr.h" @@ -479,7 +481,7 @@ Value *X86MachineInstructionRaiser::createPCRelativeAccesssValue( "Failed to find symbol associated with dynamic relocation."); // Find if a global value associated with symbol name is already // created - for (GlobalVariable &gv : MR->getModule().globals()) { + for (GlobalVariable &gv : MR->getModule()->globals()) { if (gv.getName().compare(Symname.get()) == 0) { memrefValue = &gv; } @@ -554,7 +556,7 @@ Value *X86MachineInstructionRaiser::createPCRelativeAccesssValue( } } Constant *GlobalInit = ConstantInt::get(GlobalValTy, symbVal); - auto GlobalVal = new GlobalVariable(MR->getModule(), GlobalValTy, + auto GlobalVal = new GlobalVariable(*(MR->getModule()), GlobalValTy, false /* isConstant */, linkage, GlobalInit, Symname->data()); // Don't use symbSize as it was modified. 
@@ -582,7 +584,7 @@ Value *X86MachineInstructionRaiser::createPCRelativeAccesssValue( "Failed to find symbol associated with text relocation."); // Find if a global value associated with symbol name is already // created - for (GlobalVariable &gv : MR->getModule().globals()) { + for (GlobalVariable &gv : MR->getModule()->globals()) { if (gv.getName().compare(Symname.get()) == 0) { memrefValue = &gv; } @@ -664,7 +666,7 @@ Value *X86MachineInstructionRaiser::createPCRelativeAccesssValue( } Constant *GlobalInit = ConstantInt::get(GlobalValTy, symInitVal); - auto GlobalVal = new GlobalVariable(MR->getModule(), GlobalValTy, + auto GlobalVal = new GlobalVariable(*(MR->getModule()), GlobalValTy, false /* isConstant */, linkage, GlobalInit, Symname->data()); // Don't use symSize as it was modified. @@ -787,7 +789,7 @@ Value *X86MachineInstructionRaiser::getStackAllocatedValue( Type *Ty = nullptr; unsigned int typeAlignment; LLVMContext &llvmContext(MF.getFunction().getContext()); - const DataLayout &dataLayout = MR->getModule().getDataLayout(); + const DataLayout &dataLayout = MR->getModule()->getDataLayout(); unsigned allocaAddrSpace = dataLayout.getAllocaAddrSpace(); unsigned stackObjectSize = getInstructionMemOpSize(mi.getOpcode()); switch (stackObjectSize) { @@ -954,7 +956,7 @@ Function *X86MachineInstructionRaiser::getTargetFunctionAtPLTOffset( // This is an undefined function symbol. Look through the list of // known glibc interfaces and construct a Function accordingly. 
CalledFunc = - ExternalFunctions::Create(*CalledFuncSymName, MR->getModule()); + ExternalFunctions::Create(*CalledFuncSymName, *(MR->getModule())); } // Found the section we are looking for break; @@ -994,7 +996,7 @@ const Value *X86MachineInstructionRaiser::getOrCreateGlobalRODataValueAtOffset( Constant *StrConstant = ConstantDataArray::getString(llvmContext, ROStringRef); auto GlobalStrConstVal = new GlobalVariable( - MR->getModule(), StrConstant->getType(), true /* isConstant */, + *(MR->getModule()), StrConstant->getType(), true /* isConstant */, GlobalValue::PrivateLinkage, StrConstant, "RO-String"); // Record the mapping between offset and global value MR->addRODataValueAt(GlobalStrConstVal, Offset); @@ -1024,7 +1026,7 @@ const Value *X86MachineInstructionRaiser::getOrCreateGlobalRODataValueAtOffset( "Failed to find symbol name for global address"); // Find if a global value associated with symbol name is // already created - for (GlobalVariable &gv : MR->getModule().globals()) { + for (GlobalVariable &gv : MR->getModule()->globals()) { if (gv.getName().compare(GlobalDataSymName.get()) == 0) { RODataValue = &gv; } @@ -1069,8 +1071,8 @@ const Value *X86MachineInstructionRaiser::getOrCreateGlobalRODataValueAtOffset( GlobalInit = ConstantInt::get(GlobalValTy, 0); } auto GlobalVal = new GlobalVariable( - MR->getModule(), GlobalValTy, false /* isConstant */, linkage, - GlobalInit, GlobalDataSymName.get()); + *(MR->getModule()), GlobalValTy, false /* isConstant */, + linkage, GlobalInit, GlobalDataSymName.get()); GlobalVal->setAlignment(GlobDataSymSectionAlignment); GlobalVal->setDSOLocal(true); RODataValue = GlobalVal; @@ -1125,7 +1127,7 @@ Value *X86MachineInstructionRaiser::getGlobalVariableValueAt( // Find if a global value associated with symbol name is already // created StringRef GlobalDataSymNameIndexStrRef(GlobalDataSymName.get()); - for (GlobalVariable &gv : MR->getModule().globals()) { + for (GlobalVariable &gv : MR->getModule()->globals()) { if 
(gv.getName().compare(GlobalDataSymNameIndexStrRef) == 0) { GlobalVariableValue = &gv; } @@ -1217,7 +1219,7 @@ Value *X86MachineInstructionRaiser::getGlobalVariableValueAt( GlobalInit = ConstantInt::get(GlobalValTy, 0); } auto GlobalVal = new GlobalVariable( - MR->getModule(), GlobalValTy, false /* isConstant */, linkage, + *(MR->getModule()), GlobalValTy, false /* isConstant */, linkage, GlobalInit, GlobalDataSymNameIndexStrRef); GlobalVal->setAlignment(GlobDataSymAlignment); GlobalVal->setDSOLocal(true); @@ -1562,21 +1564,21 @@ FunctionType *X86MachineInstructionRaiser::getRaisedFunctionPrototype() { // correct Function object being created now. // 1. Get the current function name StringRef functionName = MF.getFunction().getName(); - Module &module = MR->getModule(); + Module *module = MR->getModule(); // 2. Get the corresponding Function* registered in module - Function *tempFunctionPtr = module.getFunction(functionName); + Function *tempFunctionPtr = module->getFunction(functionName); assert(tempFunctionPtr != nullptr && "Function not found in module list"); // 4. Delete the tempFunc from module list to allow for the creation of // the real function to add the correct one to FunctionList of the // module. - module.getFunctionList().remove(tempFunctionPtr); + module->getFunctionList().remove(tempFunctionPtr); // 3. Now create a function type using the discovered argument // types and return value. FunctionType *FT = FunctionType::get(returnType, argTypeVector, false /* isVarArg*/); // 4. Create the real Function now that we have discovered the arguments. 
raisedFunction = Function::Create(FT, GlobalValue::ExternalLinkage, - functionName, &module); + functionName, module); // Set global linkage raisedFunction->setLinkage(GlobalValue::ExternalLinkage); @@ -1652,7 +1654,7 @@ Value *X86MachineInstructionRaiser::matchSSAValueToSrcRegSize( const MachineInstr &mi, unsigned SrcOpIndex, BasicBlock *curBlock) { unsigned SrcOpSize = getPhysRegOperandSize(mi, SrcOpIndex); Value *SrcOpValue = getRegValue(mi.getOperand(SrcOpIndex).getReg()); - const DataLayout &dataLayout = MR->getModule().getDataLayout(); + const DataLayout &dataLayout = MR->getModule()->getDataLayout(); // Generate the appropriate cast instruction if the sizes of the current // source value and that of the source register do not match. @@ -1758,7 +1760,7 @@ bool X86MachineInstructionRaiser::raisePushInstruction(const MachineInstr &mi) { // This is a register PUSH. If the source is register, create a slot on // the stack. if (mi.getOperand(0).isReg()) { - const DataLayout &dataLayout = MR->getModule().getDataLayout(); + const DataLayout &dataLayout = MR->getModule()->getDataLayout(); unsigned allocaAddrSpace = dataLayout.getAllocaAddrSpace(); // Create alloca instruction to allocate stack slot @@ -2820,7 +2822,7 @@ bool X86MachineInstructionRaiser::raiseMoveToMemInstr(const MachineInstr &mi, // Load the value from memory location LoadInst *loadInst = new LoadInst(memRefVal); loadInst->setAlignment( - memRefVal->getPointerAlignment(MR->getModule().getDataLayout())); + memRefVal->getPointerAlignment(MR->getModule()->getDataLayout())); curBlock->getInstList().push_back(loadInst); } @@ -3019,7 +3021,7 @@ bool X86MachineInstructionRaiser::raiseCompareMachineInstr( // Load the value from memory location LoadInst *loadInst = new LoadInst(memRefValue); loadInst->setAlignment( - memRefValue->getPointerAlignment(MR->getModule().getDataLayout())); + memRefValue->getPointerAlignment(MR->getModule()->getDataLayout())); curBlock->getInstList().push_back(loadInst); // save it 
at the appropriate index of operand value array if (memoryRefOpIndex == 0) { @@ -4515,7 +4517,7 @@ bool X86MachineInstructionRaiser::raiseMachineFunction() { // for the stack size of the function deduced from the machine code. bool X86MachineInstructionRaiser::adjustStackAllocatedObjects() { MachineFrameInfo &MFrameInfo = MF.getFrameInfo(); - const DataLayout &dataLayout = MR->getModule().getDataLayout(); + const DataLayout &dataLayout = MR->getModule()->getDataLayout(); // Map of stack offset and stack index std::map StackOffsetToIndexMap; std::map::iterator StackOffsetToIndexMapIter; @@ -4575,15 +4577,19 @@ bool X86MachineInstructionRaiser::adjustStackAllocatedObjects() { } bool X86MachineInstructionRaiser::raise() { return raiseMachineFunction(); } -#ifdef __cplusplus -extern "C" { -#endif -MachineInstructionRaiser * -InitializeX86MachineInstructionRaiser(MachineFunction &machFunc, Module &m, - const ModuleRaiser *mr, - MCInstRaiser *mcir) { - return new X86MachineInstructionRaiser(machFunc, mr, mcir); -} -#ifdef __cplusplus +/* NOTE : The following X86ModuleRaiser class function is defined here as they + * reference MachineFunctionRaiser class that has a forward declaration in + * ModuleRaiser.h. + */ +// Create a new MachineFunctionRaiser object and add it to the list of +// MachineFunction raiser objects of this module. 
+MachineFunctionRaiser *X86ModuleRaiser::CreateAndAddMachineFunctionRaiser( + Function *f, const ModuleRaiser *mr, uint64_t start, uint64_t end) { + MachineFunctionRaiser *mfRaiser = new MachineFunctionRaiser( + *M, mr->getMachineModuleInfo()->getOrCreateMachineFunction(*f), mr, start, + end); + mfRaiser->setMachineInstrRaiser(new X86MachineInstructionRaiser( + mfRaiser->getMachineFunction(), mr, mfRaiser->getMCInstRaiser())); + mfRaiserVector.push_back(mfRaiser); + return mfRaiser; } -#endif diff --git a/X86/X86ModuleRaiser.cpp b/X86/X86ModuleRaiser.cpp new file mode 100644 index 00000000..5e9e2bb0 --- /dev/null +++ b/X86/X86ModuleRaiser.cpp @@ -0,0 +1,89 @@ +//===- X86ModuleRaiser.cpp - Binary raiser utility llvm-mctoll ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of X86ModuleRaiser class for use by +// llvm-mctoll. +// +//===----------------------------------------------------------------------===// + +#include "X86ModuleRaiser.h" +#include "llvm/Object/ELFObjectFile.h" + +using namespace llvm; + +namespace RaiserContext { +extern SmallVector ModuleRaiserRegistry; +} + +bool X86ModuleRaiser::collectDynamicRelocations() { + if (!Obj->isELF()) { + return false; + } + + const ELF64LEObjectFile *Elf64LEObjFile = dyn_cast(Obj); + if (!Elf64LEObjFile) { + return false; + } + + std::vector DynRelSec = Obj->dynamic_relocation_sections(); + + for (const SectionRef &Section : DynRelSec) { + for (const RelocationRef &Reloc : Section.relocations()) { + DynRelocs.push_back(Reloc); + } + } + + // Get relocations of .got.plt section from .rela.plt if it exists. I do not + // see an API in ObjectFile class to get at these.
+ + // Find .got.plt and .rela.plt sections Note: A lot of verification and double + // checking done in the following code. + const ELFFile *ElfFile = Elf64LEObjFile->getELFFile(); + // Find .rela.plt + SectionRef DotGotDotPltSec, DotRelaDotPltSec; + for (const SectionRef Section : Obj->sections()) { + StringRef SecName; + Section.getName(SecName); + if (SecName.equals(".rela.plt")) { + DotRelaDotPltSec = Section; + } else if (SecName.equals(".got.plt")) { + DotGotDotPltSec = Section; + } + } + if (DotRelaDotPltSec.getObject() != nullptr) { + // If the binary has .got.plt section, read the dynamic relocations. + if (DotGotDotPltSec.getObject() != nullptr) { + auto DotRelaDotPltShdr = ElfFile->getSection(DotRelaDotPltSec.getIndex()); + // Perform some sanity checks + assert(DotRelaDotPltShdr && "Failed to find .rela.plt section"); + assert((DotRelaDotPltShdr.get()->sh_info == DotGotDotPltSec.getIndex()) && + ".rela.plt does not refer .got.plt section"); + assert((DotRelaDotPltShdr.get()->sh_type == ELF::SHT_RELA) && + "Unexpected type of section .rela.plt"); + for (const RelocationRef &Reloc : DotRelaDotPltSec.relocations()) { + DynRelocs.push_back(Reloc); + } + } + } + return true; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void InitializeX86ModuleRaiser() { + ModuleRaiser *m = new X86ModuleRaiser(); + RaiserContext::ModuleRaiserRegistry.push_back(m); + return; +} + +#ifdef __cplusplus +} +#endif diff --git a/X86/X86ModuleRaiser.h b/X86/X86ModuleRaiser.h new file mode 100644 index 00000000..8514ba64 --- /dev/null +++ b/X86/X86ModuleRaiser.h @@ -0,0 +1,32 @@ +//===- X86ModuleRaiser.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of X86ModuleRaiser class for use by +// llvm-mctoll. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_MCTOLL_X86_X86MODULERAISER_H +#define LLVM_TOOLS_LLVM_MCTOLL_X86_X86MODULERAISER_H + +#include "ModuleRaiser.h" + +using namespace llvm; + +class X86ModuleRaiser : public ModuleRaiser { +public: + X86ModuleRaiser() : ModuleRaiser() { Arch = Triple::x86_64; }; + + MachineFunctionRaiser * + CreateAndAddMachineFunctionRaiser(Function *f, const ModuleRaiser *mr, + uint64_t start, uint64_t end); + bool collectDynamicRelocations(); +}; + +#endif // LLVM_TOOLS_LLVM_MCTOLL_X86_X86MODULERAISER_H diff --git a/llvm-mctoll.cpp b/llvm-mctoll.cpp index 08fd882f..51647a5c 100644 --- a/llvm-mctoll.cpp +++ b/llvm-mctoll.cpp @@ -443,9 +443,11 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) { TargetRegistry::lookupTarget(ArchName, TheTriple, Error); if (!TheTarget) { if (Obj) - report_error(Obj->getFileName(), "can't find target: " + Error); + report_error(Obj->getFileName(), "Support for raising " + + TheTriple.getArchName() + + " not included"); else - error("can't find target: " + Error); + error("Unsupported target " + TheTriple.getArchName()); } // Update the triple name and return the found target. 
@@ -1036,6 +1038,31 @@ static bool disasmSection(const ObjectFile *Obj, StringRef &sectionName) { return false; } +namespace RaiserContext { +SmallVector ModuleRaiserRegistry; + +bool isSupportedArch(Triple::ArchType arch) { + for (auto m : ModuleRaiserRegistry) + if (m->getArchType() == arch) + return true; + + return false; +} + +ModuleRaiser *getModuleRaiser(const TargetMachine *tm) { + ModuleRaiser *mr = nullptr; + auto arch = tm->getTargetTriple().getArch(); + for (auto m : ModuleRaiserRegistry) + if (m->getArchType() == arch) { + mr = m; + break; + } + assert(nullptr != mr && "This arch has not yet supported for raising!\n"); + return mr; +} + +} // namespace RaiserContext + static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (StartAddress > StopAddress) error("Start address should be less than stop address"); @@ -1108,12 +1135,15 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { /* Set datalayout of the module to be the same as LLVMTargetMachine */ module.setDataLayout(Target->createDataLayout()); machineModuleInfo->doInitialization(module); - ModuleRaiser *moduleRaiser = - new ModuleRaiser(module, Target.get(), machineModuleInfo, MIA.get(), - MII.get(), Obj, DisAsm.get()); - - if (!moduleRaiser->isSupportedArch()) - return; + // Initialize all module raisers that are supported and are part of current + // LLVM build. + ModuleRaiser::InitializeAllModuleRaisers(); + // Get the module raiser for Target of the binary being raised + ModuleRaiser *moduleRaiser = RaiserContext::getModuleRaiser(Target.get()); + assert((moduleRaiser != nullptr) && "Failed to build module raiser"); + // Set data of module raiser + moduleRaiser->setModuleRaiserInfo(&module, Target.get(), machineModuleInfo, + MIA.get(), MII.get(), Obj, DisAsm.get()); // Collect dynamic relocations.
moduleRaiser->collectDynamicRelocations(); @@ -1403,12 +1433,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { } Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, FunctionName, &module); - // While PassManager is running FunctionPasses, it will check if current - // Function is empty or not. If the Function is empty, it will be - // skipped. So add an empty BasicBlock to Functions at here to guarantee - // the corresponding MachineFunction can be run. - // TODO: This BasicBlock should be removed when add real BasicBlocks. - BasicBlock::Create(Func->getContext(), "Useless", Func); // New function symbol encountered. Record all targets collected to // current MachineFunctionRaiser before we start parsing the new