diff --git a/src/Makefile b/src/Makefile
index c80c3c4497b44..1617281c73eeb 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -44,7 +44,7 @@ SRCS := \
 LLVMLINK :=
 
 ifeq ($(JULIACODEGEN),LLVM)
-SRCS += codegen disasm debuginfo llvm-simdloop llvm-gcroot
+SRCS += codegen disasm debuginfo llvm-simdloop llvm-gcroot cgmemmgr
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
 LLVM_LIBS := all
 ifeq ($(USE_POLLY),1)
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
new file mode 100644
index 0000000000000..c1332f87e52a4
--- /dev/null
+++ b/src/cgmemmgr.cpp
@@ -0,0 +1,860 @@
+// This file is a part of Julia. License is MIT: http://julialang.org/license
+
+#include "llvm-version.h"
+#include "platform.h"
+#include "options.h"
+
+#ifdef USE_MCJIT
+#include <llvm/ExecutionEngine/SectionMemoryManager.h>
+#include "julia.h"
+#include "julia_internal.h"
+
+#ifdef LLVM37
+#ifndef LLVM38
+# include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
+#endif
+#ifdef _OS_LINUX_
+# include <sys/syscall.h>
+# ifdef __NR_memfd_create
+#  include <linux/memfd.h>
+# endif
+#endif
+#ifndef _OS_WINDOWS_
+# include <sys/mman.h>
+# include <sys/stat.h>
+# include <fcntl.h>
+# include <unistd.h>
+# if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
+#  define MAP_ANONYMOUS MAP_ANON
+# endif
+#endif
+#ifdef _OS_FREEBSD_
+# include <sys/types.h>
+#endif
+
+namespace {
+
+static size_t get_block_size(size_t size)
+{
+    return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) :
+            jl_page_size * 256);
+}
+
+// Wrapper functions to mmap/munmap/mprotect pages...
+static void *map_anon_page(size_t size)
+{
+#ifdef _OS_WINDOWS_
+    char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size,
+                                    MEM_COMMIT, PAGE_READWRITE);
+    assert(mem && "Cannot allocate RW memory");
+    mem = (char*)LLT_ALIGN(uintptr_t(mem), jl_page_size);
+#else // _OS_WINDOWS_
+    void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
+                     MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    assert(mem != MAP_FAILED && "Cannot allocate RW memory");
+#endif // _OS_WINDOWS_
+    return mem;
+}
+
+static void unmap_page(void *ptr, size_t size)
+{
+#ifdef _OS_WINDOWS_
+    VirtualFree(ptr, size, MEM_DECOMMIT);
+#else // _OS_WINDOWS_
+    munmap(ptr, size);
+#endif // _OS_WINDOWS_
+}
+
+#ifdef _OS_WINDOWS_
+enum class Prot : int {
+    RW = PAGE_READWRITE,
+    RX = PAGE_EXECUTE,
+    RO = PAGE_READONLY
+};
+
+static void protect_page(void *ptr, size_t size, Prot flags)
+{
+    DWORD old_prot;
+    if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) {
+        jl_safe_printf("Cannot protect page @%p of size %u to 0x%x (err 0x%x)\n",
+                       ptr, (unsigned)size, (unsigned)flags,
+                       (unsigned)GetLastError());
+        abort();
+    }
+}
+#else // _OS_WINDOWS_
+enum class Prot : int {
+    RW = PROT_READ | PROT_WRITE,
+    RX = PROT_READ | PROT_EXEC,
+    RO = PROT_READ
+};
+
+static void protect_page(void *ptr, size_t size, Prot flags)
+{
+    int ret = mprotect(ptr, size, (int)flags);
+    if (ret != 0) {
+        perror(__func__);
+        abort();
+    }
+}
+
+static bool check_fd_or_close(int fd)
+{
+    if (fd == -1)
+        return false;
+    // The exec mapping below can fail, e.g. due to a `noexec` mount option...
+    fcntl(fd, F_SETFD, FD_CLOEXEC);
+    fchmod(fd, S_IRWXU);
+    ftruncate(fd, jl_page_size);
+    void *ptr = mmap(nullptr, jl_page_size, PROT_READ | PROT_EXEC,
+                     MAP_SHARED, fd, 0);
+    if (ptr == MAP_FAILED) {
+        close(fd);
+        return false;
+    }
+    munmap(ptr, jl_page_size);
+    return true;
+}
+#endif // _OS_WINDOWS_
+
+static intptr_t anon_hdl = -1;
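Background for the allocators this file introduces (commentary, not part of the patch): the same physical pages get mapped into the process twice, a writable view that codegen writes through and a read+execute view that the generated code runs from, so no page ever needs to be writable and executable at the same time. A minimal standalone sketch of the trick, assuming x86-64 Linux with `memfd_create`; all names are illustrative:

```c++
// Dual-map sketch: one anonymous fd, two views of the same pages.
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstring>
#include <cstdio>

int main()
{
    size_t sz = 4096;
    int fd = (int)syscall(SYS_memfd_create, "dualmap-demo", 0);
    if (fd == -1 || ftruncate(fd, sz) != 0)
        return 1;
    // Write view: RW mapping of the file.
    void *wr = mmap(nullptr, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    // Run view: R+X mapping of the same pages; never writable.
    void *rx = mmap(nullptr, sz, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (wr == MAP_FAILED || rx == MAP_FAILED)
        return 1;
    // x86-64 encoding of `mov eax, 42; ret`.
    static const unsigned char code[] = {0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3};
    memcpy(wr, code, sizeof(code));  // write through the RW view
    int (*fn)() = (int (*)())rx;     // execute through the RX view
    printf("%d\n", fn());            // prints 42
    return 0;
}
```

The allocators below implement exactly this, plus pooling, page-protection state tracking, and fallbacks for platforms where no anonymous fd is available.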
+
+#ifdef _OS_WINDOWS_
+// As far as I can tell, a `CreateFileMapping` object cannot be resized on
+// Windows. Also, creating a big file mapping and then mapping pieces of it
+// seems to consume too many global resources. Therefore, we use each file
+// mapping as a block on Windows.
+static void *create_shared_map(size_t size, size_t id)
+{
+    void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS,
+                               0, 0, size);
+    assert(addr && "Cannot map RW view");
+    return addr;
+}
+
+static intptr_t init_shared_map()
+{
+    anon_hdl = 0;
+    return 0;
+}
+
+static void *alloc_shared_page(size_t size, size_t *id, bool exec)
+{
+    assert(size % jl_page_size == 0);
+    auto file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
+    HANDLE hdl = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
+                                   file_mode, 0, size, NULL);
+    *id = (size_t)hdl;
+    auto map_mode = FILE_MAP_READ | (exec ? FILE_MAP_EXECUTE : 0);
+    void *addr = MapViewOfFile(hdl, map_mode, 0, 0, size);
+    assert(addr && "Cannot map RO view");
+    return addr;
+}
+#else // _OS_WINDOWS_
+// For the shared mapped region
+static intptr_t get_anon_hdl(void)
+{
+    int fd = -1;
+
+    // Linux and FreeBSD can create an anonymous fd without touching the
+    // file system.
+# ifdef __NR_memfd_create
+    fd = syscall(__NR_memfd_create, "julia-codegen", MFD_CLOEXEC);
+    if (check_fd_or_close(fd))
+        return fd;
+# endif
+# ifdef _OS_FREEBSD_
+    fd = shm_open(SHM_ANON, O_RDWR, S_IRWXU);
+    if (check_fd_or_close(fd))
+        return fd;
+# endif
+    char shm_name[] = "julia-codegen-0123456789-0123456789/tmp///";
+    pid_t pid = getpid();
+    // an fd from `shm_open` can't be mapped exec on mac
+# ifndef _OS_DARWIN_
+    do {
+        snprintf(shm_name, sizeof(shm_name),
+                 "julia-codegen-%d-%d", (int)pid, rand());
+        fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRWXU);
+        if (check_fd_or_close(fd)) {
+            shm_unlink(shm_name);
+            return fd;
+        }
+    } while (errno == EEXIST);
+# endif
+    FILE *tmpf = tmpfile();
+    if (tmpf) {
+        fd = dup(fileno(tmpf));
+        fclose(tmpf);
+        if (check_fd_or_close(fd)) {
+            return fd;
+        }
+    }
+    snprintf(shm_name, sizeof(shm_name),
+             "/tmp/julia-codegen-%d-XXXXXX", (int)pid);
+    fd = mkstemp(shm_name);
+    if (check_fd_or_close(fd)) {
+        unlink(shm_name);
+        return fd;
+    }
+    return -1;
+}
+
+static size_t map_offset = 0;
+// Multiple of 128MB.
+// Hopefully no one will set a ulimit small enough for this to be a problem...
+static constexpr size_t map_size_inc = 128 * 1024 * 1024;
+static size_t map_size = 0;
+static jl_mutex_t shared_map_lock;
+
+static void *create_shared_map(size_t size, size_t id)
+{
+    void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                      anon_hdl, id);
+    assert(addr != MAP_FAILED && "Cannot map RW view");
+    return addr;
+}
+
+static intptr_t init_shared_map()
+{
+    anon_hdl = get_anon_hdl();
+    if (anon_hdl == -1)
+        return -1;
+    map_offset = 0;
+    map_size = map_size_inc;
+    int ret = ftruncate(anon_hdl, map_size);
+    if (ret != 0) {
+        perror(__func__);
+        abort();
+    }
+    return anon_hdl;
+}
+
+static void *alloc_shared_page(size_t size, size_t *id, bool exec)
+{
+    assert(size % jl_page_size == 0);
+    size_t off = jl_atomic_fetch_add(&map_offset, size);
+    *id = off;
+    if (__unlikely(off + size > map_size)) {
+        JL_LOCK_NOGC(&shared_map_lock);
+        size_t old_size = map_size;
+        while (off + size > map_size)
+            map_size += map_size_inc;
+        if (old_size != map_size) {
+            int ret = ftruncate(anon_hdl, map_size);
+            if (ret != 0) {
+                perror(__func__);
+                abort();
+            }
+        }
+        JL_UNLOCK_NOGC(&shared_map_lock);
+    }
+    return create_shared_map(size, off);
+}
+#endif // _OS_WINDOWS_
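The Windows branch above gets the same two-view effect from one pagefile-backed mapping object per block. A sketch of just that mechanism (error handling elided; assumes the process is allowed to create `PAGE_EXECUTE_READWRITE` mapping objects):

```c++
// Windows dual-view sketch: one file mapping, an RW view and an R+X view.
#include <windows.h>
#include <cstdio>

int main()
{
    SIZE_T size = 65536;
    HANDLE hdl = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
                                   PAGE_EXECUTE_READWRITE, 0, (DWORD)size, NULL);
    if (!hdl)
        return 1;
    // Write view and run view of the same physical pages.
    void *wr = MapViewOfFile(hdl, FILE_MAP_WRITE, 0, 0, size);
    void *rx = MapViewOfFile(hdl, FILE_MAP_READ | FILE_MAP_EXECUTE, 0, 0, size);
    printf("write view %p, exec view %p\n", wr, rx);
    UnmapViewOfFile(wr);
    UnmapViewOfFile(rx);
    CloseHandle(hdl);
    return 0;
}
```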
+
+#ifdef _OS_LINUX_
+// Using `/proc/self/mem`, A.K.A. Keno's remote memory manager.
+
+static int self_mem_fd = -1;
+
+static int init_self_mem()
+{
+    int fd = open("/proc/self/mem", O_RDWR | O_SYNC | O_CLOEXEC);
+    if (fd == -1)
+        return -1;
+    // buffer to check whether the write works
+    volatile uint64_t buff = 0;
+    uint64_t v = 0x12345678;
+    int ret = pwrite(fd, (void*)&v, sizeof(uint64_t), (uintptr_t)&buff);
+    if (ret != sizeof(uint64_t) || buff != 0x12345678) {
+        close(fd);
+        return -1;
+    }
+    self_mem_fd = fd;
+    return fd;
+}
+
+static void write_self_mem(void *dest, void *ptr, size_t size)
+{
+    while (size > 0) {
+        ssize_t ret = pwrite(self_mem_fd, ptr, size, (uintptr_t)dest);
+        if (ret == size)
+            return;
+        if (ret == -1 && (errno == EAGAIN || errno == EINTR))
+            continue;
+        assert(ret < size);
+        size -= ret;
+        ptr = (char*)ptr + ret;
+        dest = (char*)dest + ret;
+    }
+}
+#endif // _OS_LINUX_
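For reference, a standalone sketch of the `/proc/self/mem` trick that `init_self_mem`/`write_self_mem` rely on: `pwrite(2)` on that file writes through page protections, so a page the process maps read-only never has to be made writable. Assumes a Linux kernel that allows forced writes via `/proc/self/mem`, which is exactly what the probe in `init_self_mem` verifies:

```c++
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <cstdint>
#include <cstdio>

int main()
{
    long page = sysconf(_SC_PAGESIZE);
    unsigned char *p = (unsigned char*)mmap(nullptr, page,
                                            PROT_READ | PROT_WRITE,
                                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
        return 1;
    p[0] = 1;
    mprotect(p, page, PROT_READ);  // now read-only in this process
    int fd = open("/proc/self/mem", O_RDWR);
    if (fd == -1)
        return 1;
    unsigned char v = 42;
    // The kernel performs the write; the RO mapping itself never changes.
    ssize_t n = pwrite(fd, &v, 1, (off_t)(uintptr_t)p);
    printf("wrote %zd byte(s), p[0] = %d\n", n, p[0]);  // 42 on success
    close(fd);
    return 0;
}
```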
+
+using namespace llvm;
+
+// Allocation strategies
+// * For RW data, no memory protection is needed; use a plain memory pool.
+// * For RO data or code:
+//
+//   The first allocation in a page always has its write address equal to the
+//   runtime address.
+//
+//   1. shared dual map
+//
+//      Map an (unlinked) anonymous file as the memory pool.
+//      After the first allocation, the write address points to the second map.
+//      The second map is set to unreadable and unwritable in finalization.
+//
+//   2. private dual map
+//
+//      Same as above, but use an anonymous memory map as the memory pool,
+//      and use a low level OS API to set up the second map.
+//
+//   3. copying data into the RO page, bypassing page protection
+//
+//      After the first allocation, the write address points to a temporary
+//      buffer. Requires copying the data out of the temporary buffer in
+//      finalization.
+
+// Allocate at least 256 pages per block and keep up to 8 blocks in the free
+// list. The block with the least free space is discarded when we need to
+// allocate a new page.
+// Unused full pages are free'd from the block before discarding, so at most
+// one page is wasted on each discarded block. There should be at most one
+// block with more than 128 pages available, so the discarded one must have
+// fewer than 128 pages available and therefore at least 128 pages used.
+// (Apart from fragmentation) this guarantees that less than 1% of memory is
+// wasted.
+
+// the `shared` type parameter is for Windows only....
+struct Block {
+    // runtime address
+    char *ptr{nullptr};
+    size_t total{0};
+    size_t avail{0};
+
+    Block(const Block&) = delete;
+    Block &operator=(const Block&) = delete;
+
+    Block() = default;
+
+    void *alloc(size_t size, size_t align)
+    {
+        size_t aligned_avail = avail & (-align);
+        if (aligned_avail < size)
+            return nullptr;
+        char *p = ptr + total - aligned_avail;
+        avail = aligned_avail - size;
+        return p;
+    }
+    void reset(void *addr, size_t size)
+    {
+        if (avail >= jl_page_size) {
+            uintptr_t end = uintptr_t(ptr) + total;
+            uintptr_t first_free = end - avail;
+            first_free = LLT_ALIGN(first_free, jl_page_size);
+            assert(first_free < end);
+            unmap_page((void*)first_free, end - first_free);
+        }
+        ptr = (char*)addr;
+        total = avail = size;
+    }
+};
+
+class RWAllocator {
+    static constexpr int nblocks = 8;
+    Block blocks[nblocks]{};
+public:
+    void *alloc(size_t size, size_t align)
+    {
+        size_t min_size = (size_t)-1;
+        int min_id = 0;
+        for (int i = 0; i < nblocks && blocks[i].ptr; i++) {
+            if (void *ptr = blocks[i].alloc(size, align))
+                return ptr;
+            if (blocks[i].avail < min_size) {
+                min_size = blocks[i].avail;
+                min_id = i;
+            }
+        }
+        size_t block_size = get_block_size(size);
+        blocks[min_id].reset(map_anon_page(block_size), block_size);
+        return blocks[min_id].alloc(size, align);
+    }
+};
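The `avail & (-align)` in `Block::alloc` rounds the number of available bytes down to a multiple of `align` (for power-of-two `align`); because every block ends on a page boundary, carving allocations off the end keeps the returned pointer aligned without explicitly rounding the pointer itself. A worked example with hypothetical numbers:

```c++
#include <cassert>
#include <cstdint>
#include <cstdio>

int main()
{
    uintptr_t ptr = 0x10000;  // block start (page aligned)
    size_t total = 0x1000;    // block size: one page
    size_t avail = 0xff6;     // 4086 bytes left after an earlier allocation

    size_t align = 16, size = 10;
    size_t aligned_avail = avail & (size_t)-align;  // 0xff0: rounded *down*
    assert(aligned_avail >= size);
    uintptr_t p = ptr + total - aligned_avail;      // 0x10010: end-anchored bump
    avail = aligned_avail - size;                   // 0xfe6 bytes remain

    assert(p % align == 0);  // aligned because ptr + total is page aligned
    printf("allocated at %#zx, %zu bytes still available\n", (size_t)p, avail);
    return 0;
}
```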
+
+struct SplitPtrBlock : public Block {
+    // Possible states
+    // Allocation:
+    // * Initial allocation: `state & InitAlloc`
+    // * Follow-up allocation: `(state & Alloc) && !(state & InitAlloc)`
+    enum State {
+        // This block has no page protection set yet
+        InitAlloc = (1 << 0),
+        // There is at least one allocation in this page since the last
+        // finalization
+        Alloc = (1 << 1),
+        // `wr_ptr` can be directly used as the write address.
+        WRInit = (1 << 2),
+        // With `WRInit` set, whether `wr_ptr` has write permission enabled.
+        WRReady = (1 << 3),
+    };
+
+    uintptr_t wr_ptr{0};
+    uint32_t state{0};
+    SplitPtrBlock() = default;
+
+    void swap(SplitPtrBlock &other)
+    {
+        std::swap(ptr, other.ptr);
+        std::swap(total, other.total);
+        std::swap(avail, other.avail);
+        std::swap(wr_ptr, other.wr_ptr);
+        std::swap(state, other.state);
+    }
+
+    SplitPtrBlock(SplitPtrBlock &&other)
+        : SplitPtrBlock()
+    {
+        swap(other);
+    }
+};
+
+struct Allocation {
+    // Address to write to (the one returned by the allocation function)
+    void *wr_addr;
+    // Runtime address
+    void *rt_addr;
+    size_t sz;
+    bool relocated;
+};
+
+template<bool exec>
+class ROAllocator {
+protected:
+    static constexpr int nblocks = 8;
+    SplitPtrBlock blocks[nblocks];
+    // Blocks that are done allocating (removed from `blocks`)
+    // but might not have all the permissions set or data copied yet.
+    SmallVector<SplitPtrBlock, 16> completed;
+    virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
+                             size_t size, size_t align) = 0;
+    virtual SplitPtrBlock alloc_block(size_t size) = 0;
+public:
+    virtual ~ROAllocator() {}
+    virtual void finalize()
+    {
+        if (exec) {
+            for (auto &alloc: allocations) {
+                sys::Memory::InvalidateInstructionCache(alloc.rt_addr,
+                                                        alloc.sz);
+            }
+        }
+        completed.clear();
+        allocations.clear();
+    }
+    // Allocations that have not been finalized yet.
+    SmallVector<Allocation, 16> allocations;
+    void *alloc(size_t size, size_t align)
+    {
+        size_t min_size = (size_t)-1;
+        int min_id = 0;
+        for (int i = 0; i < nblocks && blocks[i].ptr; i++) {
+            auto &block = blocks[i];
+            void *ptr = block.alloc(size, align);
+            if (ptr) {
+                void *wr_ptr;
+                if (block.state & SplitPtrBlock::InitAlloc) {
+                    wr_ptr = ptr;
+                }
+                else {
+                    wr_ptr = get_wr_ptr(block, ptr, size, align);
+                }
+                block.state |= SplitPtrBlock::Alloc;
+                allocations.push_back(Allocation{wr_ptr, ptr, size, false});
+                return wr_ptr;
+            }
+            if (block.avail < min_size) {
+                min_size = block.avail;
+                min_id = i;
+            }
+        }
+        size_t block_size = get_block_size(size);
+        auto &block = blocks[min_id];
+        auto new_block = alloc_block(block_size);
+        block.swap(new_block);
+        if (new_block.state) {
+            completed.push_back(std::move(new_block));
+        }
+        else {
+            new_block.reset(nullptr, 0);
+        }
+        void *ptr = block.alloc(size, align);
+#ifdef _OS_WINDOWS_
+        block.state = SplitPtrBlock::Alloc;
+        void *wr_ptr = get_wr_ptr(block, ptr, size, align);
+        allocations.push_back(Allocation{wr_ptr, ptr, size, false});
+        ptr = wr_ptr;
+#else
+        block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc;
+        allocations.push_back(Allocation{ptr, ptr, size, false});
+#endif
+        return ptr;
+    }
+};
+
+template<bool exec>
+class DualMapAllocator : public ROAllocator<exec> {
+protected:
+    void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override
+    {
+        assert((char*)rt_ptr >= block.ptr &&
+               (char*)rt_ptr < (block.ptr + block.total));
+        if (!(block.state & SplitPtrBlock::WRInit)) {
+            block.wr_ptr = (uintptr_t)create_shared_map(block.total,
+                                                        block.wr_ptr);
+            block.state |= SplitPtrBlock::WRInit;
+        }
+        if (!(block.state & SplitPtrBlock::WRReady)) {
+            protect_page((void*)block.wr_ptr, block.total, Prot::RW);
+            block.state |= SplitPtrBlock::WRReady;
+        }
+        return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr));
+    }
+    SplitPtrBlock alloc_block(size_t size) override
+    {
+        SplitPtrBlock new_block;
+        // use `wr_ptr` to record the id initially
+        auto ptr = alloc_shared_page(size, (size_t*)&new_block.wr_ptr, exec);
+        new_block.reset(ptr, size);
+        return new_block;
+    }
+    void finalize_block(SplitPtrBlock &block, bool reset)
+    {
+        // This function handles setting the block to the right mode
+        // and free'ing maps that are not needed anymore.
+        // If `reset` is `true`, we won't allocate from this block anymore and
+        // we should free up the resources that are not needed at runtime.
+        if (!(block.state & SplitPtrBlock::Alloc)) {
+            // A block that was not used this time; check if we need to free it.
+            if ((block.state & SplitPtrBlock::WRInit) && reset)
+                unmap_page((void*)block.wr_ptr, block.total);
+            return;
+        }
+        // For a block we used this time
+        if (block.state & SplitPtrBlock::InitAlloc) {
+            // For an initial block, we have a single RW map.
+            // Need to map it to RO or RX.
+            assert(!(block.state & (SplitPtrBlock::WRReady |
+                                    SplitPtrBlock::WRInit)));
+            protect_page(block.ptr, block.total, exec ? Prot::RX : Prot::RO);
+            block.state = 0;
+        }
+        else {
+            // For the others, the runtime address already has the correct
+            // mode. Need to map the write address to RO.
+            assert(block.state & SplitPtrBlock::WRInit);
+            assert(block.state & SplitPtrBlock::WRReady);
+            if (reset) {
+                unmap_page((void*)block.wr_ptr, block.total);
+            }
+            else {
+                protect_page((void*)block.wr_ptr, block.total, Prot::RO);
+                block.state = SplitPtrBlock::WRInit;
+            }
+        }
+    }
+public:
+    DualMapAllocator()
+    {
+        assert(anon_hdl != -1);
+    }
+    void finalize() override
+    {
+        for (auto &block : this->blocks) {
+            finalize_block(block, false);
+        }
+        for (auto &block : this->completed) {
+            finalize_block(block, true);
+            block.reset(nullptr, 0);
+        }
+        ROAllocator<exec>::finalize();
+    }
+};
+
+#ifdef _OS_LINUX_
+template<bool exec>
+class SelfMemAllocator : public ROAllocator<exec> {
+    SmallVector<Block, 16> temp_buff;
+protected:
+    void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr,
+                     size_t size, size_t align) override
+    {
+        assert(!(block.state & SplitPtrBlock::InitAlloc));
+        for (auto &wr_block: temp_buff) {
+            if (void *ptr = wr_block.alloc(size, align)) {
+                return ptr;
+            }
+        }
+        temp_buff.emplace_back();
+        Block &new_block = temp_buff.back();
+        size_t block_size = get_block_size(size);
+        new_block.reset(map_anon_page(block_size), block_size);
+        return new_block.alloc(size, align);
+    }
+    SplitPtrBlock alloc_block(size_t size) override
+    {
+        SplitPtrBlock new_block;
+        new_block.reset(map_anon_page(size), size);
+        return new_block;
+    }
+    void finalize_block(SplitPtrBlock &block, bool reset)
+    {
+        if (!(block.state & SplitPtrBlock::Alloc))
+            return;
+        if (block.state & SplitPtrBlock::InitAlloc) {
+            // for an initial block, we need to map it to ro or rx
+            assert(!(block.state & (SplitPtrBlock::WRReady |
+                                    SplitPtrBlock::WRInit)));
+            protect_page(block.ptr, block.total, exec ? Prot::RX : Prot::RO);
+            block.state = 0;
+        }
+    }
+public:
+    SelfMemAllocator()
+        : ROAllocator<exec>(),
+          temp_buff()
+    {
+        assert(self_mem_fd != -1);
+    }
+    void finalize() override
+    {
+        for (auto &block : this->blocks) {
+            finalize_block(block, false);
+        }
+        for (auto &block : this->completed) {
+            finalize_block(block, true);
+            block.reset(nullptr, 0);
+        }
+        for (auto &alloc : this->allocations) {
+            if (alloc.rt_addr == alloc.wr_addr)
+                continue;
+            write_self_mem(alloc.rt_addr, alloc.wr_addr, alloc.sz);
+        }
+        // clear all the temp buffers except the first one
+        // (we expect only one)
+        bool cached = false;
+        for (auto &block : temp_buff) {
+            if (cached) {
+                munmap(block.ptr, block.total);
+                block.ptr = nullptr;
+                block.total = block.avail = 0;
+            }
+            else {
+                block.avail = block.total;
+                cached = true;
+            }
+        }
+        if (cached)
+            temp_buff.resize(1);
+        ROAllocator<exec>::finalize();
+    }
+};
+#endif // _OS_LINUX_
+
+class RTDyldMemoryManagerJL : public SectionMemoryManager {
+    struct EHFrame {
+        uint8_t *addr;
+        size_t size;
+    };
+    RTDyldMemoryManagerJL(const RTDyldMemoryManagerJL&) = delete;
+    void operator=(const RTDyldMemoryManagerJL&) = delete;
+    SmallVector<EHFrame, 16> pending_eh;
+    RWAllocator rw_alloc;
+    std::unique_ptr<ROAllocator<false>> ro_alloc;
+    std::unique_ptr<ROAllocator<true>> exe_alloc;
+    bool code_allocated;
+
+public:
+    RTDyldMemoryManagerJL()
+        : SectionMemoryManager(),
+          pending_eh(),
+          rw_alloc(),
+          ro_alloc(),
+          exe_alloc(),
+          code_allocated(false)
+    {
+#ifdef _OS_LINUX_
+        if (!ro_alloc && init_self_mem() != -1) {
+            ro_alloc.reset(new SelfMemAllocator<false>());
+            exe_alloc.reset(new SelfMemAllocator<true>());
+        }
+#endif
+        if (!ro_alloc && init_shared_map() != -1) {
+            ro_alloc.reset(new DualMapAllocator<false>());
+            exe_alloc.reset(new DualMapAllocator<true>());
+        }
+    }
+    ~RTDyldMemoryManagerJL() override
+    {
+    }
+    void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+                          size_t Size) override;
+    void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+                            size_t Size) override;
+    uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+                                 unsigned SectionID,
+                                 StringRef SectionName) override;
+    uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+                                 unsigned SectionID, StringRef SectionName,
+                                 bool isReadOnly) override;
+#ifdef LLVM38
+    void notifyObjectLoaded(RuntimeDyld &Dyld,
+                            const object::ObjectFile &Obj) override;
+#endif
+    bool finalizeMemory(std::string *ErrMsg = nullptr) override;
+    template <typename DL, typename Alloc>
+    void mapAddresses(DL &Dyld, Alloc &&allocator)
+    {
+        for (auto &alloc: allocator->allocations) {
+            if (alloc.rt_addr == alloc.wr_addr || alloc.relocated)
+                continue;
+            alloc.relocated = true;
+            Dyld.mapSectionAddress(alloc.wr_addr, (uintptr_t)alloc.rt_addr);
+        }
+    }
+    template <typename DL>
+    void mapAddresses(DL &Dyld)
+    {
+        if (!ro_alloc)
+            return;
+        mapAddresses(Dyld, ro_alloc);
+        mapAddresses(Dyld, exe_alloc);
+    }
+#ifdef _OS_WINDOWS_
+    template <typename Alloc>
+    void *lookupWriteAddressFor(void *rt_addr, Alloc &&allocator)
+    {
+        for (auto &alloc: allocator->allocations) {
+            if (alloc.rt_addr == rt_addr) {
+                return alloc.wr_addr;
+            }
+        }
+        return nullptr;
+    }
+    void *lookupWriteAddressFor(void *rt_addr)
+    {
+        if (!ro_alloc)
+            return rt_addr;
+        if (void *ptr = lookupWriteAddressFor(rt_addr, ro_alloc))
+            return ptr;
+        if (void *ptr = lookupWriteAddressFor(rt_addr, exe_alloc))
+            return ptr;
+        return rt_addr;
+    }
+#endif // _OS_WINDOWS_
+};
+
+uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
+                                                    unsigned Alignment,
+                                                    unsigned SectionID,
+                                                    StringRef SectionName)
+{
+    // allocating more than one code section can confuse libunwind.
+    assert(!code_allocated);
+    code_allocated = true;
+    if (exe_alloc)
+        return (uint8_t*)exe_alloc->alloc(Size, Alignment);
+    return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
+                                                     SectionName);
+}
+
+uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
+                                                    unsigned Alignment,
+                                                    unsigned SectionID,
+                                                    StringRef SectionName,
+                                                    bool isReadOnly)
+{
+    if (!isReadOnly)
+        return (uint8_t*)rw_alloc.alloc(Size, Alignment);
+    if (ro_alloc)
+        return (uint8_t*)ro_alloc->alloc(Size, Alignment);
+    return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID,
+                                                     SectionName, isReadOnly);
+}
+
+#ifdef LLVM38
+void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld,
+                                               const object::ObjectFile &Obj)
+{
+    if (!ro_alloc) {
+        assert(!exe_alloc);
+        SectionMemoryManager::notifyObjectLoaded(Dyld, Obj);
+        return;
+    }
+    assert(exe_alloc);
+    mapAddresses(Dyld);
+}
+#endif
+
+bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg)
+{
+    code_allocated = false;
+    if (ro_alloc) {
+        ro_alloc->finalize();
+        assert(exe_alloc);
+        exe_alloc->finalize();
+        for (auto &frame: pending_eh)
+            register_eh_frames(frame.addr, frame.size);
+        pending_eh.clear();
+        return false;
+    }
+    else {
+        assert(!exe_alloc);
+        return SectionMemoryManager::finalizeMemory(ErrMsg);
+    }
+}
+
+void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr,
+                                             uint64_t LoadAddr,
+                                             size_t Size)
+{
+    if (uintptr_t(Addr) == LoadAddr) {
+        register_eh_frames(Addr, Size);
+    }
+    else {
+        pending_eh.push_back(EHFrame{(uint8_t*)(uintptr_t)LoadAddr, Size});
+    }
+}
+
+void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
+                                               uint64_t LoadAddr,
+                                               size_t Size)
+{
+    deregister_eh_frames((uint8_t*)LoadAddr, Size);
+}
+
+}
+
+#ifndef LLVM38
+void notifyObjectLoaded(RTDyldMemoryManager *memmgr,
+                        llvm::orc::ObjectLinkingLayerBase::ObjSetHandleT H)
+{
+    ((RTDyldMemoryManagerJL*)memmgr)->mapAddresses(**H);
+}
+#endif
+
+#ifdef _OS_WINDOWS_
+void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr)
+{
+    return ((RTDyldMemoryManagerJL*)memmgr)->lookupWriteAddressFor(rt_addr);
+}
+#endif
+
+#else // LLVM37
+typedef SectionMemoryManager RTDyldMemoryManagerJL;
+#endif // LLVM37
+
+RTDyldMemoryManager* createRTDyldMemoryManager()
+{
+    return new RTDyldMemoryManagerJL();
+}
+#endif // USE_MCJIT
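That ends the new file. For context, this is roughly how the manager gets handed to MCJIT on the codegen side; a sketch only, mirroring the `codegen.cpp` hunk below and assuming the LLVM 3.7 `EngineBuilder` API:

```c++
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
#include <llvm/IR/Module.h>
#include <memory>

llvm::RTDyldMemoryManager *createRTDyldMemoryManager();  // from cgmemmgr.cpp

std::unique_ptr<llvm::ExecutionEngine> make_engine(std::unique_ptr<llvm::Module> mod)
{
    llvm::EngineBuilder eb(std::move(mod));
    // MCJIT then allocates all of its sections through the custom manager.
    eb.setEngineKind(llvm::EngineKind::JIT)
      .setMCJITMemoryManager(
          std::unique_ptr<llvm::RTDyldMemoryManager>(createRTDyldMemoryManager()));
    return std::unique_ptr<llvm::ExecutionEngine>(eb.create());
}
```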
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 9023196c1fafe..4d18e27392fa3 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -97,7 +97,6 @@
 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
 #elif defined(USE_MCJIT)
 #include <llvm/ExecutionEngine/MCJIT.h>
-#include <llvm/ExecutionEngine/SectionMemoryManager.h>
 #include <llvm/ADT/DenseMapInfo.h>
 #include <llvm/Object/ObjectFile.h>
 #else
@@ -338,16 +337,9 @@ static GlobalVariable *jlRTLD_DEFAULT_var;
 static GlobalVariable *jlexe_var;
 static GlobalVariable *jldll_var;
 #if defined(_CPU_X86_64_) && !defined(USE_MCJIT)
-extern JITMemoryManager *createJITMemoryManagerWin();
+JITMemoryManager *createJITMemoryManagerWin();
 #endif
 #endif //_OS_WINDOWS_
-#if defined(_OS_DARWIN_) && defined(LLVM37) && defined(LLVM_SHLIB)
-#define CUSTOM_MEMORY_MANAGER createRTDyldMemoryManagerOSX
-extern RTDyldMemoryManager *createRTDyldMemoryManagerOSX();
-#elif defined(_OS_LINUX_) && defined(LLVM37) && defined(JL_UNW_HAS_FORMAT_IP)
-#define CUSTOM_MEMORY_MANAGER createRTDyldMemoryManagerUnix
-extern RTDyldMemoryManager *createRTDyldMemoryManagerUnix();
-#endif
 
 static Function *jltls_states_func;
 #ifndef JULIA_ENABLE_THREADING
@@ -5758,10 +5750,8 @@ extern "C" void jl_init_codegen(void)
     eb  .setEngineKind(EngineKind::JIT)
 #if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_) && !defined(USE_MCJIT)
         .setJITMemoryManager(createJITMemoryManagerWin())
-#elif defined(CUSTOM_MEMORY_MANAGER)
-        .setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>{CUSTOM_MEMORY_MANAGER()})
-#elif defined(USE_ORCMCJIT) // ORCJIT forgets to create one if one isn't created for it
-        .setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>{new SectionMemoryManager()})
+#elif defined(USE_MCJIT)
+        .setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>{createRTDyldMemoryManager()})
 #endif
         .setTargetOptions(options)
 #if (defined(_OS_LINUX_) && defined(_CPU_X86_64_))
diff --git a/src/codegen_internal.h b/src/codegen_internal.h
index f939edc118807..07388aa5dcb31 100644
--- a/src/codegen_internal.h
+++ b/src/codegen_internal.h
@@ -1,5 +1,11 @@
 // This file is a part of Julia. License is MIT: http://julialang.org/license
 
+#if defined(LLVM37) && !defined(LLVM38)
+# include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
+void notifyObjectLoaded(RTDyldMemoryManager *memmgr,
+                        llvm::orc::ObjectLinkingLayerBase::ObjSetHandleT H);
+#endif
+
 // Declarations for disasm.cpp
 extern "C"
 void jl_dump_asm_internal(uintptr_t Fptr, size_t Fsize, int64_t slide,
@@ -28,8 +34,16 @@ extern bool jl_dylib_DI_for_fptr(size_t pointer, const object::ObjectFile **object,
                                  bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename);
 
 #ifdef USE_ORCJIT
-extern JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
-                                                const object::ObjectFile &obj,
-                                                const object::ObjectFile &debugObj,
-                                                const RuntimeDyld::LoadedObjectInfo &L);
+JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
+                                         const object::ObjectFile &obj,
+                                         const object::ObjectFile &debugObj,
+                                         const RuntimeDyld::LoadedObjectInfo &L,
+                                         RTDyldMemoryManager *memmgr);
+#ifdef _OS_WINDOWS_
+void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr);
+#endif
+#endif
+
+#ifdef USE_MCJIT
+RTDyldMemoryManager* createRTDyldMemoryManager(void);
 #endif
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 87241ef29a730..10184e317c85e 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -298,12 +298,13 @@ class JuliaJITEventListener: public JITEventListener
     virtual void NotifyObjectEmitted(const object::ObjectFile &obj,
                                      const RuntimeDyld::LoadedObjectInfo &L)
     {
-        return _NotifyObjectEmitted(obj,obj,L);
+        return _NotifyObjectEmitted(obj,obj,L,nullptr);
     }
 
     virtual void _NotifyObjectEmitted(const object::ObjectFile &obj,
-                                      const object::ObjectFile &debugObj,
-                                      const RuntimeDyld::LoadedObjectInfo &L)
+                                      const object::ObjectFile &debugObj,
+                                      const RuntimeDyld::LoadedObjectInfo &L,
+                                      RTDyldMemoryManager *memmgr)
 #else
     virtual void NotifyObjectEmitted(const ObjectImage &obj)
 #endif
@@ -340,6 +341,7 @@ class JuliaJITEventListener: public JITEventListener
         uint64_t SectionAddrCheck = 0; // assert that all of the Sections are at the same location
         uint8_t *UnwindData = NULL;
 #if defined(_CPU_X86_64_)
+        uint64_t SectionLoadOffset = 1; // The real offset shouldn't be 1.
         uint8_t *catchjmp = NULL;
         for (const object::SymbolRef &sym_iter : debugObj.symbols()) {
             StringRef sName;
@@ -400,25 +402,37 @@ class JuliaJITEventListener: public JITEventListener
                 assert(SectionAddrCheck == SectionLoadAddr);
             else
                 SectionAddrCheck = SectionLoadAddr;
+#ifdef USE_ORCJIT
+            if (memmgr)
+                SectionAddr =
+                    (uintptr_t)lookupWriteAddressFor(memmgr,
+                                                     (void*)SectionLoadAddr);
+#endif
+            if (SectionLoadOffset != 1)
+                assert(SectionLoadOffset == SectionAddr - SectionLoadAddr);
+            else
+                SectionLoadOffset = SectionAddr - SectionLoadAddr;
         }
     }
     assert(catchjmp);
     assert(UnwindData);
     assert(SectionAddrCheck);
-    catchjmp[0] = 0x48;
-    catchjmp[1] = 0xb8; // mov RAX, QWORD PTR [&_seh_exception_handler]
-    *(uint64_t*)(&catchjmp[2]) = (uint64_t)&_seh_exception_handler;
-    catchjmp[10] = 0xff;
-    catchjmp[11] = 0xe0; // jmp RAX
-    UnwindData[0] = 0x09; // version info, UNW_FLAG_EHANDLER
-    UnwindData[1] = 4;    // size of prolog (bytes)
-    UnwindData[2] = 2;    // count of unwind codes (slots)
-    UnwindData[3] = 0x05; // frame register (rbp) = rsp
-    UnwindData[4] = 4;    // second instruction
-    UnwindData[5] = 0x03; // mov RBP, RSP
-    UnwindData[6] = 1;    // first instruction
-    UnwindData[7] = 0x50; // push RBP
-    *(DWORD*)&UnwindData[8] = (DWORD)(catchjmp - (uint8_t*)SectionAddrCheck); // relative location of catchjmp
+    assert(SectionLoadOffset != 1);
+    catchjmp[SectionLoadOffset] = 0x48;
+    catchjmp[SectionLoadOffset + 1] = 0xb8; // mov RAX, QWORD PTR [&_seh_exception_handler]
+    *(uint64_t*)(&catchjmp[SectionLoadOffset + 2]) =
+        (uint64_t)&_seh_exception_handler;
+    catchjmp[SectionLoadOffset + 10] = 0xff;
+    catchjmp[SectionLoadOffset + 11] = 0xe0; // jmp RAX
+    UnwindData[SectionLoadOffset] = 0x09;     // version info, UNW_FLAG_EHANDLER
+    UnwindData[SectionLoadOffset + 1] = 4;    // size of prolog (bytes)
+    UnwindData[SectionLoadOffset + 2] = 2;    // count of unwind codes (slots)
+    UnwindData[SectionLoadOffset + 3] = 0x05; // frame register (rbp) = rsp
+    UnwindData[SectionLoadOffset + 4] = 4;    // second instruction
+    UnwindData[SectionLoadOffset + 5] = 0x03; // mov RBP, RSP
+    UnwindData[SectionLoadOffset + 6] = 1;    // first instruction
+    UnwindData[SectionLoadOffset + 7] = 0x50; // push RBP
+    *(DWORD*)&UnwindData[SectionLoadOffset + 8] = (DWORD)(catchjmp - (uint8_t*)SectionAddrCheck); // relative location of catchjmp
 #endif // defined(_OS_X86_64_)
 #endif // defined(_OS_WINDOWS_)
@@ -606,9 +620,10 @@ class JuliaJITEventListener: public JITEventListener
 JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
                                          const object::ObjectFile &obj,
                                          const object::ObjectFile &debugObj,
-                                         const RuntimeDyld::LoadedObjectInfo &L)
+                                         const RuntimeDyld::LoadedObjectInfo &L,
+                                         RTDyldMemoryManager *memmgr)
 {
-    ((JuliaJITEventListener*)Listener)->_NotifyObjectEmitted(obj,debugObj,L);
+    ((JuliaJITEventListener*)Listener)->_NotifyObjectEmitted(obj,debugObj,L,memmgr);
 }
 #endif
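The twelve bytes stored through `catchjmp` above encode an x86-64 absolute-jump thunk. A hypothetical helper showing the same encoding, purely for illustration:

```c++
#include <cstdint>
#include <cstring>

// movabs rax, imm64 (REX.W 0x48 + opcode 0xb8) followed by jmp rax (0xff 0xe0).
static void write_catchjmp_thunk(uint8_t *buf, const void *handler)
{
    buf[0] = 0x48;                // REX.W prefix
    buf[1] = 0xb8;                // mov rax, imm64
    uint64_t target = (uint64_t)(uintptr_t)handler;
    memcpy(&buf[2], &target, 8);  // 8-byte absolute handler address
    buf[10] = 0xff;               // jmp r/m64
    buf[11] = 0xe0;               // ModRM: register-direct, rax
}
```

The patch writes these bytes at `SectionLoadOffset` because, with the dual-map manager, a section's write address and runtime address can differ; the per-object offset recorded above translates between the two.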
@@ -1341,28 +1356,11 @@ static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f)
  * ourselves to ensure the right one gets picked.
  */
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-class RTDyldMemoryManagerOSX : public SectionMemoryManager
-{
-    RTDyldMemoryManagerOSX(const RTDyldMemoryManagerOSX&) = delete;
-    void operator=(const RTDyldMemoryManagerOSX&) = delete;
-
-public:
-    RTDyldMemoryManagerOSX() {};
-    ~RTDyldMemoryManagerOSX() override {};
-    void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
-    void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
-};
-
 static void (*libc_register_frame)(void*) = NULL;
 static void (*libc_deregister_frame)(void*) = NULL;
 
 // This implementation handles frame registration for local targets.
-// Memory managers for remote targets should re-implement this function
-// and use the LoadAddr parameter.
-void RTDyldMemoryManagerOSX::registerEHFrames(uint8_t *Addr,
-                                              uint64_t LoadAddr,
-                                              size_t Size)
+void register_eh_frames(uint8_t *Addr, size_t Size)
 {
     // On OS X, __register_frame takes a single FDE as an argument.
     // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html
@@ -1376,9 +1374,7 @@ void RTDyldMemoryManagerOSX::registerEHFrames(uint8_t *Addr,
     });
 }
 
-void RTDyldMemoryManagerOSX::deregisterEHFrames(uint8_t *Addr,
-                                                uint64_t LoadAddr,
-                                                size_t Size)
+void deregister_eh_frames(uint8_t *Addr, size_t Size)
 {
     processFDEs((char*)Addr, Size, [](const char *Entry) {
         if (!libc_deregister_frame) {
@@ -1390,27 +1386,8 @@ void RTDyldMemoryManagerOSX::deregisterEHFrames(uint8_t *Addr,
     });
 }
 
-RTDyldMemoryManager* createRTDyldMemoryManagerOSX()
-{
-    return new RTDyldMemoryManagerOSX();
-}
-
-#endif
-
-#if defined(_OS_LINUX_) && defined(LLVM37) && defined(JL_UNW_HAS_FORMAT_IP)
+#elif defined(_OS_LINUX_) && defined(LLVM37) && defined(JL_UNW_HAS_FORMAT_IP)
 #include <libunwind.h>
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-class RTDyldMemoryManagerUnix : public SectionMemoryManager
-{
-    RTDyldMemoryManagerUnix(const RTDyldMemoryManagerUnix&) = delete;
-    void operator=(const RTDyldMemoryManagerUnix&) = delete;
-
-public:
-    RTDyldMemoryManagerUnix() {};
-    ~RTDyldMemoryManagerUnix() override {};
-    void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
-    void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
-};
 
 struct unw_table_entry
 {
@@ -1592,9 +1569,7 @@ static DW_EH_PE parseCIE(const uint8_t *Addr, const uint8_t *End)
     return DW_EH_PE_absptr;
 }
 
-void RTDyldMemoryManagerUnix::registerEHFrames(uint8_t *Addr,
-                                               uint64_t LoadAddr,
-                                               size_t Size)
+void register_eh_frames(uint8_t *Addr, size_t Size)
 {
 #ifndef _CPU_ARM_
     // System unwinder
@@ -1705,7 +1680,7 @@
         if (start < start_ip)
             start_ip = start;
         if (end_ip < (start + size))
-            end_ip = start+size;
+            end_ip = start + size;
         table[cur_entry].fde_offset =
             safe_trunc<int32_t>((intptr_t)Entry - (intptr_t)Addr);
         start_ips[cur_entry] = start;
@@ -1725,9 +1700,7 @@
     _U_dyn_register(di);
 }
 
-void RTDyldMemoryManagerUnix::deregisterEHFrames(uint8_t *Addr,
-                                                 uint64_t LoadAddr,
-                                                 size_t Size)
+void deregister_eh_frames(uint8_t *Addr, size_t Size)
 {
 #ifndef _CPU_ARM_
     __deregister_frame(Addr);
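For readers unfamiliar with `processFDEs` (used by both `register_eh_frames` implementations above): `.eh_frame` is a sequence of length-prefixed records, where a record whose ID word is zero is a CIE and anything else is an FDE. A simplified walker under those assumptions; it ignores the 64-bit DWARF `0xffffffff` length escape:

```c++
#include <cstdint>
#include <cstring>

template <typename Callback>
static void for_each_fde(const char *addr, size_t size, Callback cb)
{
    const char *p = addr;
    const char *end = addr + size;
    while (p < end) {
        uint32_t len;            // record length, excluding this field
        memcpy(&len, p, 4);
        if (len == 0)            // zero-length terminator record
            break;
        uint32_t id;             // 0 => CIE, anything else => FDE
        memcpy(&id, p + 4, 4);
        if (id != 0)
            cb(p);               // hand the FDE to the callback
        p += len + 4;
    }
}
```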
@@ -1737,9 +1710,14 @@
     // data structures).
 }
 
-RTDyldMemoryManager* createRTDyldMemoryManagerUnix()
+#else
+
+void register_eh_frames(uint8_t *Addr, size_t Size)
+{
+}
+
+void deregister_eh_frames(uint8_t *Addr, size_t Size)
 {
-    return new RTDyldMemoryManagerUnix();
 }
 
 #endif
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 26c2f32123e38..bc594847fb1c0 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -1,5 +1,7 @@
 // This file is a part of Julia. License is MIT: http://julialang.org/license
 
+#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
+
 // Except for parts of this file which were copied from LLVM, under the UIUC license (marked below).
 
 // this defines the set of optimization passes defined for Julia at various optimization levels
@@ -131,6 +133,11 @@ static void addOptimizationPasses(T *PM)
 
 #ifdef USE_ORCJIT
 
+#ifndef LLVM38
+void notifyObjectLoaded(RTDyldMemoryManager *memmgr,
+                        llvm::orc::ObjectLinkingLayerBase::ObjSetHandleT H);
+#endif
+
 // ------------------------ TEMPORARILY COPIED FROM LLVM -----------------
 // This must be kept in sync with gdb/gdb/jit.h .
 extern "C" {
@@ -190,17 +197,6 @@ void NotifyDebugger(jit_code_entry *JITCodeEntry)
 }
 // ------------------------ END OF TEMPORARY COPY FROM LLVM -----------------
 
-#if defined(_OS_DARWIN_) && defined(LLVM37) && defined(LLVM_SHLIB)
-#define CUSTOM_MEMORY_MANAGER createRTDyldMemoryManagerOSX
-extern RTDyldMemoryManager *createRTDyldMemoryManagerOSX();
-#elif defined(_OS_LINUX_) && defined(LLVM37) && defined(JL_UNW_HAS_FORMAT_IP)
-#define CUSTOM_MEMORY_MANAGER createRTDyldMemoryManagerUnix
-extern RTDyldMemoryManager *createRTDyldMemoryManagerUnix();
-#endif
-#ifndef CUSTOM_MEMORY_MANAGER
-#define CUSTOM_MEMORY_MANAGER() new SectionMemoryManager
-#endif
-
 #ifdef _OS_LINUX_
 // Resolve non-lock free atomic functions in the libatomic library.
 // This is the library that provides support for c11/c++11 atomic operations.
@@ -232,6 +228,9 @@ class JuliaOJIT {
         void operator()(ObjectLinkingLayerBase::ObjSetHandleT H,
                         const ObjSetT &Objects, const LoadResult &LOS)
         {
+#ifndef LLVM38
+            notifyObjectLoaded(JIT.MemMgr, H);
+#endif
             auto oit = Objects.begin();
             auto lit = LOS.begin();
             for (; oit != Objects.end(); ++oit, ++lit) {
@@ -263,7 +262,7 @@ class JuliaOJIT {
             ORCNotifyObjectEmitted(JuliaListener.get(),
                                    *Object,
                                    *SavedObjects.back().getBinary(),
-                                   *LO);
+                                   *LO, JIT.MemMgr);
 
             // record all of the exported symbols defined in this object
             // in the primary hash table for the enclosing JIT
@@ -322,7 +321,7 @@ class JuliaOJIT {
         : TM(TM),
           DL(TM.createDataLayout()),
           ObjStream(ObjBufferSV),
-          MemMgr(CUSTOM_MEMORY_MANAGER()),
+          MemMgr(createRTDyldMemoryManager()),
           ObjectLayer(DebugObjectRegistrar(*this)),
           CompileLayer(
                   ObjectLayer,
@@ -389,7 +388,7 @@ class JuliaOJIT {
     }
 
-    ModuleHandleT addModule(std::unique_ptr<Module> M)
+    void addModule(std::unique_ptr<Module> M)
     {
 #ifndef NDEBUG
         // validate the relocations for M
@@ -434,7 +433,6 @@ class JuliaOJIT {
         // Force LLVM to emit the module so that we can register the symbols
         // in our lookup table.
         CompileLayer.emitAndFinalize(modset);
-        return modset;
     }
 
     void removeModule(ModuleHandleT H)
diff --git a/src/julia_internal.h b/src/julia_internal.h
index b41a54fcee4de..c13925b82334f 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -640,6 +640,9 @@ jl_value_t *jl_lookup_match(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, jl_s
 
 unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
+void register_eh_frames(uint8_t *Addr, size_t Size);
+void deregister_eh_frames(uint8_t *Addr, size_t Size);
+
 STATIC_INLINE void *jl_get_frame_addr(void)
 {
 #ifdef __GNUC__