diff --git a/src/Makefile b/src/Makefile index e6c8ffcc7499d..5e47426e60330 100644 --- a/src/Makefile +++ b/src/Makefile @@ -55,7 +55,7 @@ ifeq ($(JULIACODEGEN),LLVM) CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \ llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \ llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \ - llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \ + llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers llvm-remove-addrspaces \ llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures pipeline llvm_api FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir) CG_LLVM_LIBS := all diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp deleted file mode 100644 index c78e6092ca5db..0000000000000 --- a/src/cgmemmgr.cpp +++ /dev/null @@ -1,965 +0,0 @@ -// This file is a part of Julia. License is MIT: https://julialang.org/license - -#include "llvm-version.h" -#include "platform.h" - -#include -#include "julia.h" -#include "julia_internal.h" - -#ifdef _OS_LINUX_ -# include -# include -# include -#endif -#ifndef _OS_WINDOWS_ -# include -# include -# include -# include -# if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS) -# define MAP_ANONYMOUS MAP_ANON -# endif -#endif -#ifdef _OS_FREEBSD_ -# include -# include -#endif -#ifdef _OS_OPENBSD_ -# include -#endif -#include "julia_assert.h" - -namespace { - -static size_t get_block_size(size_t size) -{ - return (size > jl_page_size * 256 ? LLT_ALIGN(size, jl_page_size) : - jl_page_size * 256); -} - -// Wrapper function to mmap/munmap/mprotect pages... -static void *map_anon_page(size_t size) -{ -#ifdef _OS_WINDOWS_ - char *mem = (char*)VirtualAlloc(NULL, size + jl_page_size, - MEM_COMMIT, PAGE_READWRITE); - assert(mem && "Cannot allocate RW memory"); - mem = (char*)LLT_ALIGN(uintptr_t(mem), jl_page_size); -#else // _OS_WINDOWS_ - void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - assert(mem != MAP_FAILED && "Cannot allocate RW memory"); -#endif // _OS_WINDOWS_ - return mem; -} - -static void unmap_page(void *ptr, size_t size) -{ -#ifdef _OS_WINDOWS_ - VirtualFree(ptr, size, MEM_DECOMMIT); -#else // _OS_WINDOWS_ - munmap(ptr, size); -#endif // _OS_WINDOWS_ -} - -#ifdef _OS_WINDOWS_ -enum class Prot : int { - RW = PAGE_READWRITE, - RX = PAGE_EXECUTE, - RO = PAGE_READONLY, - NO = PAGE_NOACCESS -}; - -static void protect_page(void *ptr, size_t size, Prot flags) -{ - DWORD old_prot; - if (!VirtualProtect(ptr, size, (DWORD)flags, &old_prot)) { - jl_safe_printf("Cannot protect page @%p of size %u to 0x%x (err 0x%x)\n", - ptr, (unsigned)size, (unsigned)flags, - (unsigned)GetLastError()); - abort(); - } -} -#else // _OS_WINDOWS_ -enum class Prot : int { - RW = PROT_READ | PROT_WRITE, - RX = PROT_READ | PROT_EXEC, - RO = PROT_READ, - NO = PROT_NONE -}; - -static void protect_page(void *ptr, size_t size, Prot flags) -{ - int ret = mprotect(ptr, size, (int)flags); - if (ret != 0) { - perror(__func__); - abort(); - } -} - -static bool check_fd_or_close(int fd) -{ - if (fd == -1) - return false; - int err = fcntl(fd, F_SETFD, FD_CLOEXEC); - assert(err == 0); - (void)err; // prevent compiler warning - if (fchmod(fd, S_IRWXU) != 0 || - ftruncate(fd, jl_page_size) != 0) { - close(fd); - return false; - } - // This can fail due to `noexec` mount option .... 
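// Aside (editorial sketch, not part of the patch): the probe that follows can
// be read in isolation -- an fd is only usable for the code pool if the kernel
// will map it PROT_EXEC, which fails on `noexec`-mounted filesystems. The
// helper name probe_exec_fd below is illustrative, not from the Julia sources.
#include <sys/mman.h>
#include <unistd.h>

static bool probe_exec_fd(int fd, size_t page_size)
{
    // Ask for a shared executable view; noexec mounts reject PROT_EXEC.
    void *p = mmap(nullptr, page_size, PROT_READ | PROT_EXEC,
                   MAP_SHARED, fd, 0);
    if (p == MAP_FAILED)
        return false;   // fd cannot back executable pages
    munmap(p, page_size);
    return true;
}
// End of aside.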
- void *ptr = mmap(nullptr, jl_page_size, PROT_READ | PROT_EXEC, - MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - close(fd); - return false; - } - munmap(ptr, jl_page_size); - return true; -} -#endif // _OS_WINDOWS_ - -static intptr_t anon_hdl = -1; - -#ifdef _OS_WINDOWS_ -// As far as I can tell `CreateFileMapping` cannot be resized on windows. -// Also, creating big file mapping and then map pieces of it seems to -// consume too much global resources. Therefore, we use each file mapping -// as a block on windows -static void *create_shared_map(size_t size, size_t id) -{ - void *addr = MapViewOfFile((HANDLE)id, FILE_MAP_ALL_ACCESS, - 0, 0, size); - assert(addr && "Cannot map RW view"); - return addr; -} - -static intptr_t init_shared_map() -{ - anon_hdl = 0; - return 0; -} - -static void *alloc_shared_page(size_t size, size_t *id, bool exec) -{ - assert(size % jl_page_size == 0); - DWORD file_mode = exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE; - HANDLE hdl = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, - file_mode, 0, size, NULL); - *id = (size_t)hdl; - // We set the maximum permissions for this to the maximum for this file, and then - // VirtualProtect, such that the debugger can still access these - // pages and set breakpoints if it wants to. - DWORD map_mode = FILE_MAP_ALL_ACCESS | (exec ? FILE_MAP_EXECUTE : 0); - void *addr = MapViewOfFile(hdl, map_mode, 0, 0, size); - assert(addr && "Cannot map RO view"); - DWORD protect_mode = exec ? PAGE_EXECUTE_READ : PAGE_READONLY; - VirtualProtect(addr, size, protect_mode, &file_mode); - return addr; -} -#else // _OS_WINDOWS_ -// For shared mapped region -static intptr_t get_anon_hdl(void) -{ - int fd = -1; - - // Linux and FreeBSD can create an anonymous fd without touching the - // file system. -# ifdef __NR_memfd_create - fd = syscall(__NR_memfd_create, "julia-codegen", 0); - if (check_fd_or_close(fd)) - return fd; -# endif -# ifdef _OS_FREEBSD_ - fd = shm_open(SHM_ANON, O_RDWR, S_IRWXU); - if (check_fd_or_close(fd)) - return fd; -# endif - char shm_name[JL_PATH_MAX] = "julia-codegen-0123456789-0123456789/tmp///"; - pid_t pid = getpid(); - // `shm_open` can't be mapped exec on mac -# ifndef _OS_DARWIN_ - do { - snprintf(shm_name, sizeof(shm_name), - "julia-codegen-%d-%d", (int)pid, rand()); - fd = shm_open(shm_name, O_RDWR | O_CREAT | O_EXCL, S_IRWXU); - if (check_fd_or_close(fd)) { - shm_unlink(shm_name); - return fd; - } - } while (errno == EEXIST); -# endif - FILE *tmpf = tmpfile(); - if (tmpf) { - fd = dup(fileno(tmpf)); - fclose(tmpf); - if (check_fd_or_close(fd)) { - return fd; - } - } - size_t len = sizeof(shm_name); - if (uv_os_tmpdir(shm_name, &len) != 0) { - // Unknown error; default to `/tmp` - snprintf(shm_name, sizeof(shm_name), "/tmp"); - len = 4; - } - snprintf(shm_name + len, sizeof(shm_name) - len, - "/julia-codegen-%d-XXXXXX", (int)pid); - fd = mkstemp(shm_name); - if (check_fd_or_close(fd)) { - unlink(shm_name); - return fd; - } - return -1; -} - -static _Atomic(size_t) map_offset{0}; -// Multiple of 128MB. -// Hopefully no one will set a ulimit for this to be a problem... 
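// Aside (editorial sketch, not part of the patch): the essence of the "shared
// dual map" scheme that get_anon_hdl()/create_shared_map() above implement --
// back the pool with an anonymous fd, then map the same pages twice: one RW
// view for the JIT to write through, one RX view at the runtime address.
// Assumes Linux (memfd_create); names are illustrative, not from Julia.
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

struct DualMapping { void *wr; void *rx; };

static DualMapping make_dual_mapping(size_t size) // size: page-size multiple
{
    int fd = (int)syscall(SYS_memfd_create, "jit-pool-demo", 0);
    if (fd == -1 || ftruncate(fd, (off_t)size) != 0)
        return {nullptr, nullptr};
    DualMapping m;
    m.wr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); // write view
    m.rx = mmap(nullptr, size, PROT_READ | PROT_EXEC,  MAP_SHARED, fd, 0); // exec view
    close(fd); // the mappings keep the shared pages alive
    return m;
}
// End of aside.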
-static constexpr size_t map_size_inc_default = 128 * 1024 * 1024; -static size_t map_size = 0; -static struct _make_shared_map_lock { - uv_mutex_t mtx; - _make_shared_map_lock() { - uv_mutex_init(&mtx); - }; -} shared_map_lock; - -static size_t get_map_size_inc() -{ - rlimit rl; - if (getrlimit(RLIMIT_FSIZE, &rl) != -1) { - if (rl.rlim_cur != RLIM_INFINITY) { - return std::min(map_size_inc_default, rl.rlim_cur); - } - if (rl.rlim_max != RLIM_INFINITY) { - return std::min(map_size_inc_default, rl.rlim_max); - } - } - return map_size_inc_default; -} - -static void *create_shared_map(size_t size, size_t id) -{ - void *addr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, - anon_hdl, id); - assert(addr != MAP_FAILED && "Cannot map RW view"); - return addr; -} - -static intptr_t init_shared_map() -{ - anon_hdl = get_anon_hdl(); - if (anon_hdl == -1) - return -1; - jl_atomic_store_relaxed(&map_offset, 0); - map_size = get_map_size_inc(); - int ret = ftruncate(anon_hdl, map_size); - if (ret != 0) { - perror(__func__); - abort(); - } - return anon_hdl; -} - -static void *alloc_shared_page(size_t size, size_t *id, bool exec) -{ - assert(size % jl_page_size == 0); - size_t off = jl_atomic_fetch_add(&map_offset, size); - *id = off; - size_t map_size_inc = get_map_size_inc(); - if (__unlikely(off + size > map_size)) { - uv_mutex_lock(&shared_map_lock.mtx); - size_t old_size = map_size; - while (off + size > map_size) - map_size += map_size_inc; - if (old_size != map_size) { - int ret = ftruncate(anon_hdl, map_size); - if (ret != 0) { - perror(__func__); - abort(); - } - } - uv_mutex_unlock(&shared_map_lock.mtx); - } - return create_shared_map(size, off); -} -#endif // _OS_WINDOWS_ - -#ifdef _OS_LINUX_ -// Using `/proc/self/mem`, A.K.A. Keno's remote memory manager. - -ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr) -{ - static_assert(sizeof(off_t) >= 8, "off_t is smaller than 64bits"); -#ifdef _P64 - const uintptr_t sign_bit = uintptr_t(1) << 63; - if (__unlikely(sign_bit & addr)) { - // This case should not happen with default kernel on 64bit since the address belongs - // to kernel space (linear mapping). - // However, it seems possible to change this at kernel compile time. - - // pwrite doesn't support offset with sign bit set but lseek does. - // This is obviously not thread-safe but none of the mem manager does anyway... - // From the kernel code, `lseek` with `SEEK_SET` can't fail. - // However, this can possibly confuse the glibc wrapper to think that - // we have invalid input value. Use syscall directly to be sure. - syscall(SYS_lseek, (long)fd, addr, (long)SEEK_SET); - // The return value can be -1 when the glibc syscall function - // think we have an error return with and `addr` that's too large. - // Ignore the return value for now. - return write(fd, buf, nbyte); - } -#endif - return pwrite(fd, buf, nbyte, (off_t)addr); -} - -// Do not call this directly. -// Use `get_self_mem_fd` which has a guard to call this only once. -static int _init_self_mem() -{ - struct utsname kernel; - uname(&kernel); - int major, minor; - if (-1 == sscanf(kernel.release, "%d.%d", &major, &minor)) - return -1; - // Can't risk getting a memory block backed by transparent huge pages, - // which cause the kernel to freeze on systems that have the DirtyCOW - // mitigation patch, but are < 4.10. 
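// Aside (editorial sketch, not part of the patch): the /proc/self/mem trick
// that _init_self_mem() below probes for -- write into pages the process maps
// read+exec only, by going through the kernel's view of our address space
// instead of toggling mprotect(). The real code additionally handles offsets
// with the sign bit set via a raw lseek, as its comments above explain.
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

static bool poke_rx_page(void *dest, const void *src, size_t n)
{
    int fd = open("/proc/self/mem", O_RDWR | O_SYNC | O_CLOEXEC);
    if (fd == -1)
        return false;
    // pwrite at the page's virtual address bypasses its PROT_EXEC-only mapping
    ssize_t ret = pwrite(fd, src, n, (off_t)(uintptr_t)dest);
    close(fd);
    return ret == (ssize_t)n;
}
// End of aside.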
- if (!(major > 4 || (major == 4 && minor >= 10))) - return -1; -#ifdef O_CLOEXEC - int fd = open("/proc/self/mem", O_RDWR | O_SYNC | O_CLOEXEC); - if (fd == -1) - return -1; -#else - int fd = open("/proc/self/mem", O_RDWR | O_SYNC); - if (fd == -1) - return -1; - fcntl(fd, F_SETFD, FD_CLOEXEC); -#endif - - // Check if we can write to a RX page - void *test_pg = mmap(nullptr, jl_page_size, PROT_READ | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - // We can ignore this though failure to allocate executable memory would be a bigger problem. - assert(test_pg != MAP_FAILED && "Cannot allocate executable memory"); - - const uint64_t v = 0xffff000012345678u; - int ret = pwrite_addr(fd, (const void*)&v, sizeof(uint64_t), (uintptr_t)test_pg); - if (ret != sizeof(uint64_t) || *(volatile uint64_t*)test_pg != v) { - munmap(test_pg, jl_page_size); - close(fd); - return -1; - } - munmap(test_pg, jl_page_size); - return fd; -} - -static int get_self_mem_fd() -{ - static int fd = _init_self_mem(); - return fd; -} - -static void write_self_mem(void *dest, void *ptr, size_t size) -{ - while (size > 0) { - ssize_t ret = pwrite_addr(get_self_mem_fd(), ptr, size, (uintptr_t)dest); - if ((size_t)ret == size) - return; - if (ret == -1 && (errno == EAGAIN || errno == EINTR)) - continue; - assert((size_t)ret < size); - size -= ret; - ptr = (char*)ptr + ret; - dest = (char*)dest + ret; - } -} -#endif // _OS_LINUX_ - -using namespace llvm; - -// Allocation strategies -// * For RW data, no memory protection needed, use plain memory pool. -// * For RO data or code, -// -// The first allocation in the page always has write address equals to -// runtime address. -// -// 1. shared dual map -// -// Map an (unlinked) anonymous file as memory pool. -// After first allocation, write address points to the second map. -// The second map is set to unreadable and unwritable in finalization. -// -// 2. private dual map -// -// Same as above but use anonymous memory map as memory pool, -// and use low level OS api to set up the second map. -// -// 3. copying data into RO page bypassing page protection -// -// After first allocation, write address points to a temporary buffer. -// Requires copying data out of the temporary buffer in finalization. - -// Allocates at least 256 pages per block and keep up to 8 blocks in the free -// list. The block with the least free space is discarded when we need to -// allocate a new page. -// Unused full pages are free'd from the block before discarding so at most -// one page is wasted on each discarded blocks. There should be at most one -// block with more than 128 pages available so the discarded one must have -// less than 128 pages available and therefore at least 128 pages used. -// (Apart from fragmentation) this guarantees less than 1% of memory is wasted. - -// the `shared` type parameter is for Windows only.... 
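// Aside (editorial sketch, not part of the patch): the Block allocator defined
// below hands memory out from the *end* of a block -- round the remaining
// space down to the alignment, then carve the request off the front of that
// aligned tail. The same arithmetic as a free function (align must be a power
// of two); illustrative only.
#include <cstddef>

static void *bump_alloc(char *base, size_t total, size_t &avail,
                        size_t size, size_t align)
{
    size_t aligned_avail = avail & -align; // round free space down to alignment
    if (aligned_avail < size)
        return nullptr;                    // not enough room in this block
    char *p = base + total - aligned_avail;
    avail = aligned_avail - size;
    return p;
}
// End of aside.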
-struct Block { - // runtime address - char *ptr{nullptr}; - size_t total{0}; - size_t avail{0}; - - Block(const Block&) = delete; - Block &operator=(const Block&) = delete; - Block(Block &&other) - : ptr(other.ptr), - total(other.total), - avail(other.avail) - { - other.ptr = nullptr; - other.total = other.avail = 0; - } - - Block() = default; - - void *alloc(size_t size, size_t align) - { - size_t aligned_avail = avail & (-align); - if (aligned_avail < size) - return nullptr; - char *p = ptr + total - aligned_avail; - avail = aligned_avail - size; - return p; - } - void reset(void *addr, size_t size) - { - if (avail >= jl_page_size) { - uintptr_t end = uintptr_t(ptr) + total; - uintptr_t first_free = end - avail; - first_free = LLT_ALIGN(first_free, jl_page_size); - assert(first_free < end); - unmap_page((void*)first_free, end - first_free); - } - ptr = (char*)addr; - total = avail = size; - } -}; - -class RWAllocator { - static constexpr int nblocks = 8; - Block blocks[nblocks]{}; -public: - void *alloc(size_t size, size_t align) - { - size_t min_size = (size_t)-1; - int min_id = 0; - for (int i = 0;i < nblocks && blocks[i].ptr;i++) { - if (void *ptr = blocks[i].alloc(size, align)) - return ptr; - if (blocks[i].avail < min_size) { - min_size = blocks[i].avail; - min_id = i; - } - } - size_t block_size = get_block_size(size); - blocks[min_id].reset(map_anon_page(block_size), block_size); - return blocks[min_id].alloc(size, align); - } -}; - -struct SplitPtrBlock : public Block { - // Possible states - // Allocation: - // * Initial allocation: `state & InitAlloc` - // * Followup allocation: `(state & Alloc) && !(state & InitAlloc)` - enum State { - // This block has no page protection set yet - InitAlloc = (1 << 0), - // There is at least one allocation in this page since last finalization - Alloc = (1 << 1), - // `wr_ptr` can be directly used as write address. - WRInit = (1 << 2), - // With `WRInit` set, whether `wr_ptr` has write permission enabled. - WRReady = (1 << 3), - }; - - uintptr_t wr_ptr{0}; - uint32_t state{0}; - SplitPtrBlock() = default; - - void swap(SplitPtrBlock &other) - { - std::swap(ptr, other.ptr); - std::swap(total, other.total); - std::swap(avail, other.avail); - std::swap(wr_ptr, other.wr_ptr); - std::swap(state, other.state); - } - - SplitPtrBlock(SplitPtrBlock &&other) - : SplitPtrBlock() - { - swap(other); - } -}; - -struct Allocation { - // Address to write to (the one returned by the allocation function) - void *wr_addr; - // Runtime address - void *rt_addr; - size_t sz; - bool relocated; -}; - -template -class ROAllocator { -protected: - static constexpr int nblocks = 8; - SplitPtrBlock blocks[nblocks]; - // Blocks that are done allocating (removed from `blocks`) - // but might not have all the permissions set or data copied yet. - SmallVector completed; - virtual void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) = 0; - virtual SplitPtrBlock alloc_block(size_t size) = 0; -public: - virtual ~ROAllocator() {} - virtual void finalize() - { - for (auto &alloc: allocations) { - // ensure the mapped pages are consistent - sys::Memory::InvalidateInstructionCache(alloc.wr_addr, - alloc.sz); - sys::Memory::InvalidateInstructionCache(alloc.rt_addr, - alloc.sz); - } - completed.clear(); - allocations.clear(); - } - // Allocations that have not been finalized yet. 
- SmallVector allocations; - void *alloc(size_t size, size_t align) - { - size_t min_size = (size_t)-1; - int min_id = 0; - for (int i = 0;i < nblocks && blocks[i].ptr;i++) { - auto &block = blocks[i]; - void *ptr = block.alloc(size, align); - if (ptr) { - void *wr_ptr; - if (block.state & SplitPtrBlock::InitAlloc) { - wr_ptr = ptr; - } - else { - wr_ptr = get_wr_ptr(block, ptr, size, align); - } - block.state |= SplitPtrBlock::Alloc; - allocations.push_back(Allocation{wr_ptr, ptr, size, false}); - return wr_ptr; - } - if (block.avail < min_size) { - min_size = block.avail; - min_id = i; - } - } - size_t block_size = get_block_size(size); - auto &block = blocks[min_id]; - auto new_block = alloc_block(block_size); - block.swap(new_block); - if (new_block.state) { - completed.push_back(std::move(new_block)); - } - else { - new_block.reset(nullptr, 0); - } - void *ptr = block.alloc(size, align); -#ifdef _OS_WINDOWS_ - block.state = SplitPtrBlock::Alloc; - void *wr_ptr = get_wr_ptr(block, ptr, size, align); - allocations.push_back(Allocation{wr_ptr, ptr, size, false}); - ptr = wr_ptr; -#else - block.state = SplitPtrBlock::Alloc | SplitPtrBlock::InitAlloc; - allocations.push_back(Allocation{ptr, ptr, size, false}); -#endif - return ptr; - } -}; - -template -class DualMapAllocator : public ROAllocator { -protected: - void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, size_t, size_t) override - { - assert((char*)rt_ptr >= block.ptr && - (char*)rt_ptr < (block.ptr + block.total)); - if (!(block.state & SplitPtrBlock::WRInit)) { - block.wr_ptr = (uintptr_t)create_shared_map(block.total, - block.wr_ptr); - block.state |= SplitPtrBlock::WRInit; - } - if (!(block.state & SplitPtrBlock::WRReady)) { - protect_page((void*)block.wr_ptr, block.total, Prot::RW); - block.state |= SplitPtrBlock::WRReady; - } - return (char*)rt_ptr + (block.wr_ptr - uintptr_t(block.ptr)); - } - SplitPtrBlock alloc_block(size_t size) override - { - SplitPtrBlock new_block; - // use `wr_ptr` to record the id initially - auto ptr = alloc_shared_page(size, (size_t*)&new_block.wr_ptr, exec); - new_block.reset(ptr, size); - return new_block; - } - void finalize_block(SplitPtrBlock &block, bool reset) - { - // This function handles setting the block to the right mode - // and free'ing maps that are not needed anymore. - // If `reset` is `true`, we won't allocate in this block anymore and - // we should free up resources that is not needed at runtime. - if (!(block.state & SplitPtrBlock::Alloc)) { - // A block that is not used this time, check if we need to free it. - if ((block.state & SplitPtrBlock::WRInit) && reset) - unmap_page((void*)block.wr_ptr, block.total); - return; - } - // For a block we used this time - if (block.state & SplitPtrBlock::InitAlloc) { - // For an initial block, we have a single RW map. - // Need to map it to RO or RX. - assert(!(block.state & (SplitPtrBlock::WRReady | - SplitPtrBlock::WRInit))); - protect_page(block.ptr, block.total, exec ? Prot::RX : Prot::RO); - block.state = 0; - } - else { - // For other ones, the runtime address has the correct mode. - // Need to map the write address to RO. 
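// Aside (editorial sketch, not part of the patch): what get_wr_ptr() above
// computes -- both views of a dual-mapped block cover the same file pages, so
// a runtime address translates to its writable twin by the constant offset
// between the two base addresses.
#include <cstdint>

static void *runtime_to_write_addr(void *rt_ptr, char *rt_base, uintptr_t wr_base)
{
    // Offsets within the block agree between views; only the bases differ.
    return (char*)rt_ptr + (wr_base - (uintptr_t)rt_base);
}
// End of aside.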
- assert(block.state & SplitPtrBlock::WRInit); - assert(block.state & SplitPtrBlock::WRReady); - if (reset) { - unmap_page((void*)block.wr_ptr, block.total); - } - else { - protect_page((void*)block.wr_ptr, block.total, Prot::NO); - block.state = SplitPtrBlock::WRInit; - } - } - } -public: - DualMapAllocator() - { - assert(anon_hdl != -1); - } - void finalize() override - { - for (auto &block : this->blocks) { - finalize_block(block, false); - } - for (auto &block : this->completed) { - finalize_block(block, true); - block.reset(nullptr, 0); - } - ROAllocator::finalize(); - } -}; - -#ifdef _OS_LINUX_ -template -class SelfMemAllocator : public ROAllocator { - SmallVector temp_buff; -protected: - void *get_wr_ptr(SplitPtrBlock &block, void *rt_ptr, - size_t size, size_t align) override - { - assert(!(block.state & SplitPtrBlock::InitAlloc)); - for (auto &wr_block: temp_buff) { - if (void *ptr = wr_block.alloc(size, align)) { - return ptr; - } - } - temp_buff.emplace_back(); - Block &new_block = temp_buff.back(); - size_t block_size = get_block_size(size); - new_block.reset(map_anon_page(block_size), block_size); - return new_block.alloc(size, align); - } - SplitPtrBlock alloc_block(size_t size) override - { - SplitPtrBlock new_block; - new_block.reset(map_anon_page(size), size); - return new_block; - } - void finalize_block(SplitPtrBlock &block, bool reset) - { - if (!(block.state & SplitPtrBlock::Alloc)) - return; - if (block.state & SplitPtrBlock::InitAlloc) { - // for an initial block, we need to map it to ro or rx - assert(!(block.state & (SplitPtrBlock::WRReady | - SplitPtrBlock::WRInit))); - protect_page(block.ptr, block.total, exec ? Prot::RX : Prot::RO); - block.state = 0; - } - } -public: - SelfMemAllocator() - : ROAllocator(), - temp_buff() - { - assert(get_self_mem_fd() != -1); - } - void finalize() override - { - for (auto &block : this->blocks) { - finalize_block(block, false); - } - for (auto &block : this->completed) { - finalize_block(block, true); - block.reset(nullptr, 0); - } - for (auto &alloc : this->allocations) { - if (alloc.rt_addr == alloc.wr_addr) - continue; - write_self_mem(alloc.rt_addr, alloc.wr_addr, alloc.sz); - } - // clear all the temp buffers except the first one - // (we expect only one) - bool cached = false; - for (auto &block : temp_buff) { - if (cached) { - munmap(block.ptr, block.total); - block.ptr = nullptr; - block.total = block.avail = 0; - } - else { - block.avail = block.total; - cached = true; - } - } - if (cached) - temp_buff.resize(1); - ROAllocator::finalize(); - } -}; -#endif // _OS_LINUX_ - -class RTDyldMemoryManagerJL : public SectionMemoryManager { - struct EHFrame { - uint8_t *addr; - size_t size; - }; - RTDyldMemoryManagerJL(const RTDyldMemoryManagerJL&) = delete; - void operator=(const RTDyldMemoryManagerJL&) = delete; - SmallVector pending_eh; - RWAllocator rw_alloc; - std::unique_ptr> ro_alloc; - std::unique_ptr> exe_alloc; - bool code_allocated; - size_t total_allocated; - -public: - RTDyldMemoryManagerJL() - : SectionMemoryManager(), - pending_eh(), - rw_alloc(), - ro_alloc(), - exe_alloc(), - code_allocated(false), - total_allocated(0) - { -#ifdef _OS_LINUX_ - if (!ro_alloc && get_self_mem_fd() != -1) { - ro_alloc.reset(new SelfMemAllocator()); - exe_alloc.reset(new SelfMemAllocator()); - } -#endif - if (!ro_alloc && init_shared_map() != -1) { - ro_alloc.reset(new DualMapAllocator()); - exe_alloc.reset(new DualMapAllocator()); - } - } - ~RTDyldMemoryManagerJL() override - { - } - size_t getTotalBytes() { return total_allocated; } 
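// Aside (editorial sketch, not part of the patch): the finalize step in
// miniature -- an "initial" block is emitted through a single RW mapping and
// then sealed in place, RX for code or RO for data, with the instruction
// cache flushed where that matters. A sketch under POSIX assumptions, not the
// Julia implementation itself.
#include <sys/mman.h>

static void seal_block(char *ptr, size_t size, bool is_code)
{
    mprotect(ptr, size, is_code ? (PROT_READ | PROT_EXEC) : PROT_READ);
    if (is_code)
        __builtin___clear_cache(ptr, ptr + size); // no-op on x86, needed on ARM
}
// End of aside.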
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override; -#if 0 - // Disable for now since we are not actually using this. - void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override; -#endif - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override; - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool isReadOnly) override; - using SectionMemoryManager::notifyObjectLoaded; - void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override; - bool finalizeMemory(std::string *ErrMsg = nullptr) override; - template - void mapAddresses(DL &Dyld, Alloc &&allocator) - { - for (auto &alloc: allocator->allocations) { - if (alloc.rt_addr == alloc.wr_addr || alloc.relocated) - continue; - alloc.relocated = true; - Dyld.mapSectionAddress(alloc.wr_addr, (uintptr_t)alloc.rt_addr); - } - } - template - void mapAddresses(DL &Dyld) - { - if (!ro_alloc) - return; - mapAddresses(Dyld, ro_alloc); - mapAddresses(Dyld, exe_alloc); - } -#ifdef _OS_WINDOWS_ - template - void *lookupWriteAddressFor(void *rt_addr, Alloc &&allocator) - { - for (auto &alloc: allocator->allocations) { - if (alloc.rt_addr == rt_addr) { - return alloc.wr_addr; - } - } - return nullptr; - } - void *lookupWriteAddressFor(void *rt_addr) - { - if (!ro_alloc) - return rt_addr; - if (void *ptr = lookupWriteAddressFor(rt_addr, ro_alloc)) - return ptr; - if (void *ptr = lookupWriteAddressFor(rt_addr, exe_alloc)) - return ptr; - return rt_addr; - } -#endif // _OS_WINDOWS_ -}; - -uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size, - unsigned Alignment, - unsigned SectionID, - StringRef SectionName) -{ - // allocating more than one code section can confuse libunwind. -#if !defined(_COMPILER_MSAN_ENABLED_) && !defined(_COMPILER_ASAN_ENABLED_) - // TODO: Figure out why msan and now asan too need this. 
- assert(!code_allocated); - code_allocated = true; -#endif - total_allocated += Size; - jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); - jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, Size); - if (exe_alloc) - return (uint8_t*)exe_alloc->alloc(Size, Alignment); - return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID, - SectionName); -} - -uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size, - unsigned Alignment, - unsigned SectionID, - StringRef SectionName, - bool isReadOnly) -{ - total_allocated += Size; - jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, Size); - jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, Size); - if (!isReadOnly) - return (uint8_t*)rw_alloc.alloc(Size, Alignment); - if (ro_alloc) - return (uint8_t*)ro_alloc->alloc(Size, Alignment); - return SectionMemoryManager::allocateDataSection(Size, Alignment, SectionID, - SectionName, isReadOnly); -} - -void RTDyldMemoryManagerJL::notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) -{ - if (!ro_alloc) { - assert(!exe_alloc); - SectionMemoryManager::notifyObjectLoaded(Dyld, Obj); - return; - } - assert(exe_alloc); - mapAddresses(Dyld); -} - -bool RTDyldMemoryManagerJL::finalizeMemory(std::string *ErrMsg) -{ - code_allocated = false; - if (ro_alloc) { - ro_alloc->finalize(); - assert(exe_alloc); - exe_alloc->finalize(); - for (auto &frame: pending_eh) - register_eh_frames(frame.addr, frame.size); - pending_eh.clear(); - return false; - } - else { - assert(!exe_alloc); - return SectionMemoryManager::finalizeMemory(ErrMsg); - } -} - -void RTDyldMemoryManagerJL::registerEHFrames(uint8_t *Addr, - uint64_t LoadAddr, - size_t Size) -{ - if (uintptr_t(Addr) == LoadAddr) { - register_eh_frames(Addr, Size); - } - else { - pending_eh.push_back(EHFrame{(uint8_t*)(uintptr_t)LoadAddr, Size}); - } -} - -#if 0 -void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr, - uint64_t LoadAddr, - size_t Size) -{ - deregister_eh_frames((uint8_t*)LoadAddr, Size); -} -#endif - -} - -#ifdef _OS_WINDOWS_ -void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr) -{ - return ((RTDyldMemoryManagerJL*)memmgr)->lookupWriteAddressFor(rt_addr); -} -#endif - -RTDyldMemoryManager* createRTDyldMemoryManager() -{ - return new RTDyldMemoryManagerJL(); -} - -size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) -{ - return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes(); -} diff --git a/src/codegen.cpp b/src/codegen.cpp index 65ea216e19dd1..03a46077f1ac6 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -10145,7 +10145,7 @@ static void init_jit_functions(void) } #ifdef JL_USE_INTEL_JITEVENTS -char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier +char jl_using_intel_jitevents = 0; // Non-zero if running under Intel VTune Amplifier #endif #ifdef JL_USE_OPROFILE_JITEVENTS @@ -10243,9 +10243,6 @@ extern "C" void jl_init_llvm(void) #if defined(JL_USE_INTEL_JITEVENTS) || \ defined(JL_USE_OPROFILE_JITEVENTS) || \ defined(JL_USE_PERF_JITEVENTS) -#ifdef JL_USE_JITLINK -#pragma message("JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink") -#else const char *jit_profiling = getenv("ENABLE_JITPROFILING"); #if defined(JL_USE_INTEL_JITEVENTS) @@ -10262,24 +10259,23 @@ extern "C" void jl_init_llvm(void) #if defined(JL_USE_PERF_JITEVENTS) if (jit_profiling && atoi(jit_profiling)) { - jl_using_perf_jitevents= 1; + jl_using_perf_jitevents = 1; } #endif #ifdef JL_USE_INTEL_JITEVENTS if 
(jl_using_intel_jitevents) - jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); + jl_ExecutionEngine->enableIntelJITEventListener(); #endif #ifdef JL_USE_OPROFILE_JITEVENTS if (jl_using_oprofile_jitevents) - jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createOProfileJITEventListener()); + jl_ExecutionEngine->enableOProfileJITEventListener(); #endif #ifdef JL_USE_PERF_JITEVENTS if (jl_using_perf_jitevents) - jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); -#endif + jl_ExecutionEngine->enablePerfJITEventListener(); #endif #endif diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index b32f211c1ad8c..763f20506a1d8 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -14,6 +14,11 @@ #include #include #include +#if JL_LLVM_VERSION >= 180000 +#include +#include +#include +#endif #include #include #include @@ -138,11 +143,11 @@ void jl_dump_llvm_opt_impl(void *s) **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s; } -static int jl_add_to_ee( - orc::ThreadSafeModule &M, - const StringMap &NewExports, - DenseMap &Queued, - SmallVectorImpl &Stack) JL_NOTSAFEPOINT; +//static int jl_add_to_ee( +// orc::ThreadSafeModule &M, +// const StringMap &NewExports, +// DenseMap &Queued, +// SmallVectorImpl &Stack) JL_NOTSAFEPOINT; static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT; static uint64_t getAddressForFunction(StringRef fname) JL_NOTSAFEPOINT; @@ -237,7 +242,8 @@ static jl_callptr_t _jl_compile_codeinst( if (params.imaging_mode) { // Won't contain any PLT/dlsym calls, so no need to optimize those jl_ExecutionEngine->addModule(jl_get_globals_module(params.tsctx, params.DL, params.TargetTriple, params.global_targets)); - } else { + } + else { StringMap NewGlobals; for (auto &global : params.global_targets) { NewGlobals[global.second->getName()] = global.first; @@ -253,31 +259,10 @@ static jl_callptr_t _jl_compile_codeinst( } } - // Collect the exported functions from the params.compiled_functions modules, - // which form dependencies on which functions need to be - // compiled first. Cycles of functions are compiled together. 
- // (essentially we compile a DAG of SCCs in reverse topological order, - // if we treat declarations of external functions as edges from declaration - // to definition) - StringMap NewExports; - for (auto &def : params.compiled_functions) { - orc::ThreadSafeModule &TSM = std::get<0>(def.second); - //The underlying context object is still locked because params is not destroyed yet - auto M = TSM.getModuleUnlocked(); - jl_ExecutionEngine->optimizeDLSyms(*M); - for (auto &F : M->global_objects()) { - if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) { - NewExports[F.getName()] = &TSM; - } - } - } - DenseMap Queued; - SmallVector Stack; for (auto &def : params.compiled_functions) { // Add the results to the execution engine now orc::ThreadSafeModule &M = std::get<0>(def.second); - jl_add_to_ee(M, NewExports, Queued, Stack); - assert(Queued.empty() && Stack.empty() && !M); + jl_ExecutionEngine->addModule(std::move(M)); } ++CompiledCodeinsts; MaxWorkqueueSize.updateMax(params.compiled_functions.size()); @@ -585,48 +570,6 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT static constexpr size_t N_optlevels = 4; -static Expected validateExternRelocations(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { -#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK) - auto isIntrinsicFunction = [](GlobalObject &GO) JL_NOTSAFEPOINT { - auto F = dyn_cast(&GO); - if (!F) - return false; - return F->isIntrinsic() || F->getName().startswith("julia."); - }; - // validate the relocations for M (only for RuntimeDyld, JITLink performs its own symbol validation) - auto Err = TSM.withModuleDo([isIntrinsicFunction](Module &M) JL_NOTSAFEPOINT { - Error Err = Error::success(); - for (auto &GO : make_early_inc_range(M.global_objects())) { - if (!GO.isDeclarationForLinker()) - continue; - if (GO.use_empty()) { - GO.eraseFromParent(); - continue; - } - if (isIntrinsicFunction(GO)) - continue; - auto sym = jl_ExecutionEngine->findUnmangledSymbol(GO.getName()); - if (sym) - continue; - // TODO have we ever run into this check? 
It's been guaranteed to not - // fire in an assert build, since previously LLVM would abort due to - // not handling the error if we didn't find the unmangled symbol - if (SectionMemoryManager::getSymbolAddressInProcess( - jl_ExecutionEngine->getMangledName(GO.getName()))) { - consumeError(sym.takeError()); - continue; - } - Err = joinErrors(std::move(Err), sym.takeError()); - } - return Err; - }); - if (Err) { - return std::move(Err); - } -#endif - return std::move(TSM); -} - static Expected selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) { TSM.withModuleDo([](Module &M) { size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); @@ -657,18 +600,6 @@ static Expected selectOptLevel(orc::ThreadSafeModule TSM, return std::move(TSM); } -static void recordDebugTSM(orc::MaterializationResponsibility &, orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { - auto ptr = TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { - auto md = M.getModuleFlag("julia.__jit_debug_tsm_addr"); - if (!md) - return static_cast(nullptr); - return reinterpret_cast(cast(cast(md)->getValue())->getZExtValue()); - }); - if (ptr) { - *ptr = std::move(TSM); - } -} - void jl_register_jit_object(const object::ObjectFile &debugObj, std::function getLoadAddress, std::function lookupWriteAddress); @@ -923,112 +854,6 @@ class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { } }; -RTDyldMemoryManager* createRTDyldMemoryManager(void); - -// A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr -class ForwardingMemoryManager : public RuntimeDyld::MemoryManager { -private: - std::shared_ptr MemMgr; - -public: - ForwardingMemoryManager(std::shared_ptr MemMgr) : MemMgr(MemMgr) {} - virtual ~ForwardingMemoryManager() = default; - virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override { - return MemMgr->allocateCodeSection(Size, Alignment, SectionID, SectionName); - } - virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName, - bool IsReadOnly) override { - return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly); - } -#if JL_LLVM_VERSION >= 160000 - virtual void reserveAllocationSpace(uintptr_t CodeSize, Align CodeAlign, - uintptr_t RODataSize, Align RODataAlign, - uintptr_t RWDataSize, Align RWDataAlign) override { - return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); - } -#else - virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, - uintptr_t RODataSize, - uint32_t RODataAlign, - uintptr_t RWDataSize, - uint32_t RWDataAlign) override { - return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign); - } -#endif - virtual bool needsToReserveAllocationSpace() override { - return MemMgr->needsToReserveAllocationSpace(); - } - virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - return MemMgr->registerEHFrames(Addr, LoadAddr, Size); - } - virtual void deregisterEHFrames() override { - return MemMgr->deregisterEHFrames(); - } - virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override { - return MemMgr->finalizeMemory(ErrMsg); - } - virtual void notifyObjectLoaded(RuntimeDyld &RTDyld, - const object::ObjectFile &Obj) override { - return MemMgr->notifyObjectLoaded(RTDyld, Obj); - } -}; - - -#if defined(_OS_WINDOWS_) && 
defined(_CPU_X86_64_)
-void *lookupWriteAddressFor(RTDyldMemoryManager *MemMgr, void *rt_addr);
-#endif
-
-void registerRTDyldJITObject(const object::ObjectFile &Object,
-                             const RuntimeDyld::LoadedObjectInfo &L,
-                             const std::shared_ptr<RTDyldMemoryManager> &MemMgr)
-{
-    auto SavedObject = L.getObjectForDebug(Object).takeBinary();
-    // If the debug object is unavailable, save (a copy of) the original object
-    // for our backtraces.
-    // This copy seems unfortunate, but there doesn't seem to be a way to take
-    // ownership of the original buffer.
-    if (!SavedObject.first) {
-        auto NewBuffer =
-            MemoryBuffer::getMemBufferCopy(Object.getData(), Object.getFileName());
-        auto NewObj =
-            cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
-        SavedObject = std::make_pair(std::move(NewObj), std::move(NewBuffer));
-    }
-    const object::ObjectFile *DebugObj = SavedObject.first.release();
-    SavedObject.second.release();
-
-    StringMap<object::SectionRef> loadedSections;
-    // Use the original Object, not the DebugObject, as this is used for the
-    // RuntimeDyld::LoadedObjectInfo lookup.
-    for (const object::SectionRef &lSection : Object.sections()) {
-        auto sName = lSection.getName();
-        if (sName) {
-            bool inserted = loadedSections.insert(std::make_pair(*sName, lSection)).second;
-            assert(inserted);
-            (void)inserted;
-        }
-    }
-    auto getLoadAddress = [loadedSections = std::move(loadedSections),
-                           &L](const StringRef &sName) -> uint64_t {
-        auto search = loadedSections.find(sName);
-        if (search == loadedSections.end())
-            return 0;
-        return L.getSectionLoadAddress(search->second);
-    };
-
-    jl_register_jit_object(*DebugObj, getLoadAddress,
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-                           [MemMgr](void *p) { return lookupWriteAddressFor(MemMgr.get(), p); }
-#else
-                           nullptr
-#endif
-    );
-}
 namespace {
 static std::unique_ptr<TargetMachine> createTargetMachine() JL_NOTSAFEPOINT {
     TargetOptions options = TargetOptions();
@@ -1597,29 +1422,15 @@ JuliaOJIT::JuliaOJIT()
 #endif
         return orc::ThreadSafeContext(std::move(ctx));
     }),
-#ifdef JL_USE_JITLINK
     MemMgr(createJITLinkMemoryManager()),
     ObjectLayer(ES, *MemMgr),
-#else
-    MemMgr(createRTDyldMemoryManager()),
-    ObjectLayer(
-            ES,
-            [this]() {
-                std::unique_ptr<RuntimeDyld::MemoryManager> result(new ForwardingMemoryManager(MemMgr));
-                return result;
-            }
-        ),
-#endif
-    LockLayer(ObjectLayer),
-    CompileLayer(ES, LockLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
+    CompileLayer(ES, ObjectLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
     JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))),
     OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT(*TM, PrintLLVMTimers, llvm_printing_mutex))),
     OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)),
-    DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)),
-    ExternalCompileLayer(ES, LockLayer,
+    ExternalCompileLayer(ES, ObjectLayer,
         std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM))
 {
-#ifdef JL_USE_JITLINK
 # if defined(LLVM_SHLIB)
     // When dynamically linking against LLVM, use our custom EH frame registration code
     // also used with RTDyld to inform both our and the libc copy of libunwind.
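// Aside (editorial sketch, not part of the patch): the shape of what the
// constructor in the hunk below now builds -- an ORC object layer driven by
// JITLink instead of RTDyld. Sketch against the stock upstream LLVM API;
// Julia wires in its own createJITLinkMemoryManager(), whereas this uses
// LLVM's InProcessMemoryManager, and makeJITLinkLayer is an illustrative name.
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include <memory>

static std::unique_ptr<llvm::orc::ObjectLinkingLayer>
makeJITLinkLayer(llvm::orc::ExecutionSession &ES,
                 std::unique_ptr<llvm::jitlink::JITLinkMemoryManager> &MemMgr)
{
    // The memory manager allocates, protects, and finalizes code/data pages;
    // plugins (debug info, EH frames, accounting) attach to the layer later.
    MemMgr = llvm::cantFail(llvm::jitlink::InProcessMemoryManager::Create());
    return std::make_unique<llvm::orc::ObjectLinkingLayer>(ES, *MemMgr);
}
// End of aside.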
@@ -1632,15 +1443,6 @@ JuliaOJIT::JuliaOJIT()
     ObjectLayer.addPlugin(std::make_unique<JLDebuginfoPlugin>());
     ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(total_size));
-#else
-    ObjectLayer.setNotifyLoaded(
-        [this](orc::MaterializationResponsibility &MR,
-               const object::ObjectFile &Object,
-               const RuntimeDyld::LoadedObjectInfo &LO) {
-            registerRTDyldJITObject(Object, LO, MemMgr);
-        });
-#endif
-    CompileLayer.setNotifyCompiled(recordDebugTSM);
 
     std::string ErrorStr;
 
@@ -1801,51 +1603,11 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
 {
     JL_TIMING(LLVM_JIT, JIT_Total);
     ++ModulesAdded;
-    orc::SymbolLookupSet NewExports;
-    orc::ThreadSafeModule CurrentlyCompiling;
-    TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
-        for (auto &F : M.global_values()) {
-            if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-                auto Name = ES.intern(getMangledName(F.getName()));
-                NewExports.add(std::move(Name));
-            }
-        }
-        assert(!verifyLLVMIR(M));
-        auto jit_debug_tsm_addr = ConstantInt::get(Type::getIntNTy(M.getContext(), sizeof(void*) * CHAR_BIT), (uintptr_t) &CurrentlyCompiling);
-        M.addModuleFlag(Module::Error, "julia.__jit_debug_tsm_addr", jit_debug_tsm_addr);
-    });
-    // TODO: what is the performance characteristics of this?
-    auto Err = DepsVerifyLayer.add(JD, std::move(TSM));
+    auto Err = OptSelLayer.add(JD, std::move(TSM));
     if (Err) {
         ES.reportError(std::move(Err));
         errs() << "Failed to add module to JIT!\n";
-        if (CurrentlyCompiling) {
-            CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; });
-        } else {
-            errs() << "Module unavailable to be printed\n";
-        }
-        abort();
-    }
-    // force eager compilation (for now), due to memory management specifics
-    // (can't handle compilation recursion)
-    auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports);
-    if (!Lookups) {
-        ES.reportError(Lookups.takeError());
-        errs() << "Failed to lookup symbols in module!\n";
-        if (CurrentlyCompiling) {
-            CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; });
-        } else {
-            errs() << "Module unavailable to be printed\n";
-        }
-    }
-    for (auto &Sym : *Lookups) {
-        #if JL_LLVM_VERSION >= 170000
-        assert(Sym.second.getAddress());
-        #else
-        assert(Sym.second);
-        #endif
-        (void) Sym;
     }
 }
 
@@ -1870,7 +1632,7 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM,
 
 Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj)
 {
     assert(Obj && "Can not add null object");
-    return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
+    return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
 }
 
 #if JL_LLVM_VERSION >= 170000
@@ -1989,43 +1751,69 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst)
     return *fname;
 }
-
-#ifdef JL_USE_JITLINK
-extern "C" orc::shared::CWrapperFunctionResult
-llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size);
+#if JL_LLVM_VERSION >= 170000
+#define addAbsoluteToMap(map,name) \
+    (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name))
+#else
+#define addAbsoluteToMap(map,name) \
+    (map[mangle(#name)] = JITEvaluatedSymbol::fromPointer(&name, JITSymbolFlags::Exported | JITSymbolFlags::Callable), orc::ExecutorAddr::fromPtr(&name))
+#endif
 
 void JuliaOJIT::enableJITDebuggingSupport()
 {
     orc::SymbolMap GDBFunctions;
-    #if JL_LLVM_VERSION >= 170000
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction), JITSymbolFlags::Exported | JITSymbolFlags::Callable};
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper), JITSymbolFlags::Exported | JITSymbolFlags::Callable};
-    #else
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
-    #endif
+    addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction);
+    auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper);
     cantFail(JD.define(orc::absoluteSymbols(GDBFunctions)));
     if (TM->getTargetTriple().isOSBinFormatMachO())
         ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple())));
 #ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794
     else if (TM->getTargetTriple().isOSBinFormatELF())
         //EPCDebugObjectRegistrar doesn't take a JITDylib, so we have to directly provide the call address
-        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, orc::ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper))));
+        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, registerJITLoaderGDBWrapper)));
 #endif
 }
-#else
-void JuliaOJIT::enableJITDebuggingSupport()
+
+void JuliaOJIT::enableIntelJITEventListener()
 {
-    RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
+#if JL_LLVM_VERSION >= 180000
+    if (TT.isOSBinFormatELF()) {
+        orc::SymbolMap VTuneFunctions;
+        auto RegisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_registerVTuneImpl);
+        auto UnregisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_unregisterVTuneImpl);
+        ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create()));
+        //ObjectLayer.addPlugin(cantFail(VTuneSupportPlugin::Create(ES.getExecutorProcessControl(),
+        //                     JD, /*EmitDebugInfo=*/true,
+        //                     /*TestMode=*/false)));
+        bool EmitDebugInfo = true;
+        ObjectLayer.addPlugin(std::make_unique<VTuneSupportPlugin>(
+            ES.getExecutorProcessControl(), RegisterImplAddr, UnregisterImplAddr, EmitDebugInfo));
+    }
+#endif
 }
 
-void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
+void JuliaOJIT::enableOProfileJITEventListener()
 {
-    if (!L)
-        return;
-    this->ObjectLayer.registerJITEventListener(*L);
 }
+
+void JuliaOJIT::enablePerfJITEventListener()
+{
+#if JL_LLVM_VERSION >= 180000
+    orc::SymbolMap PerfFunctions;
+    auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart);
+    auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd);
+    auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl);
+    cantFail(JD.define(orc::absoluteSymbols(PerfFunctions)));
+    if (TM->getTargetTriple().isOSBinFormatELF()) {
+        ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create()));
+        //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create(
+        //    ES.getExecutorProcessControl(), *JD, true, true)));
+        bool EmitDebugInfo = true, EmitUnwindInfo = true;
+        ObjectLayer.addPlugin(std::make_unique<PerfSupportPlugin>(
+            ES.getExecutorProcessControl(), StartAddr, EndAddr, ImplAddr, EmitDebugInfo, EmitUnwindInfo));
+    }
 #endif
+}
 
 const DataLayout& JuliaOJIT::getDataLayout() const
 {
@@ -2044,19 +1832,10 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV)
     return getMangledName(GV->getName());
 }
 
-#ifdef JL_USE_JITLINK
 size_t JuliaOJIT::getTotalBytes() const
 {
     return total_size.load(std::memory_order_relaxed);
 }
-#else
-size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
-
-size_t JuliaOJIT::getTotalBytes() const
-{
-    return getRTDyldMemoryManagerTotalBytes(MemMgr.get());
-}
-#endif
 
 void JuliaOJIT::printTimers()
@@ -2258,72 +2037,6 @@ static void jl_decorate_module(Module &M) {
     }
 }
 
-// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
-static int jl_add_to_ee(
-        orc::ThreadSafeModule &M,
-        const StringMap<orc::ThreadSafeModule*> &NewExports,
-        DenseMap<orc::ThreadSafeModule*, int> &Queued,
-        SmallVectorImpl<orc::ThreadSafeModule*> &Stack)
-{
-    // First check if the TSM is empty (already compiled)
-    if (!M)
-        return 0;
-    // Next check and record if it is on the stack somewhere
-    {
-        auto &Id = Queued[&M];
-        if (Id)
-            return Id;
-        Stack.push_back(&M);
-        Id = Stack.size();
-    }
-    // Finally work out the SCC
-    int depth = Stack.size();
-    int MergeUp = depth;
-    SmallVector<orc::ThreadSafeModule*, 0> Children;
-    M.withModuleDo([&](Module &m) JL_NOTSAFEPOINT {
-        for (auto &F : m.global_objects()) {
-            if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-                auto Callee = NewExports.find(F.getName());
-                if (Callee != NewExports.end()) {
-                    auto *CM = Callee->second;
-                    if (*CM && CM != &M) {
-                        auto Down = Queued.find(CM);
-                        if (Down != Queued.end())
-                            MergeUp = std::min(MergeUp, Down->second);
-                        else
-                            Children.push_back(CM);
-                    }
-                }
-            }
-        }
-    });
-    assert(MergeUp > 0);
-    for (auto *CM : Children) {
-        int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack);
-        assert(Down <= (int)Stack.size());
-        if (Down)
-            MergeUp = std::min(MergeUp, Down);
-    }
-    if (MergeUp < depth)
-        return MergeUp;
-    while (1) {
-        // Not in a cycle (or at the top of it)
-        // remove SCC state and merge every CM from the cycle into M
-        orc::ThreadSafeModule *CM = Stack.back();
-        auto it = Queued.find(CM);
-        assert(it->second == (int)Stack.size());
-        Queued.erase(it);
-        Stack.pop_back();
-        if ((int)Stack.size() < depth) {
-            assert(&M == CM);
-            break;
-        }
-        jl_merge_module(M, std::move(*CM));
-    }
-    jl_ExecutionEngine->addModule(std::move(M));
-    return 0;
-}
-
 static uint64_t getAddressForFunction(StringRef fname)
 {
     auto addr = jl_ExecutionEngine->getFunctionAddress(fname);
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 4f05db50f1388..d025fdd85cebb 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -29,37 +29,12 @@
 #include
 #include
-
-// As of LLVM 13, there are two runtime JIT linker implementations, the older
-// RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink
-// (used via orc::ObjectLinkingLayer).
-//
-// JITLink is not only more flexible (which isn't of great importance for us, as
-// we do only single-threaded in-process codegen), but crucially supports using
-// the Small code model, where the linker needs to fix up relocations between
-// object files that end up far apart in address space. RuntimeDyld can't do
-// that and relies on the Large code model instead, which is broken on
-// aarch64-darwin (macOS on ARM64), and not likely to ever be supported there
-// (see https://bugs.llvm.org/show_bug.cgi?id=52029).
-//
-// However, JITLink is a relatively young library and lags behind in platform
-// and feature support (e.g. Windows, JITEventListeners for various profilers,
-// etc.). Thus, we currently only use JITLink where absolutely required, that is,
-// for Mac/aarch64 and Linux/aarch64.
-// #define JL_FORCE_JITLINK
-
 #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)
 # define HAS_SANITIZER
 #endif
 // The sanitizers don't play well with our memory manager
-#if defined(JL_FORCE_JITLINK) || defined(_CPU_AARCH64_) || defined(HAS_SANITIZER)
-# define JL_USE_JITLINK
-#endif
-
 # include
-# include
-# include
 
 using namespace llvm;
 
@@ -354,33 +329,11 @@ using SharedBytesT = StringSet::MapEntryT
 class JuliaOJIT {
 public:
-#ifdef JL_USE_JITLINK
     typedef orc::ObjectLinkingLayer ObjLayerT;
-#else
-    typedef orc::RTDyldObjectLinkingLayer ObjLayerT;
-#endif
-    struct LockLayerT : public orc::ObjectLayer {
-
-        LockLayerT(orc::ObjectLayer &BaseLayer) JL_NOTSAFEPOINT : orc::ObjectLayer(BaseLayer.getExecutionSession()), BaseLayer(BaseLayer) {}
-        ~LockLayerT() JL_NOTSAFEPOINT = default;
-
-        void emit(std::unique_ptr<orc::MaterializationResponsibility> R,
-                  std::unique_ptr<MemoryBuffer> O) override {
-            JL_TIMING(LLVM_JIT, JIT_Link);
-#ifndef JL_USE_JITLINK
-            std::lock_guard<std::mutex> lock(EmissionMutex);
-#endif
-            BaseLayer.emit(std::move(R), std::move(O));
-        }
-    private:
-        orc::ObjectLayer &BaseLayer;
-        std::mutex EmissionMutex;
-    };
     typedef orc::IRCompileLayer CompileLayerT;
     typedef orc::IRTransformLayer JITPointersLayerT;
     typedef orc::IRTransformLayer OptimizeLayerT;
     typedef orc::IRTransformLayer OptSelLayerT;
-    typedef orc::IRTransformLayer DepsVerifyLayerT;
     typedef object::OwningBinary<object::ObjectFile> OwningObj;
-    template <typename ObjT, typename LoadResult>
-    void registerObject(const ObjT &Obj, const LoadResult &LO);
-
 public:
     JuliaOJIT() JL_NOTSAFEPOINT;
     ~JuliaOJIT() JL_NOTSAFEPOINT;
 
     void enableJITDebuggingSupport() JL_NOTSAFEPOINT;
-#ifndef JL_USE_JITLINK
-    // JITLink doesn't support old JITEventListeners (yet).
-    void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT;
-#endif
+    void enableIntelJITEventListener() JL_NOTSAFEPOINT;
+    void enableOProfileJITEventListener() JL_NOTSAFEPOINT;
+    void enablePerfJITEventListener() JL_NOTSAFEPOINT;
 
     orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT;
     void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT;
@@ -601,19 +548,13 @@ class JuliaOJIT {
 
     ResourcePool<orc::ThreadSafeContext> ContextPool;
 
-#ifndef JL_USE_JITLINK
-    const std::shared_ptr<RTDyldMemoryManager> MemMgr;
-#else
     std::atomic<size_t> total_size{0};
     const std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
-#endif
     ObjLayerT ObjectLayer;
-    LockLayerT LockLayer;
     CompileLayerT CompileLayer;
     JITPointersLayerT JITPointersLayer;
     OptimizeLayerT OptimizeLayer;
     OptSelLayerT OptSelLayer;
-    DepsVerifyLayerT DepsVerifyLayer;
     CompileLayerT ExternalCompileLayer;
 };
 extern JuliaOJIT *jl_ExecutionEngine;
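// Aside (editorial sketch, not part of the patch): with RTDyld gone, per-object
// hooks hang off ObjectLinkingLayer plugins. In the spirit of the
// JLMemoryUsagePlugin referenced above, a minimal plugin that tallies linked
// bytes -- signatures follow LLVM 16/17 and drift between majors, so treat
// this as a sketch rather than a drop-in implementation.
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include <atomic>

using namespace llvm;

class SizeTallyPlugin : public orc::ObjectLinkingLayer::Plugin {
    std::atomic<size_t> &total;
public:
    explicit SizeTallyPlugin(std::atomic<size_t> &total) : total(total) {}
    void modifyPassConfig(orc::MaterializationResponsibility &,
                          jitlink::LinkGraph &,
                          jitlink::PassConfiguration &Config) override {
        // Runs after addresses are assigned; sum the size of every block.
        Config.PostAllocationPasses.push_back([this](jitlink::LinkGraph &G) {
            size_t bytes = 0;
            for (auto &Sec : G.sections())
                for (auto *B : Sec.blocks())
                    bytes += B->getSize();
            total.fetch_add(bytes, std::memory_order_relaxed);
            return Error::success();
        });
    }
    // Required resource-tracking boilerplate; nothing to clean up here.
    Error notifyFailed(orc::MaterializationResponsibility &) override {
        return Error::success();
    }
    Error notifyRemovingResources(orc::JITDylib &, orc::ResourceKey) override {
        return Error::success();
    }
    void notifyTransferringResources(orc::JITDylib &, orc::ResourceKey,
                                     orc::ResourceKey) override {}
};
// Hypothetical usage: ObjectLayer.addPlugin(std::make_unique<SizeTallyPlugin>(total_size));
// End of aside.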