diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8383e8d91..22962462b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -119,18 +119,28 @@ jobs: system-processor: arm triple: arm-linux-gnueabihf rtld: ld-linux-armhf.so.3 + ld-flavour: lld - name: arm64 system-processor: aarch64 triple: aarch64-linux-gnu rtld: ld-linux-aarch64.so.1 + ld-flavour: lld - name: ppc64el system-processor: powerpc64le triple: powerpc64le-linux-gnu rtld: ld64.so.2 + ld-flavour: lld + - name: riscv64 + system-processor: riscv64 + triple: riscv64-linux-gnu + rtld: ld-linux-riscv64-lp64d.so.1 + extra-packages: binutils-riscv64-linux-gnu + ld-flavour: bfd + ld: /usr/bin/riscv64-linux-gnu-ld.bfd # Don't abort runners if a single one fails fail-fast: false runs-on: ubuntu-latest - name: Cross-build for ${{ matrix.arch.triple }} + name: ${{matrix.build-type}} cross-build for ${{ matrix.arch.triple }} steps: - uses: actions/checkout@v2 - name: Install cross-compile toolchain and QEMU @@ -141,6 +151,7 @@ jobs: sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" sudo apt update sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build clang-13 lld-13 + sudo apt install ${{matrix.arch.extra-packages}} # The default PowerPC qemu configuration uses the wrong page size. # Wrap it in a script that fixes this. sudo update-binfmts --disable qemu-ppc64le @@ -161,6 +172,8 @@ jobs: -DSNMALLOC_QEMU_WORKAROUND=ON -DSNMALLOC_STATIC_LIBRARY=OFF -DCMAKE_TOOLCHAIN_FILE=ci/Toolchain.cmake + -DSNMALLOC_LINKER=${{matrix.arch.ld}} + -DSNMALLOC_LINKER_FLAVOUR=${{matrix.arch.ld-flavour}} - name: Build working-directory: ${{github.workspace}}/build run: NINJA_STATUS="%p [%f:%s/%t] %o/s, %es" ninja diff --git a/CMakeLists.txt b/CMakeLists.txt index 534331e7c..b10635e41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -245,10 +245,21 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) set(${result} ${dirlist} PARENT_SCOPE) endfunction() - set(CMAKE_REQUIRED_LINK_OPTIONS -fuse-ld=lld) - check_cxx_source_compiles("int main() { return 1; }" LLD_WORKS) - if (LLD_WORKS) - message(STATUS "Using LLD to link snmalloc shims") + if(NOT (DEFINED SNMALLOC_LINKER_FLAVOUR) OR ("${SNMALLOC_LINKER_FLAVOUR}" MATCHES "^$")) + # Linker not specified externally; probe to see if we can make lld work + set(CMAKE_REQUIRED_LINK_OPTIONS -fuse-ld=lld) + check_cxx_source_compiles("int main() { return 1; }" LLD_WORKS) + if (LLD_WORKS) + message(STATUS "Using LLD to link snmalloc shims") + endif() + elseif(SNMALLOC_LINKER_FLAVOUR STREQUAL "lld") + # Linker specified externally to be lld; assume it works and that the flags + # have also been set for us + set(LLD_WORKS TRUE) + else() + # Linker specified externally as something other than lld; presume it + # doesn't work and don't add its flags, below + set(LLD_WORKS FALSE) endif() function(add_shim name type) diff --git a/ci/Toolchain.cmake b/ci/Toolchain.cmake index e3e041c83..2b5613c47 100644 --- a/ci/Toolchain.cmake +++ b/ci/Toolchain.cmake @@ -7,7 +7,11 @@ set(CMAKE_C_COMPILER clang-13) set(CMAKE_C_COMPILER_TARGET ${triple}) set(CMAKE_CXX_COMPILER clang++-13) set(CMAKE_CXX_COMPILER_TARGET ${triple}) -set(CROSS_LINKER_FLAGS "-fuse-ld=lld -Wl,--dynamic-linker=/usr/${triple}/lib/$ENV{RTLD_NAME},-rpath,/usr/${triple}/lib") + +set(CROSS_LINKER_FLAGS "-fuse-ld=${SNMALLOC_LINKER_FLAVOUR} -Wl,--dynamic-linker=/usr/${triple}/lib/$ENV{RTLD_NAME},-rpath,/usr/${triple}/lib") +if((DEFINED SNMALLOC_LINKER) AND NOT ("${SNMALLOC_LINKER}" 
MATCHES "^$")) + string(APPEND CROSS_LINKER_FLAGS " --ld-path=${SNMALLOC_LINKER}") +endif() set(CMAKE_EXE_LINKER_FLAGS ${CROSS_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${CROSS_LINKER_FLAGS}) set(CMAKE_MODULE_LINKER_FLAGS ${CROSS_LINKER_FLAGS}) diff --git a/src/aal/aal.h b/src/aal/aal.h index 7e2af3755..fcff4dc80 100644 --- a/src/aal/aal.h +++ b/src/aal/aal.h @@ -32,6 +32,10 @@ # define PLATFORM_IS_SPARC #endif +#if defined(__riscv) +# define PLATFORM_IS_RISCV +#endif + namespace snmalloc { /** @@ -195,27 +199,15 @@ namespace snmalloc static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept { - // Impose constraints on bounds annotations. - static_assert(BIn::spatial >= capptr::dimension::Spatial::Chunk); - static_assert(capptr_is_spatial_refinement()); + static_assert( + BIn::spatial > capptr::dimension::Spatial::Alloc, + "Refusing to re-bound Spatial::Alloc CapPtr"); + static_assert( + capptr::is_spatial_refinement(), + "capptr_bound must preserve non-spatial CapPtr dimensions"); UNUSED(size); - return CapPtr(a.template as_static().unsafe_capptr); - } - - /** - * For architectures which do not enforce StrictProvenance, there's nothing - * to be done, so just return the pointer unmodified with new annotation. - */ - template< - typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) BOut, - SNMALLOC_CONCEPT(capptr::ConceptBound) BIn> - static SNMALLOC_FAST_PATH CapPtr - capptr_rebound(CapPtr a, CapPtr r) noexcept - { - UNUSED(a); - return CapPtr(r.unsafe_capptr); + return CapPtr(a.template as_static().unsafe_ptr()); } }; } // namespace snmalloc @@ -230,11 +222,21 @@ namespace snmalloc # include "aal_powerpc.h" #elif defined(PLATFORM_IS_SPARC) # include "aal_sparc.h" +#elif defined(PLATFORM_IS_RISCV) +# include "aal_riscv.h" +#endif + +#if defined(__CHERI_PURE_CAPABILITY__) +# include "aal_cheri.h" #endif namespace snmalloc { +#if defined(__CHERI_PURE_CAPABILITY__) + using Aal = AAL_Generic>; +#else using Aal = AAL_Generic>; +#endif template constexpr static bool aal_supports = (AAL::aal_features & F) == F; diff --git a/src/aal/aal_cheri.h b/src/aal/aal_cheri.h new file mode 100644 index 000000000..866e0ad14 --- /dev/null +++ b/src/aal/aal_cheri.h @@ -0,0 +1,52 @@ +#pragma once + +#include "../ds/defines.h" + +#include + +namespace snmalloc +{ + /** + * A mixin AAL that applies CHERI to a `Base` architecture. Gives + * architectural teeth to the capptr_bound primitive. + */ + template + class AAL_CHERI : public Base + { + public: + /** + * CHERI pointers are not integers and come with strict provenance + * requirements. + */ + static constexpr uint64_t aal_features = + (Base::aal_features & ~IntegerPointers) | StrictProvenance; + + /** + * On CHERI-aware compilers, ptraddr_t is an integral type that is wide + * enough to hold any address that may be contained within a memory + * capability. It does not carry provenance: it is not a capability, but + * merely an address. 
+ */ + typedef ptraddr_t address_t; + + template< + typename T, + SNMALLOC_CONCEPT(capptr::ConceptBound) BOut, + SNMALLOC_CONCEPT(capptr::ConceptBound) BIn, + typename U = T> + static SNMALLOC_FAST_PATH CapPtr + capptr_bound(CapPtr a, size_t size) noexcept + { + static_assert( + BIn::spatial > capptr::dimension::Spatial::Alloc, + "Refusing to re-bound Spatial::Alloc CapPtr"); + static_assert( + capptr::is_spatial_refinement(), + "capptr_bound must preserve non-spatial CapPtr dimensions"); + SNMALLOC_ASSERT(__builtin_cheri_tag_get(a.unsafe_ptr())); + + void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size); + return CapPtr(static_cast(pb)); + } + }; +} // namespace snmalloc diff --git a/src/aal/aal_concept.h b/src/aal/aal_concept.h index 2a14a94b4..15eed19bc 100644 --- a/src/aal/aal_concept.h +++ b/src/aal/aal_concept.h @@ -53,12 +53,6 @@ namespace snmalloc { AAL::template capptr_bound(auth, sz) } noexcept -> ConceptSame>; - - /** - * Construct a copy of auth with its target set to that of ret. - */ - { AAL::capptr_rebound(auth, ret) } noexcept - -> ConceptSame>; }; template diff --git a/src/aal/aal_consts.h b/src/aal/aal_consts.h index 24b31ff73..8990a41df 100644 --- a/src/aal/aal_consts.h +++ b/src/aal/aal_consts.h @@ -33,5 +33,6 @@ namespace snmalloc X86, X86_SGX, Sparc, + RISCV }; } // namespace snmalloc diff --git a/src/aal/aal_riscv.h b/src/aal/aal_riscv.h new file mode 100644 index 000000000..2d2f7a4f1 --- /dev/null +++ b/src/aal/aal_riscv.h @@ -0,0 +1,54 @@ +#pragma once + +#if __riscv_xlen == 64 +# define SNMALLOC_VA_BITS_64 +#elif __riscv_xlen == 32 +# define SNMALLOC_VA_BITS_32 +#endif + +namespace snmalloc +{ + /** + * RISC-V architecture layer, phrased as generically as possible. Specific + * implementations may need to adjust some of these. + */ + class AAL_RISCV + { + public: + static constexpr uint64_t aal_features = IntegerPointers; + + static constexpr size_t smallest_page_size = 0x1000; + + static constexpr AalName aal_name = RISCV; + + static void inline pause() + { + /* + * The "Zihintpause" extension claims to be the right thing to do here, + * and it is expected to be used in analogous places, e.g., Linux's + * cpu_relax(), but... + * + * its specification is somewhat unusual, in that it talks about the rate + * at which a HART's instructions retire rather than the rate at which + * they are dispatched (Intel's PAUSE instruction explicitly promises + * that it "de-pipelines" the spin-wait loop, for example) or anything + * about memory semantics (Intel's PAUSE docs talk about a possible + * memory order violation and pipeline flush upon loop exit). + * + * we don't yet have examples of what implementations have done. + * + * it's not yet understood by C frontends or assembler, meaning we'd have + * to spell it out by hand, as + * __asm__ volatile(".byte 0xF; .byte 0x0; .byte 0x0; .byte 0x1"); + * + * All told, we just leave this function empty for the moment. The good + * news is that, if and when we do add a PAUSE, the instruction is encoded + * by stealing some dead space of the FENCE instruction and so should be + * available everywhere even if it doesn't do anything on a particular + * microarchitecture. 
+ */ + } + }; + + using AAL_Arch = AAL_RISCV; +} diff --git a/src/backend/address_space.h b/src/backend/address_space.h index e197e7709..02c48f0b5 100644 --- a/src/backend/address_space.h +++ b/src/backend/address_space.h @@ -53,12 +53,10 @@ namespace snmalloc SNMALLOC_ASSERT(size >= sizeof(void*)); /* - * For sufficiently large allocations with platforms that support - * aligned allocations and architectures that don't require - * StrictProvenance, try asking the platform first. + * For sufficiently large allocations with platforms that support aligned + * allocations, try asking the platform directly. */ - if constexpr ( - pal_supports && !aal_supports) + if constexpr (pal_supports) { if (size >= PAL::minimum_alloc_size) { diff --git a/src/backend/address_space_core.h b/src/backend/address_space_core.h index d3088c0fe..f8b725162 100644 --- a/src/backend/address_space_core.h +++ b/src/backend/address_space_core.h @@ -20,6 +20,16 @@ namespace snmalloc * * It cannot unreserve memory, so this does not require the * usual complexity of a buddy allocator. + * + * TODO: This manages pieces of memory smaller than (1U << MIN_CHUNK_BITS) to + * source Metaslab and LocalCache objects. On CHERI, where ASLR and guard + * pages are not needed, it may be worth switching to a design where we + * bootstrap allocators with at least two embedded Metaslab-s that can be used + * to construct slabs for LocalCache and, of course, additional Metaslab + * objects. That would let us stop splitting memory below that threshold + * here, and may reduce address space fragmentation or address space committed + * to Metaslab objects in perpetuity; it could also make {set,get}_next less + * scary. */ class AddressSpaceManagerCore { @@ -77,13 +87,16 @@ namespace snmalloc { if (align_bits >= MIN_CHUNK_BITS) { - // The pagemap stores MetaEntrys, abuse the metaslab field to be the + // The pagemap stores `MetaEntry`s; abuse the metaslab field to be the // next block in the stack of blocks. // // The pagemap entries here have nullptr (i.e., fake_large_remote) as // their remote, and so other accesses to the pagemap (by // external_pointer, for example) will not attempt to follow this // "Metaslab" pointer. + // + // dealloc() can reject attempts to free such MetaEntry-s due to the + // zero sizeclass. 
MetaEntry t(reinterpret_cast(next.unsafe_ptr()), nullptr, 0); Pagemap::set_metaentry(local_state, address_cast(base), 1, t); return; @@ -112,7 +125,7 @@ namespace snmalloc const MetaEntry& t = Pagemap::template get_metaentry( local_state, address_cast(base)); return capptr::Chunk( - reinterpret_cast(t.get_metaslab())); + reinterpret_cast(t.get_metaslab_no_remote())); } return base->next; diff --git a/src/backend/backend.h b/src/backend/backend.h index 36328f462..1cc9c0bd9 100644 --- a/src/backend/backend.h +++ b/src/backend/backend.h @@ -87,6 +87,8 @@ namespace snmalloc return {p, nullptr}; } + meta->meta_common.chunk = p; + MetaEntry t(meta, remote, sizeclass); Pagemap::set_metaentry(local_state, address_cast(p), size, t); return {p, meta}; diff --git a/src/backend/pagemap.h b/src/backend/pagemap.h index 9c770f051..fe1f9e76d 100644 --- a/src/backend/pagemap.h +++ b/src/backend/pagemap.h @@ -294,7 +294,7 @@ namespace snmalloc void set(address_t p, T t) { #ifdef SNMALLOC_TRACING - std::cout << "Pagemap.Set " << (void*)p << std::endl; + std::cout << "Pagemap.Set " << (void*)(uintptr_t)p << std::endl; #endif if constexpr (has_bounds) { diff --git a/src/ds/address.h b/src/ds/address.h index 90b0bb456..13c6ed848 100644 --- a/src/ds/address.h +++ b/src/ds/address.h @@ -8,11 +8,8 @@ namespace snmalloc { /** - * The type used for an address. Currently, all addresses are assumed to be - * provenance-carrying values and so it is possible to cast back from the - * result of arithmetic on an address_t. Eventually, this will want to be - * separated into two types, one for raw addresses and one for addresses that - * can be cast back to pointers. + * The type used for an address. On CHERI, this is not a provenance-carrying + * value and so cannot be converted back to a pointer. */ using address_t = Aal::address_t; diff --git a/src/ds/ptrwrap.h b/src/ds/ptrwrap.h index 6a0b024e1..8b91a01bb 100644 --- a/src/ds/ptrwrap.h +++ b/src/ds/ptrwrap.h @@ -48,7 +48,7 @@ namespace snmalloc /** * On some platforms (e.g., CHERI), pointers can be checked to see whether * they authorize control of the address space. See the PAL's - * capptr_export(). + * capptr_to_user_address_control(). */ enum class AddressSpaceControl { @@ -188,47 +188,58 @@ namespace snmalloc */ using AllocWild = Alloc::with_wildness; } // namespace bounds - } // namespace capptr - /** - * Determine whether BI is a spatial refinement of BO. - * Chunk and ChunkD are considered eqivalent here. - */ - template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BI, - SNMALLOC_CONCEPT(capptr::ConceptBound) BO> - SNMALLOC_CONSTEVAL bool capptr_is_spatial_refinement() - { - if (BI::address_space_control != BO::address_space_control) - { - return false; - } + /** + * Compute the AddressSpaceControl::User variant of a capptr::bound + * annotation. This is used by the PAL's capptr_to_user_address_control + * function to compute its return value's annotation. + */ + template + using user_address_control_type = + typename B::template with_address_space_control< + dimension::AddressSpaceControl::User>; - if (BI::wildness != BO::wildness) + /** + * Determine whether BI is a spatial refinement of BO. + * Chunk and ChunkD are considered eqivalent here. 
+ */ + template< + SNMALLOC_CONCEPT(capptr::ConceptBound) BI, + SNMALLOC_CONCEPT(capptr::ConceptBound) BO> + SNMALLOC_CONSTEVAL bool is_spatial_refinement() { - return false; - } + if (BI::address_space_control != BO::address_space_control) + { + return false; + } - switch (BI::spatial) - { - using namespace capptr::dimension; - case Spatial::Chunk: - return true; + if (BI::wildness != BO::wildness) + { + return false; + } - case Spatial::Alloc: - return BO::spatial == Spatial::Alloc; + switch (BI::spatial) + { + using namespace capptr::dimension; + case Spatial::Chunk: + return true; + + case Spatial::Alloc: + return BO::spatial == Spatial::Alloc; + } } - } + } // namespace capptr /** * A pointer annotated with a "phantom type parameter" carrying a static * summary of its StrictProvenance metadata. */ template - struct CapPtr + class CapPtr { T* unsafe_capptr; + public: /** * nullptr is implicitly constructable at any bounds type */ @@ -348,7 +359,7 @@ namespace snmalloc inline SNMALLOC_FAST_PATH capptr::Alloc capptr_chunk_is_alloc(capptr::ChunkUser p) { - return capptr::Alloc(p.unsafe_capptr); + return capptr::Alloc(p.unsafe_ptr()); } /** @@ -358,7 +369,17 @@ namespace snmalloc */ inline SNMALLOC_FAST_PATH void* capptr_reveal(capptr::Alloc p) { - return p.unsafe_capptr; + return p.unsafe_ptr(); + } + + /** + * Like capptr_reveal, but sometimes we do mean to reveal wild pointers + * (specifically in external_pointer, where we're revealing something + * architecturally derived from a user pointer). + */ + inline SNMALLOC_FAST_PATH void* capptr_reveal_wild(capptr::AllocWild p) + { + return p.unsafe_ptr(); } /** @@ -383,7 +404,7 @@ namespace snmalloc return CapPtr< T, typename B::template with_wildness>( - p.unsafe_capptr); + p.unsafe_ptr()); } /** @@ -396,10 +417,11 @@ namespace snmalloc * will expose or consume only CapPtr with the same bounds annotation. */ template - struct AtomicCapPtr + class AtomicCapPtr { std::atomic unsafe_capptr; + public: /** * nullptr is constructable at any bounds type */ @@ -438,7 +460,7 @@ namespace snmalloc CapPtr desired, std::memory_order order = std::memory_order_seq_cst) noexcept { - this->unsafe_capptr.store(desired.unsafe_capptr, order); + this->unsafe_capptr.store(desired.unsafe_ptr(), order); } SNMALLOC_FAST_PATH CapPtr exchange( @@ -446,7 +468,7 @@ namespace snmalloc std::memory_order order = std::memory_order_seq_cst) noexcept { return CapPtr( - this->unsafe_capptr.exchange(desired.unsafe_capptr, order)); + this->unsafe_capptr.exchange(desired.unsafe_ptr(), order)); } SNMALLOC_FAST_PATH bool operator==(const AtomicCapPtr& rhs) const diff --git a/src/mem/corealloc.h b/src/mem/corealloc.h index 0105df0f5..b0fc28eed 100644 --- a/src/mem/corealloc.h +++ b/src/mem/corealloc.h @@ -176,7 +176,7 @@ namespace snmalloc * - Allocating stub in the message queue * Note this is not performance critical as very infrequently called. */ - void* small_alloc_one(size_t size) + capptr::Alloc small_alloc_one(size_t size) { SNMALLOC_ASSERT(attached_cache != nullptr); auto domesticate = @@ -285,7 +285,8 @@ namespace snmalloc [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { return capptr_domesticate(local_state, p); }; - void* p = finish_alloc_no_zero(fl.take(key, domesticate), sizeclass); + capptr::Alloc p = + finish_alloc_no_zero(fl.take(key, domesticate), sizeclass); #ifdef SNMALLOC_CHECK_CLIENT // Check free list is well-formed on platforms with @@ -320,12 +321,16 @@ namespace snmalloc // have the whole chunk. 
auto start_of_slab = pointer_align_down( p, snmalloc::sizeclass_to_slab_size(sizeclass)); - // TODO Add bounds correctly here - chunk_record->chunk = capptr::Chunk(start_of_slab); + + SNMALLOC_ASSERT( + address_cast(start_of_slab) == + address_cast(chunk_record->meta_common.chunk)); #ifdef SNMALLOC_TRACING - std::cout << "Slab " << start_of_slab << " is unused, Object sizeclass " - << sizeclass << std::endl; + std::cout << "Slab " << start_of_slab.unsafe_ptr() + << " is unused, Object sizeclass " << sizeclass << std::endl; +#else + UNUSED(start_of_slab); #endif return chunk_record; } @@ -654,7 +659,7 @@ namespace snmalloc } template - SNMALLOC_SLOW_PATH void* + SNMALLOC_SLOW_PATH capptr::Alloc small_alloc(sizeclass_t sizeclass, freelist::Iter<>& fast_free_list) { size_t rsize = sizeclass_to_size(sizeclass); @@ -716,7 +721,7 @@ namespace snmalloc } template - SNMALLOC_SLOW_PATH void* small_alloc_slow( + SNMALLOC_SLOW_PATH capptr::Alloc small_alloc_slow( sizeclass_t sizeclass, freelist::Iter<>& fast_free_list, size_t rsize) { // No existing free list get a new slab. diff --git a/src/mem/freelist.h b/src/mem/freelist.h index 7b6e3ca5e..4640534ef 100644 --- a/src/mem/freelist.h +++ b/src/mem/freelist.h @@ -44,7 +44,7 @@ namespace snmalloc /** * This function is used to sign back pointers in the free list. */ - inline static uintptr_t + inline static address_t signed_prev(address_t curr, address_t next, const FreeListKey& key) { auto c = curr; diff --git a/src/mem/localalloc.h b/src/mem/localalloc.h index b90cca2d4..43423af9d 100644 --- a/src/mem/localalloc.h +++ b/src/mem/localalloc.h @@ -167,7 +167,7 @@ namespace snmalloc * passed to the core allocator. */ template - SNMALLOC_SLOW_PATH void* alloc_not_small(size_t size) + SNMALLOC_SLOW_PATH capptr::Alloc alloc_not_small(size_t size) { if (size == 0) { @@ -203,12 +203,12 @@ namespace snmalloc chunk.unsafe_ptr(), size); } - return chunk.unsafe_ptr(); + return capptr_chunk_is_alloc(capptr_to_user_address_control(chunk)); }); } template - SNMALLOC_FAST_PATH void* small_alloc(size_t size) + SNMALLOC_FAST_PATH capptr::Alloc small_alloc(size_t size) { // SNMALLOC_ASSUME(size <= sizeclass_to_size(NUM_SIZECLASSES)); auto domesticate = [this](freelist::QueuePtr p) @@ -276,8 +276,10 @@ namespace snmalloc return; } - // Recheck what kind of dealloc we should do incase, the allocator we - // get from lazy_init is the originating allocator. + // Recheck what kind of dealloc we should do in case the allocator we get + // from lazy_init is the originating allocator. (TODO: but note that this + // can't suddenly become a large deallocation; the only distinction is + // between being ours to handle and something to post to a Remote.) lazy_init( [&](CoreAlloc*, CapPtr p) { dealloc(p.unsafe_ptr()); // TODO don't double count statistics @@ -430,11 +432,10 @@ namespace snmalloc { // Small allocations are more likely. Improve // branch prediction by placing this case first. - return small_alloc(size); + return capptr_reveal(small_alloc(size)); } - // TODO capptr_reveal? - return alloc_not_small(size); + return capptr_reveal(alloc_not_small(size)); #endif } @@ -508,31 +509,37 @@ namespace snmalloc // Large deallocation or null. if (likely(p_tame != nullptr)) { + size_t entry_sizeclass = entry.get_sizeclass(); + // Check this is managed by this pagemap. - check_client(entry.get_sizeclass() != 0, "Not allocated by snmalloc."); + // + // TODO: Should this be tested even in the !CHECK_CLIENT case? 
Things + // go fairly pear-shaped, with the ASM's ranges[] getting cross-linked + // with a ChunkAllocator's chunk_stack[0], which seems bad. + check_client(entry_sizeclass != 0, "Not allocated by snmalloc."); - size_t size = bits::one_at_bit(entry.get_sizeclass()); + size_t size = bits::one_at_bit(entry_sizeclass); + size_t slab_sizeclass = + metaentry_chunk_sizeclass_to_slab_sizeclass(entry_sizeclass); // Check for start of allocation. check_client( pointer_align_down(p_tame, size) == p_tame, "Not start of an allocation."); - size_t slab_sizeclass = large_size_to_chunk_sizeclass(size); # ifdef SNMALLOC_TRACING std::cout << "Large deallocation: " << size << " chunk sizeclass: " << slab_sizeclass << std::endl; +# else + UNUSED(size); # endif - ChunkRecord* slab_record = - reinterpret_cast(entry.get_metaslab()); - /* - * StrictProvenance TODO: this is a subversive amplification. p_tame is - * tame but Alloc-bounded, but we're coercing it to Chunk-bounded. We - * should, instead, not be storing ->chunk here, but should be keeping - * a CapPtr to this region internally even while it's - * allocated. - */ - slab_record->chunk = capptr::Chunk(p_tame.unsafe_ptr()); + + auto slab_record = + static_cast(entry.get_metaslab_no_remote()); + + SNMALLOC_ASSERT( + address_cast(slab_record->meta_common.chunk) == address_cast(p_tame)); + check_init( []( CoreAlloc* core_alloc, @@ -592,9 +599,10 @@ namespace snmalloc // be implicit domestication through the `SharedStateHandle::Pagemap` or // we could just leave well enough alone. - // Note that this should return 0 for nullptr. + // Note that alloc_size should return 0 for nullptr. // Other than nullptr, we know the system will be initialised as it must // be called with something we have already allocated. + // // To handle this case we require the uninitialised pagemap contain an // entry for the first chunk of memory, that states it represents a // large object, so we can pull the check for null off the fast path. @@ -628,26 +636,28 @@ namespace snmalloc // be implicit domestication through the `SharedStateHandle::Pagemap` or // we could just leave well enough alone. - // TODO bring back the CHERI bits. Wes to review if required. + capptr::AllocWild p = capptr_from_client(p_raw); + MetaEntry entry = SharedStateHandle::Pagemap::template get_metaentry( - core_alloc->backend_state_ptr(), address_cast(p_raw)); + core_alloc->backend_state_ptr(), address_cast(p)); auto sizeclass = entry.get_sizeclass(); if (likely(entry.get_remote() != SharedStateHandle::fake_large_remote)) { auto rsize = sizeclass_to_size(sizeclass); - auto offset = - address_cast(p_raw) & (sizeclass_to_slab_size(sizeclass) - 1); + auto offset = address_cast(p) & (sizeclass_to_slab_size(sizeclass) - 1); auto start_offset = round_by_sizeclass(sizeclass, offset); if constexpr (location == Start) { UNUSED(rsize); - return pointer_offset(p_raw, start_offset - offset); + return capptr_reveal_wild(pointer_offset(p, start_offset - offset)); } else if constexpr (location == End) - return pointer_offset(p_raw, rsize + start_offset - offset - 1); + return capptr_reveal_wild( + pointer_offset(p, rsize + start_offset - offset - 1)); else - return pointer_offset(p_raw, rsize + start_offset - offset); + return capptr_reveal_wild( + pointer_offset(p, rsize + start_offset - offset)); } // Sizeclass zero of a large allocation is used for not managed by us. @@ -655,13 +665,13 @@ namespace snmalloc { // This is a large allocation, find start by masking. 
auto rsize = bits::one_at_bit(sizeclass); - auto start = pointer_align_down(p_raw, rsize); + auto start = pointer_align_down(p, rsize); if constexpr (location == Start) - return start; + return capptr_reveal_wild(start); else if constexpr (location == End) - return pointer_offset(start, rsize - 1); + return capptr_reveal_wild(pointer_offset(start, rsize - 1)); else - return pointer_offset(start, rsize); + return capptr_reveal_wild(pointer_offset(start, rsize)); } #else UNUSED(p_raw); diff --git a/src/mem/localcache.h b/src/mem/localcache.h index 1ee55a866..b7c2be79a 100644 --- a/src/mem/localcache.h +++ b/src/mem/localcache.h @@ -12,25 +12,24 @@ namespace snmalloc { using Stats = AllocStats; - inline static SNMALLOC_FAST_PATH void* + inline static SNMALLOC_FAST_PATH capptr::Alloc finish_alloc_no_zero(freelist::HeadPtr p, sizeclass_t sizeclass) { SNMALLOC_ASSERT(Metaslab::is_start_of_object(sizeclass, address_cast(p))); UNUSED(sizeclass); - auto r = capptr_reveal(p.as_void()); - - return r; + return p.as_void(); } template - inline static SNMALLOC_FAST_PATH void* + inline static SNMALLOC_FAST_PATH capptr::Alloc finish_alloc(freelist::HeadPtr p, sizeclass_t sizeclass) { auto r = finish_alloc_no_zero(p, sizeclass); if constexpr (zero_mem == YesZero) - SharedStateHandle::Pal::zero(r, sizeclass_to_size(sizeclass)); + SharedStateHandle::Pal::zero( + r.unsafe_ptr(), sizeclass_to_size(sizeclass)); // TODO: Should this be zeroing the free Object state, in the non-zeroing // case? @@ -105,7 +104,7 @@ namespace snmalloc typename SharedStateHandle, typename Slowpath, typename Domesticator> - SNMALLOC_FAST_PATH void* + SNMALLOC_FAST_PATH capptr::Alloc alloc(Domesticator domesticate, size_t size, Slowpath slowpath) { auto& key = entropy.get_free_list_key(); diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h index 614dd00a4..c68e95b79 100644 --- a/src/mem/metaslab.h +++ b/src/mem/metaslab.h @@ -9,13 +9,24 @@ namespace snmalloc { - class Slab; + /** + * A guaranteed type-stable sub-structure of all metadata referenced by the + * Pagemap. Use-specific structures (Metaslab, ChunkRecord) are expected to + * have this at offset zero so that, even in the face of concurrent mutation + * and reuse of the memory backing that metadata, the types of these fields + * remain fixed. + */ + struct MetaCommon + { + capptr::Chunk chunk; + }; // The Metaslab represent the status of a single slab. - // This can be either a short or a standard slab. class alignas(CACHELINE_SIZE) Metaslab { public: + MetaCommon meta_common; + // Used to link metaslabs together in various other data-structures. Metaslab* next{nullptr}; @@ -184,14 +195,29 @@ namespace snmalloc } }; - struct RemoteAllocator; + static_assert(std::is_standard_layout_v); + static_assert( + offsetof(Metaslab, meta_common) == 0, + "ChunkRecord and Metaslab must share a common prefix"); /** * Entry stored in the pagemap. */ class MetaEntry { - Metaslab* meta{nullptr}; + Metaslab* meta{nullptr}; // may also be ChunkRecord* + + /** + * A bit-packed pointer to the owning allocator (if any), and the sizeclass + * of this chunk. The sizeclass here is itself a union between two cases: + * + * * log_2(size), at least MIN_CHUNK_BITS, for large allocations. + * + * * a value in [0, NUM_SIZECLASSES] for small allocations. 
These may be + * directly passed to the sizeclass (not slab_sizeclass) functions of + * sizeclasstable.h + * + */ uintptr_t remote_and_sizeclass{0}; public: @@ -203,10 +229,12 @@ namespace snmalloc * the second argument of this must always be the return value from * `get_remote_and_sizeclass`. */ + SNMALLOC_FAST_PATH MetaEntry(Metaslab* meta, uintptr_t remote_and_sizeclass) : meta(meta), remote_and_sizeclass(remote_and_sizeclass) {} + SNMALLOC_FAST_PATH MetaEntry(Metaslab* meta, RemoteAllocator* remote, sizeclass_t sizeclass) : meta(meta) { @@ -215,8 +243,24 @@ namespace snmalloc pointer_offset(reinterpret_cast(remote), sizeclass); } - [[nodiscard]] Metaslab* get_metaslab() const + /** + * Return the Metaslab field as a void*, guarded by an assert that there is + * no remote that owns this chunk. + */ + [[nodiscard]] SNMALLOC_FAST_PATH void* get_metaslab_no_remote() const + { + SNMALLOC_ASSERT(get_remote() == nullptr); + return static_cast(meta); + } + + /** + * Return the Metaslab metadata associated with this chunk, guarded by an + * assert that this chunk is being used as a slab (i.e., has an associated + * owning allocator). + */ + [[nodiscard]] SNMALLOC_FAST_PATH Metaslab* get_metaslab() const { + SNMALLOC_ASSERT(get_remote() != nullptr); return meta; } @@ -226,20 +270,23 @@ namespace snmalloc * only safe use for this is to pass it to the two-argument constructor of * this class. */ - uintptr_t get_remote_and_sizeclass() + [[nodiscard]] SNMALLOC_FAST_PATH uintptr_t get_remote_and_sizeclass() { return remote_and_sizeclass; } - [[nodiscard]] RemoteAllocator* get_remote() const + [[nodiscard]] SNMALLOC_FAST_PATH RemoteAllocator* get_remote() const { return reinterpret_cast( pointer_align_down(remote_and_sizeclass)); } - [[nodiscard]] sizeclass_t get_sizeclass() const + [[nodiscard]] SNMALLOC_FAST_PATH sizeclass_t get_sizeclass() const { - return remote_and_sizeclass & (alignof(RemoteAllocator) - 1); + // TODO: perhaps remove static_cast with resolution of + // https://github.com/CTSRD-CHERI/llvm-project/issues/588 + return static_cast(remote_and_sizeclass) & + (alignof(RemoteAllocator) - 1); } }; diff --git a/src/mem/sizeclasstable.h b/src/mem/sizeclasstable.h index c406ac1f2..ebd091fa6 100644 --- a/src/mem/sizeclasstable.h +++ b/src/mem/sizeclasstable.h @@ -165,6 +165,16 @@ namespace snmalloc return bits::one_at_bit(MIN_CHUNK_BITS + sizeclass); } + /** + * For large allocations, the metaentry stores the raw log_2 of the size, + * which must be shifted into the index space of slab_sizeclass-es. + */ + inline static size_t + metaentry_chunk_sizeclass_to_slab_sizeclass(sizeclass_t sizeclass) + { + return sizeclass - MIN_CHUNK_BITS; + } + inline constexpr static uint16_t sizeclass_to_slab_object_count(sizeclass_t sizeclass) { diff --git a/src/mem/slaballocator.h b/src/mem/slaballocator.h index aa4d47fcf..01b0a704b 100644 --- a/src/mem/slaballocator.h +++ b/src/mem/slaballocator.h @@ -17,9 +17,13 @@ namespace snmalloc */ struct ChunkRecord { + MetaCommon meta_common; std::atomic next; - capptr::Chunk chunk; }; + static_assert(std::is_standard_layout_v); + static_assert( + offsetof(ChunkRecord, meta_common) == 0, + "ChunkRecord and Metaslab must share a common prefix"); /** * How many slab sizes that can be provided. @@ -30,7 +34,7 @@ namespace snmalloc * Used to ensure the per slab meta data is large enough for both use cases. 
*/ static_assert( - sizeof(Metaslab) >= sizeof(ChunkRecord), "We conflat these two types."); + sizeof(Metaslab) >= sizeof(ChunkRecord), "We conflate these two types."); /** * This is the global state required for the chunk allocator. @@ -97,7 +101,7 @@ namespace snmalloc if (chunk_record != nullptr) { - auto slab = chunk_record->chunk; + auto slab = chunk_record->meta_common.chunk; state.memory_in_stacks -= slab_size; auto meta = reinterpret_cast(chunk_record); #ifdef SNMALLOC_TRACING @@ -138,8 +142,8 @@ namespace snmalloc { auto& state = SharedStateHandle::get_slab_allocator_state(&local_state); #ifdef SNMALLOC_TRACING - std::cout << "Return slab:" << p->chunk.unsafe_ptr() << " slab_sizeclass " - << slab_sizeclass << " size " + std::cout << "Return slab:" << p->meta_common.chunk.unsafe_ptr() + << " slab_sizeclass " << slab_sizeclass << " size " << slab_sizeclass_to_size(slab_sizeclass) << " memory in stacks " << state.memory_in_stacks << std::endl; #endif diff --git a/src/pal/pal.h b/src/pal/pal.h index a2ad11556..096df25bf 100644 --- a/src/pal/pal.h +++ b/src/pal/pal.h @@ -69,16 +69,6 @@ namespace snmalloc // Used to keep Superslab metadata committed. static constexpr size_t OS_PAGE_SIZE = Pal::page_size; - /** - * Compute the AddressSpaceControl::User variant of a capptr::bound - * annotation. This is used by the PAL's capptr_export function to compute - * its return value's annotation. - */ - template - using capptr_user_address_control_type = - typename B::template with_address_space_control< - capptr::dimension::AddressSpaceControl::User>; - /** * Perform platform-specific adjustment of return pointers. * @@ -92,10 +82,10 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::ConceptBound) B> static inline typename std::enable_if_t< !aal_supports, - CapPtr>> + CapPtr>> capptr_to_user_address_control(CapPtr p) { - return CapPtr>(p.unsafe_capptr); + return CapPtr>(p.unsafe_ptr()); } template< @@ -105,7 +95,7 @@ namespace snmalloc SNMALLOC_CONCEPT(capptr::ConceptBound) B> static SNMALLOC_FAST_PATH typename std::enable_if_t< aal_supports, - CapPtr>> + CapPtr>> capptr_to_user_address_control(CapPtr p) { return PAL::capptr_to_user_address_control(p); @@ -128,7 +118,7 @@ namespace snmalloc { static_assert( !page_aligned || B::spatial >= capptr::dimension::Spatial::Chunk); - PAL::template zero(p.unsafe_capptr, sz); + PAL::template zero(p.unsafe_ptr(), sz); } static_assert( diff --git a/src/pal/pal_consts.h b/src/pal/pal_consts.h index 0cb9e81d4..8497e3305 100644 --- a/src/pal/pal_consts.h +++ b/src/pal/pal_consts.h @@ -14,8 +14,13 @@ namespace snmalloc * - using_readonly * - not_using * model. + * + * TODO: There is a known bug in CheriBSD that means round-tripping through + * PROT_NONE sheds capability load and store permissions (while restoring data + * read/write, for added excitement). For the moment, just force this down on + * CHERI. */ -#ifdef SNMALLOC_CHECK_CLIENT +#if defined(SNMALLOC_CHECK_CLIENT) && !defined(__CHERI_PURE_CAPABILITY__) static constexpr bool PalEnforceAccess = true; #else static constexpr bool PalEnforceAccess = false; diff --git a/src/pal/pal_freebsd.h b/src/pal/pal_freebsd.h index 13d74aec2..e17df290d 100644 --- a/src/pal/pal_freebsd.h +++ b/src/pal/pal_freebsd.h @@ -3,6 +3,13 @@ #if defined(__FreeBSD__) && !defined(_KERNEL) # include "pal_bsd_aligned.h" +// On CHERI platforms, we need to know the value of CHERI_PERM_CHERIABI_VMMAP. 
+// This pollutes the global namespace a little, sadly, but I think only with +// symbols that begin with CHERI_, which is as close to namespaces as C offers. +# if defined(__CHERI_PURE_CAPABILITY__) +# include +# endif + namespace snmalloc { /** @@ -24,6 +31,38 @@ namespace snmalloc * add new features that they should add any required feature flags. */ static constexpr uint64_t pal_features = PALBSD_Aligned::pal_features; + + /** + * FreeBSD uses atypically small address spaces on its 64 bit RISC machines. + * Problematically, these are so small that if we used the default + * address_bits (48), we'd try to allocate the whole AS (or larger!) for the + * Pagemap itself! + */ + static constexpr size_t address_bits = (Aal::bits == 32) ? + Aal::address_bits : + (Aal::aal_name == RISCV ? 38 : Aal::address_bits); + // TODO, if we ever backport to MIPS, this should yield 39 there. + +# if defined(__CHERI_PURE_CAPABILITY__) + static_assert( + aal_supports, + "CHERI purecap support requires StrictProvenance AAL"); + + /** + * On CheriBSD, exporting a pointer means stripping it of the authority to + * manage the address space it references by clearing the CHERIABI_VMMAP + * permission bit. + */ + template + static SNMALLOC_FAST_PATH CapPtr> + capptr_to_user_address_control(CapPtr p) + { + return CapPtr>( + __builtin_cheri_perms_and( + p.unsafe_ptr(), + ~static_cast(CHERI_PERM_CHERIABI_VMMAP))); + } +# endif }; } // namespace snmalloc #endif diff --git a/src/pal/pal_posix.h b/src/pal/pal_posix.h index 55d96a5e2..3f3d16366 100644 --- a/src/pal/pal_posix.h +++ b/src/pal/pal_posix.h @@ -146,7 +146,10 @@ namespace snmalloc static void print_stack_trace() { -#ifdef SNMALLOC_BACKTRACE_HEADER + // TODO: the backtrace mechanism does not yet work on CHERI, and causes + // tests which expect to be able to hook abort() to fail. Skip it until + // https://github.com/CTSRD-CHERI/cheribsd/issues/962 is fixed. +#if defined(SNMALLOC_BACKTRACE_HEADER) && !defined(__CHERI_PURE_CAPABILITY__) constexpr int SIZE = 1024; void* buffer[SIZE]; auto nptrs = backtrace(buffer, SIZE); diff --git a/src/test/func/domestication/domestication.cc b/src/test/func/domestication/domestication.cc index 26bc096d1..a1607de47 100644 --- a/src/test/func/domestication/domestication.cc +++ b/src/test/func/domestication/domestication.cc @@ -69,7 +69,7 @@ namespace snmalloc /* Verify that a pointer points into the region managed by this config */ template - static SNMALLOC_FAST_PATH CapPtr< + static CapPtr< T, typename B::template with_wildness> capptr_domesticate(typename Backend::LocalState*, CapPtr p) @@ -119,6 +119,8 @@ int main() auto alloc1 = new Alloc(); + // Allocate from alloc1; the size doesn't matter a whole lot, it just needs to + // be a small object and so definitely owned by this allocator rather. auto p = alloc1->alloc(48); std::cout << "Allocated p " << p << std::endl; @@ -137,7 +139,9 @@ int main() snmalloc::CustomGlobals::domesticate_trace = true; snmalloc::CustomGlobals::domesticate_count = 0; - auto q = alloc1->alloc(56); + // Open a new slab, so that slow path will pick up the message queue. That + // means this should be a sizeclass we've not used before, even internally. 
+ auto q = alloc1->alloc(512); std::cout << "Allocated q " << q << std::endl; snmalloc::CustomGlobals::domesticate_trace = false; diff --git a/src/test/func/memcpy/func-memcpy.cc b/src/test/func/memcpy/func-memcpy.cc index 1727d3825..202b303d1 100644 --- a/src/test/func/memcpy/func-memcpy.cc +++ b/src/test/func/memcpy/func-memcpy.cc @@ -135,8 +135,13 @@ int main() // Skip the checks that expect bounds checks to fail when we are not the // malloc implementation. # if !defined(SNMALLOC_PASS_THROUGH) - // Some sizes to check for out-of-bounds access - std::initializer_list sizes = {16, 1024, 2 * 1024 * 1024}; + // Some sizes to check for out-of-bounds access. As we are only able to + // catch overflows past the end of the sizeclass-padded allocation, make + // sure we don't try to test on smaller allocations. + std::initializer_list sizes = {MIN_ALLOC_SIZE, 1024, 2 * 1024 * 1024}; + static_assert( + MIN_ALLOC_SIZE < 1024, + "Can't detect overflow except at sizeclass boundaries"); for (auto sz : sizes) { // Check in bounds
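To make the reasoning behind the new size list in func-memcpy.cc concrete, here is a small, self-contained sketch (not part of the patch) of why an overflow is only detectable once it crosses the sizeclass-padded boundary. The rounding function and the MIN_ALLOC_SIZE value of 16 below are stand-ins for snmalloc's real sizeclass table and constant, and are illustrative only.

    #include <cstddef>
    #include <cstdio>

    // Stand-in for snmalloc's sizeclass rounding; real values come from
    // sizeclasstable.h, these are illustrative only.
    constexpr size_t MIN_ALLOC_SIZE_SKETCH = 16;

    static size_t round_up_to_sizeclass(size_t requested)
    {
      size_t padded = MIN_ALLOC_SIZE_SKETCH;
      while (padded < requested)
        padded += padded / 2; // geometric spacing, roughly like real sizeclasses
      return padded;
    }

    int main()
    {
      for (size_t requested : {size_t(8), size_t(48), size_t(1024)})
      {
        size_t padded = round_up_to_sizeclass(requested);
        // A memcpy that overruns `requested` but stays below `padded` touches
        // only the allocation's own padding, so a check keyed on the sizeclass
        // cannot flag it; only writes at or past `padded` are detectable.
        printf("requested=%zu padded=%zu undetectable slack=%zu\n",
               requested, padded, padded - requested);
      }
      return 0;
    }

This is why the test's out-of-bounds sizes start at MIN_ALLOC_SIZE rather than at an arbitrary small value: for smaller requests the overflow would land in padding and could not be caught.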