Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core] Improve CowData and Memory metadata alignment. #1383

Merged
merged 1 commit into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 34 additions & 12 deletions include/godot_cpp/core/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,6 @@

#include <type_traits>

#ifndef PAD_ALIGN
#define PAD_ALIGN 16 //must always be greater than this at much
#endif

// p_dummy argument is added to avoid conflicts with the engine functions when both engine and GDExtension are built as a static library on iOS.
void *operator new(size_t p_size, const char *p_dummy, const char *p_description); ///< operator new that takes a description and uses MemoryStaticPool
void *operator new(size_t p_size, const char *p_dummy, void *(*p_allocfunc)(size_t p_size)); ///< operator new that takes a description and uses MemoryStaticPool
Expand All @@ -69,6 +65,18 @@ class Memory {
Memory();

public:
// Alignment: ↓ max_align_t ↓ uint64_t ↓ max_align_t
// ┌─────────────────┬──┬────────────────┬──┬───────────...
// │ uint64_t │░░│ uint64_t │░░│ T[]
// │ alloc size │░░│ element count │░░│ data
// └─────────────────┴──┴────────────────┴──┴───────────...
// Offset: ↑ SIZE_OFFSET ↑ ELEMENT_OFFSET ↑ DATA_OFFSET
// Note: "alloc size" is used and set by the engine and is never accessed or changed for the extension.

static constexpr size_t SIZE_OFFSET = 0;
static constexpr size_t ELEMENT_OFFSET = ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t) == 0) ? (SIZE_OFFSET + sizeof(uint64_t)) : ((SIZE_OFFSET + sizeof(uint64_t)) + alignof(uint64_t) - ((SIZE_OFFSET + sizeof(uint64_t)) % alignof(uint64_t)));
static constexpr size_t DATA_OFFSET = ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t) == 0) ? (ELEMENT_OFFSET + sizeof(uint64_t)) : ((ELEMENT_OFFSET + sizeof(uint64_t)) + alignof(max_align_t) - ((ELEMENT_OFFSET + sizeof(uint64_t)) % alignof(max_align_t)));

static void *alloc_static(size_t p_bytes, bool p_pad_align = false);
static void *realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align = false);
static void free_static(void *p_ptr, bool p_pad_align = false);
Expand Down Expand Up @@ -99,7 +107,7 @@ struct Comparator {

template <class T>
void memdelete(T *p_class, typename std::enable_if<!std::is_base_of_v<godot::Wrapped, T>>::type * = nullptr) {
if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
p_class->~T();
}

Expand All @@ -113,7 +121,7 @@ void memdelete(T *p_class) {

template <class T, class A>
void memdelete_allocator(T *p_class) {
if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
p_class->~T();
}

Expand All @@ -136,6 +144,10 @@ class DefaultTypedAllocator {

#define memnew_arr(m_class, m_count) memnew_arr_template<m_class>(m_count)

_FORCE_INLINE_ uint64_t *_get_element_count_ptr(uint8_t *p_ptr) {
return (uint64_t *)(p_ptr - Memory::DATA_OFFSET + Memory::ELEMENT_OFFSET);
}

template <typename T>
T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
if (p_elements == 0) {
Expand All @@ -145,12 +157,14 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
same strategy used by std::vector, and the Vector class, so it should be safe.*/

size_t len = sizeof(T) * p_elements;
uint64_t *mem = (uint64_t *)Memory::alloc_static(len, true);
uint8_t *mem = (uint8_t *)Memory::alloc_static(len, true);
T *failptr = nullptr; // Get rid of a warning.
ERR_FAIL_NULL_V(mem, failptr);
*(mem - 1) = p_elements;

if (!std::is_trivially_destructible<T>::value) {
uint64_t *_elem_count_ptr = _get_element_count_ptr(mem);
*(_elem_count_ptr) = p_elements;

if constexpr (!std::is_trivially_destructible_v<T>) {
T *elems = (T *)mem;

/* call operator new */
Expand All @@ -162,12 +176,20 @@ T *memnew_arr_template(size_t p_elements, const char *p_descr = "") {
return (T *)mem;
}

template <typename T>
size_t memarr_len(const T *p_class) {
uint8_t *ptr = (uint8_t *)p_class;
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
return *(_elem_count_ptr);
}

template <typename T>
void memdelete_arr(T *p_class) {
uint64_t *ptr = (uint64_t *)p_class;
uint8_t *ptr = (uint8_t *)p_class;

if (!std::is_trivially_destructible<T>::value) {
uint64_t elem_count = *(ptr - 1);
if constexpr (!std::is_trivially_destructible_v<T>) {
uint64_t *_elem_count_ptr = _get_element_count_ptr(ptr);
uint64_t elem_count = *(_elem_count_ptr);

for (uint64_t i = 0; i < elem_count; i++) {
p_class[i].~T();
Expand Down
99 changes: 65 additions & 34 deletions include/godot_cpp/templates/cowdata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class VMap;
template <class T>
class CharStringT;

SAFE_NUMERIC_TYPE_PUN_GUARANTEES(uint64_t)
static_assert(std::is_trivially_destructible_v<std::atomic<uint64_t>>);

// Silence a false positive warning (see GH-52119).
#if defined(__GNUC__) && !defined(__clang__)
Expand Down Expand Up @@ -96,26 +96,47 @@ class CowData {
return ++x;
}

static constexpr USize ALLOC_PAD = sizeof(USize) * 2; // For size and atomic refcount.
// Alignment: ↓ max_align_t ↓ USize ↓ max_align_t
// ┌────────────────────┬──┬─────────────┬──┬───────────...
// │ SafeNumeric<USize> │░░│ USize │░░│ T[]
// │ ref. count │░░│ data size │░░│ data
// └────────────────────┴──┴─────────────┴──┴───────────...
// Offset: ↑ REF_COUNT_OFFSET ↑ SIZE_OFFSET ↑ DATA_OFFSET

static constexpr size_t REF_COUNT_OFFSET = 0;
static constexpr size_t SIZE_OFFSET = ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize) == 0) ? (REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) : ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) + alignof(USize) - ((REF_COUNT_OFFSET + sizeof(SafeNumeric<USize>)) % alignof(USize)));
static constexpr size_t DATA_OFFSET = ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t) == 0) ? (SIZE_OFFSET + sizeof(USize)) : ((SIZE_OFFSET + sizeof(USize)) + alignof(max_align_t) - ((SIZE_OFFSET + sizeof(USize)) % alignof(max_align_t)));

mutable T *_ptr = nullptr;

// internal helpers

static _FORCE_INLINE_ SafeNumeric<USize> *_get_refcount_ptr(uint8_t *p_ptr) {
return (SafeNumeric<USize> *)(p_ptr + REF_COUNT_OFFSET);
}

static _FORCE_INLINE_ USize *_get_size_ptr(uint8_t *p_ptr) {
return (USize *)(p_ptr + SIZE_OFFSET);
}

static _FORCE_INLINE_ T *_get_data_ptr(uint8_t *p_ptr) {
return (T *)(p_ptr + DATA_OFFSET);
}

_FORCE_INLINE_ SafeNumeric<USize> *_get_refcount() const {
if (!_ptr) {
return nullptr;
}

return reinterpret_cast<SafeNumeric<USize> *>(_ptr) - 2;
return (SafeNumeric<USize> *)((uint8_t *)_ptr - DATA_OFFSET + REF_COUNT_OFFSET);
}

_FORCE_INLINE_ USize *_get_size() const {
if (!_ptr) {
return nullptr;
}

return reinterpret_cast<USize *>(_ptr) - 1;
return (USize *)((uint8_t *)_ptr - DATA_OFFSET + SIZE_OFFSET);
}

_FORCE_INLINE_ USize _get_alloc_size(USize p_elements) const {
Expand Down Expand Up @@ -240,7 +261,7 @@ void CowData<T>::_unref(void *p_data) {
}
// clean up

if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
USize *count = _get_size();
T *data = (T *)(count + 1);

Expand All @@ -251,7 +272,7 @@ void CowData<T>::_unref(void *p_data) {
}

// free mem
Memory::free_static(((uint8_t *)p_data) - ALLOC_PAD, false);
Memory::free_static(((uint8_t *)p_data) - DATA_OFFSET, false);
}

template <class T>
Expand All @@ -267,26 +288,27 @@ typename CowData<T>::USize CowData<T>::_copy_on_write() {
/* in use by more than me */
USize current_size = *_get_size();

USize *mem_new = (USize *)Memory::alloc_static(_get_alloc_size(current_size) + ALLOC_PAD, false);
mem_new += 2;
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(_get_alloc_size(current_size) + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, 0);

new (mem_new - 2) SafeNumeric<USize>(1); //refcount
*(mem_new - 1) = current_size; //size
SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
USize *_size_ptr = _get_size_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

T *_data = (T *)(mem_new);
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
*(_size_ptr) = current_size; //size

// initialize new elements
if (std::is_trivially_copyable<T>::value) {
memcpy(mem_new, _ptr, current_size * sizeof(T));

if constexpr (std::is_trivially_copyable_v<T>) {
memcpy((uint8_t *)_data_ptr, _ptr, current_size * sizeof(T));
} else {
for (USize i = 0; i < current_size; i++) {
memnew_placement(&_data[i], T(_ptr[i]));
memnew_placement(&_data_ptr[i], T(_ptr[i]));
}
}

_unref(_ptr);
_ptr = _data;
_ptr = _data_ptr;

rc = 1;
}
Expand Down Expand Up @@ -322,27 +344,33 @@ Error CowData<T>::resize(Size p_size) {
if (alloc_size != current_alloc_size) {
if (current_size == 0) {
// alloc from scratch
USize *ptr = (USize *)Memory::alloc_static(alloc_size + ALLOC_PAD, false);
ptr += 2;
ERR_FAIL_NULL_V(ptr, ERR_OUT_OF_MEMORY);
*(ptr - 1) = 0; //size, currently none
new (ptr - 2) SafeNumeric<USize>(1); //refcount
uint8_t *mem_new = (uint8_t *)Memory::alloc_static(alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
USize *_size_ptr = _get_size_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

_ptr = (T *)ptr;
new (_refc_ptr) SafeNumeric<USize>(1); //refcount
*(_size_ptr) = 0; //size, currently none

_ptr = _data_ptr;
} else {
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
_ptrnew += 2;
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

_ptr = (T *)(_ptrnew);
new (_refc_ptr) SafeNumeric<USize>(rc); //refcount

_ptr = _data_ptr;
}
}

// construct the newly created elements

if (!std::is_trivially_constructible<T>::value) {
if constexpr (!std::is_trivially_constructible_v<T>) {
for (Size i = *_get_size(); i < p_size; i++) {
memnew_placement(&_ptr[i], T);
}
Expand All @@ -353,7 +381,7 @@ Error CowData<T>::resize(Size p_size) {
*_get_size() = p_size;

} else if (p_size < current_size) {
if (!std::is_trivially_destructible<T>::value) {
if constexpr (!std::is_trivially_destructible_v<T>) {
// deinitialize no longer needed elements
for (USize i = p_size; i < *_get_size(); i++) {
T *t = &_ptr[i];
Expand All @@ -362,12 +390,15 @@ Error CowData<T>::resize(Size p_size) {
}

if (alloc_size != current_alloc_size) {
USize *_ptrnew = (USize *)Memory::realloc_static(((uint8_t *)_ptr) - ALLOC_PAD, alloc_size + ALLOC_PAD, false);
ERR_FAIL_NULL_V(_ptrnew, ERR_OUT_OF_MEMORY);
_ptrnew += 2;
new (_ptrnew - 2) SafeNumeric<USize>(rc); //refcount
uint8_t *mem_new = (uint8_t *)Memory::realloc_static(((uint8_t *)_ptr) - DATA_OFFSET, alloc_size + DATA_OFFSET, false);
ERR_FAIL_NULL_V(mem_new, ERR_OUT_OF_MEMORY);

SafeNumeric<USize> *_refc_ptr = _get_refcount_ptr(mem_new);
T *_data_ptr = _get_data_ptr(mem_new);

new (_refc_ptr) SafeNumeric<USize>(rc); //refcount

_ptr = (T *)(_ptrnew);
_ptr = _data_ptr;
}

*_get_size() = p_size;
Expand Down
12 changes: 6 additions & 6 deletions src/core/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ void *Memory::alloc_static(size_t p_bytes, bool p_pad_align) {
bool prepad = p_pad_align;
#endif

void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? PAD_ALIGN : 0));
void *mem = internal::gdextension_interface_mem_alloc(p_bytes + (prepad ? DATA_OFFSET : 0));
ERR_FAIL_NULL_V(mem, nullptr);

if (prepad) {
uint8_t *s8 = (uint8_t *)mem;
return s8 + PAD_ALIGN;
return s8 + DATA_OFFSET;
} else {
return mem;
}
Expand All @@ -69,10 +69,10 @@ void *Memory::realloc_static(void *p_memory, size_t p_bytes, bool p_pad_align) {
#endif

if (prepad) {
mem -= PAD_ALIGN;
mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + PAD_ALIGN);
mem -= DATA_OFFSET;
mem = (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes + DATA_OFFSET);
ERR_FAIL_NULL_V(mem, nullptr);
return mem + PAD_ALIGN;
return mem + DATA_OFFSET;
} else {
return (uint8_t *)internal::gdextension_interface_mem_realloc(mem, p_bytes);
}
Expand All @@ -88,7 +88,7 @@ void Memory::free_static(void *p_ptr, bool p_pad_align) {
#endif

if (prepad) {
mem -= PAD_ALIGN;
mem -= DATA_OFFSET;
}
internal::gdextension_interface_mem_free(mem);
}
Expand Down
Loading