Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update caching for transforms to fixing all leaks reported by compute-sanitizer #542

Merged
merged 2 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 140 additions & 99 deletions include/matx/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,143 @@ struct matxPointerAttr_t {
inline detail::matxMemoryStats_t matxMemoryStats; ///< Statistics object
inline std::shared_mutex memory_mtx; ///< Mutex protecting updates from map

struct MemTracker {
std::unordered_map<void *, detail::matxPointerAttr_t> allocationMap;

auto size() {
return allocationMap.size();
}

auto deallocate(void *ptr) {
MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)

std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

if (iter == allocationMap.end()) {
#ifdef MATX_DISABLE_MEM_TRACK_CHECK
// This error can occur in situations where the user includes MatX in multiple translation units
// and a deallocation occurs in a different one than it was allocated. Allow the user to ignore
// these cases if they know the issue.
MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
#endif
return;
}

size_t bytes = iter->second.size;
matxMemoryStats.currentBytesAllocated -= bytes;

switch (iter->second.kind) {
case MATX_MANAGED_MEMORY:
[[fallthrough]];
case MATX_DEVICE_MEMORY:
cudaFree(ptr);
break;
case MATX_HOST_MEMORY:
cudaFreeHost(ptr);
break;
case MATX_HOST_MALLOC_MEMORY:
free(ptr);
break;
case MATX_ASYNC_DEVICE_MEMORY:
cudaFreeAsync(ptr, iter->second.stream);
break;
default:
MATX_THROW(matxInvalidType, "Invalid memory type");
}

allocationMap.erase(ptr);
}

void allocate(void **ptr, size_t bytes,
matxMemorySpace_t space = MATX_MANAGED_MEMORY,
cudaStream_t stream = 0) {
[[maybe_unused]] cudaError_t err = cudaSuccess;

MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)

if (ptr == nullptr) {
MATX_THROW(matxInvalidParameter, "nullptr on allocate");
}

*ptr = nullptr;

switch (space) {
case MATX_MANAGED_MEMORY:
err = cudaMallocManaged(ptr, bytes);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_HOST_MEMORY:
err = cudaMallocHost(ptr, bytes);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_HOST_MALLOC_MEMORY:
*ptr = malloc(bytes);
break;
case MATX_DEVICE_MEMORY:
err = cudaMalloc(ptr, bytes);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_ASYNC_DEVICE_MEMORY:
err = cudaMallocAsync(ptr, bytes, stream);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_INVALID_MEMORY:
MATX_THROW(matxInvalidType, "Invalid memory kind when allocating!");
break;
};

if (*ptr == nullptr) {
MATX_THROW(matxOutOfMemory, "Failed to allocate memory");
}

std::unique_lock lck(memory_mtx);
matxMemoryStats.currentBytesAllocated += bytes;
matxMemoryStats.totalBytesAllocated += bytes;
matxMemoryStats.maxBytesAllocated = std::max(
matxMemoryStats.maxBytesAllocated, matxMemoryStats.currentBytesAllocated);
allocationMap[*ptr] = {bytes, space, stream};
}

bool is_allocated(void *ptr) {
if (ptr == nullptr) {
return false;
}

std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

return iter != allocationMap.end();
}

matxMemorySpace_t get_pointer_kind(void *ptr) {
if (ptr == nullptr) {
return MATX_INVALID_MEMORY;
}

std::unique_lock lck(memory_mtx);
auto iter = allocationMap.find(ptr);

if (iter != allocationMap.end()) {
return iter->second.kind;
}

return MATX_INVALID_MEMORY;
}

~MemTracker() {
while (allocationMap.size()) {
deallocate(allocationMap.begin()->first);
}
}
};



__attribute__ ((visibility ("default")))
__MATX_INLINE__ std::unordered_map<void *, detail::matxPointerAttr_t> &GetAllocMap() {
static std::unordered_map<void *, detail::matxPointerAttr_t> allocationMap;
return allocationMap;
__MATX_INLINE__ MemTracker &GetAllocMap() {
static MemTracker tracker;
return tracker;
}

/**
Expand Down Expand Up @@ -140,14 +271,7 @@ inline void matxGetMemoryStats(size_t *current, size_t *total, size_t *max)
* @return True if allocator
*/
inline bool IsAllocated(void *ptr) {
if (ptr == nullptr) {
return false;
}

std::unique_lock lck(memory_mtx);
auto iter = GetAllocMap().find(ptr);

return iter != GetAllocMap().end();
return GetAllocMap().is_allocated(ptr);
}

/**
Expand All @@ -164,19 +288,7 @@ inline bool IsAllocated(void *ptr) {
**/
inline matxMemorySpace_t GetPointerKind(void *ptr)
{
if (ptr == nullptr) {
return MATX_INVALID_MEMORY;
}

std::unique_lock lck(memory_mtx);
auto iter = GetAllocMap().find(ptr);

if (iter != GetAllocMap().end()) {
return iter->second.kind;
}


return MATX_INVALID_MEMORY;
return GetAllocMap().get_pointer_kind(ptr);
}

/**
Expand Down Expand Up @@ -209,90 +321,17 @@ inline void matxAlloc(void **ptr, size_t bytes,
matxMemorySpace_t space = MATX_MANAGED_MEMORY,
cudaStream_t stream = 0)
{
[[maybe_unused]] cudaError_t err = cudaSuccess;

MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)

switch (space) {
case MATX_MANAGED_MEMORY:
err = cudaMallocManaged(ptr, bytes);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_HOST_MEMORY:
err = cudaMallocHost(ptr, bytes);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_HOST_MALLOC_MEMORY:
*ptr = malloc(bytes);
break;
case MATX_DEVICE_MEMORY:
err = cudaMalloc(ptr, bytes);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_ASYNC_DEVICE_MEMORY:
err = cudaMallocAsync(ptr, bytes, stream);
MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
break;
case MATX_INVALID_MEMORY:
MATX_THROW(matxInvalidType, "Invalid memory kind when allocating!");
break;
};

MATX_ASSERT(ptr != nullptr, matxOutOfMemory);

std::unique_lock lck(memory_mtx);
matxMemoryStats.currentBytesAllocated += bytes;
matxMemoryStats.totalBytesAllocated += bytes;
matxMemoryStats.maxBytesAllocated = std::max(
matxMemoryStats.maxBytesAllocated, matxMemoryStats.currentBytesAllocated);
GetAllocMap()[*ptr] = {bytes, space, stream};
return GetAllocMap().allocate(ptr, bytes, space, stream);
}


inline void matxFree(void *ptr)
{
MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)

if (ptr == nullptr) {
return;
}

std::unique_lock lck(memory_mtx);
auto iter = GetAllocMap().find(ptr);

if (iter == GetAllocMap().end()) {
#ifdef MATX_DISABLE_MEM_TRACK_CHECK
// This error can occur in situations where the user includes MatX in multiple translation units
// and a deallocation occurs in a different one than it was allocated. Allow the user to ignore
// these cases if they know the issue.
MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
#endif
return;
}

size_t bytes = iter->second.size;
matxMemoryStats.currentBytesAllocated -= bytes;

switch (iter->second.kind) {
case MATX_MANAGED_MEMORY:
[[fallthrough]];
case MATX_DEVICE_MEMORY:
cudaFree(ptr);
break;
case MATX_HOST_MEMORY:
cudaFreeHost(ptr);
break;
case MATX_HOST_MALLOC_MEMORY:
free(ptr);
break;
case MATX_ASYNC_DEVICE_MEMORY:
cudaFreeAsync(ptr, iter->second.stream);
break;
default:
MATX_THROW(matxInvalidType, "Invalid memory type");
}

GetAllocMap().erase(iter);
return GetAllocMap().deallocate(ptr);
}


Expand Down Expand Up @@ -330,4 +369,6 @@ struct matx_allocator {
}
};



} // end namespace matx
Loading