NVIDIA · cliffburdick · Jan 4, 2024 · Dec 28, 2023 · Jan 4, 2024
diff --git a/include/matx/core/allocator.h b/include/matx/core/allocator.h
@@ -83,12 +83,143 @@ struct matxPointerAttr_t {
 inline detail::matxMemoryStats_t matxMemoryStats; ///< Statistics object
 inline std::shared_mutex memory_mtx; ///< Mutex protecting updates from map
 
+struct MemTracker {
+  std::unordered_map<void *, detail::matxPointerAttr_t> allocationMap;
+
+  auto size() {
+    return allocationMap.size();
+  }
+
+  auto deallocate(void *ptr) {
+    MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)
+
+    std::unique_lock lck(memory_mtx);
+    auto iter = allocationMap.find(ptr);
+
+    if (iter == allocationMap.end()) {
+  #ifdef MATX_DISABLE_MEM_TRACK_CHECK
+      // This error can occur in situations where the user includes MatX in multiple translation units
+      // and a deallocation occurs in a different one than it was allocated. Allow the user to ignore
+      // these cases if they know the issue.
+      MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
+  #endif    
+      return;
+    }
+
+    size_t bytes = iter->second.size;
+    matxMemoryStats.currentBytesAllocated -= bytes;
+
+    switch (iter->second.kind) {
+    case MATX_MANAGED_MEMORY:
+      [[fallthrough]];
+    case MATX_DEVICE_MEMORY:
+      cudaFree(ptr);
+      break;
+    case MATX_HOST_MEMORY:
+      cudaFreeHost(ptr);
+      break;
+    case MATX_HOST_MALLOC_MEMORY:
+      free(ptr);
+      break;
+    case MATX_ASYNC_DEVICE_MEMORY:
+      cudaFreeAsync(ptr, iter->second.stream);
+      break;
+    default:
+      MATX_THROW(matxInvalidType, "Invalid memory type");
+    }
+
+    allocationMap.erase(ptr);    
+  }
+
+  void allocate(void **ptr, size_t bytes,
+                      matxMemorySpace_t space = MATX_MANAGED_MEMORY,
+                      cudaStream_t stream = 0) {
+    [[maybe_unused]] cudaError_t err = cudaSuccess;
+
+    MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)
+
+    if (ptr == nullptr) {
+      MATX_THROW(matxInvalidParameter, "nullptr on allocate");
+    }
+
+    *ptr = nullptr;
+
+    switch (space) {
+    case MATX_MANAGED_MEMORY:
+      err = cudaMallocManaged(ptr, bytes);
+      MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
+      break;
+    case MATX_HOST_MEMORY:
+      err = cudaMallocHost(ptr, bytes);
+      MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
+      break;
+    case MATX_HOST_MALLOC_MEMORY:
+      *ptr = malloc(bytes);
+      break;
+    case MATX_DEVICE_MEMORY:
+      err = cudaMalloc(ptr, bytes);
+      MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
+      break;
+    case MATX_ASYNC_DEVICE_MEMORY:
+      err = cudaMallocAsync(ptr, bytes, stream);
+      MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
+      break;
+    case MATX_INVALID_MEMORY:
+      MATX_THROW(matxInvalidType, "Invalid memory kind when allocating!");
+      break;
+    };
+
+    if (*ptr == nullptr) {
+      MATX_THROW(matxOutOfMemory, "Failed to allocate memory");
+    }
+
+    std::unique_lock lck(memory_mtx);
+    matxMemoryStats.currentBytesAllocated += bytes;
+    matxMemoryStats.totalBytesAllocated += bytes;
+    matxMemoryStats.maxBytesAllocated = std::max(
+        matxMemoryStats.maxBytesAllocated, matxMemoryStats.currentBytesAllocated);
+    allocationMap[*ptr] = {bytes, space, stream};
+  }
+
+  bool is_allocated(void *ptr) {
+    if (ptr == nullptr) {
+      return false;
+    }
+
+    std::unique_lock lck(memory_mtx);
+    auto iter = allocationMap.find(ptr);
+
+    return iter != allocationMap.end();    
+  }
+
+  matxMemorySpace_t get_pointer_kind(void *ptr) {
+    if (ptr == nullptr) {
+      return MATX_INVALID_MEMORY;
+    }
+
+    std::unique_lock lck(memory_mtx);
+    auto iter = allocationMap.find(ptr);
+
+    if (iter != allocationMap.end()) {
+      return iter->second.kind;
+    }
+
+    return MATX_INVALID_MEMORY;
+  }
+
+  ~MemTracker() {
+    while (allocationMap.size()) {
+      deallocate(allocationMap.begin()->first);
+    }
+  }
+};
+
 
 
 __attribute__ ((visibility ("default")))
-__MATX_INLINE__ std::unordered_map<void *, detail::matxPointerAttr_t> &GetAllocMap() {
-  static std::unordered_map<void *, detail::matxPointerAttr_t> allocationMap;
-  return allocationMap;
+__MATX_INLINE__ MemTracker &GetAllocMap() {
+  static MemTracker tracker;
+  return tracker;
 }
 
 /**
@@ -140,14 +271,7 @@ inline void matxGetMemoryStats(size_t *current, size_t *total, size_t *max)
  * @return True if allocator
  */
 inline bool IsAllocated(void *ptr) {
-  if (ptr == nullptr) {
-    return false;
-  }
-
-  std::unique_lock lck(memory_mtx);
-  auto iter = GetAllocMap().find(ptr);
-
-  return iter != GetAllocMap().end();
+  return GetAllocMap().is_allocated(ptr);
 }
 
 /**
@@ -164,19 +288,7 @@ inline bool IsAllocated(void *ptr) {
  **/
 inline matxMemorySpace_t GetPointerKind(void *ptr)
 {
-  if (ptr == nullptr) {
-    return MATX_INVALID_MEMORY;
-  }
-
-  std::unique_lock lck(memory_mtx);
-  auto iter = GetAllocMap().find(ptr);
-
-  if (iter != GetAllocMap().end()) {
-    return iter->second.kind;
-  }
-
-
-  return MATX_INVALID_MEMORY;
+  return GetAllocMap().get_pointer_kind(ptr);
 }
 
 /**
@@ -209,90 +321,17 @@ inline void matxAlloc(void **ptr, size_t bytes,
                       matxMemorySpace_t space = MATX_MANAGED_MEMORY,
                       cudaStream_t stream = 0)
 {
-  [[maybe_unused]] cudaError_t err = cudaSuccess;
-
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)
 
-  switch (space) {
-  case MATX_MANAGED_MEMORY:
-    err = cudaMallocManaged(ptr, bytes);
-    MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
-    break;
-  case MATX_HOST_MEMORY:
-    err = cudaMallocHost(ptr, bytes);
-    MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
-    break;
-  case MATX_HOST_MALLOC_MEMORY:
-    *ptr = malloc(bytes);
-    break;
-  case MATX_DEVICE_MEMORY:
-    err = cudaMalloc(ptr, bytes);
-    MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
-    break;
-  case MATX_ASYNC_DEVICE_MEMORY:
-    err = cudaMallocAsync(ptr, bytes, stream);
-    MATX_ASSERT(err == cudaSuccess, matxOutOfMemory);
-    break;
-  case MATX_INVALID_MEMORY:
-    MATX_THROW(matxInvalidType, "Invalid memory kind when allocating!");
-    break;
-  };
-
-  MATX_ASSERT(ptr != nullptr, matxOutOfMemory);
-
-  std::unique_lock lck(memory_mtx);
-  matxMemoryStats.currentBytesAllocated += bytes;
-  matxMemoryStats.totalBytesAllocated += bytes;
-  matxMemoryStats.maxBytesAllocated = std::max(
-      matxMemoryStats.maxBytesAllocated, matxMemoryStats.currentBytesAllocated);
-  GetAllocMap()[*ptr] = {bytes, space, stream};
+  return GetAllocMap().allocate(ptr, bytes, space, stream);
 }
 
 
 inline void matxFree(void *ptr)
 {
   MATX_NVTX_START("", matx::MATX_NVTX_LOG_INTERNAL)
 
-  if (ptr == nullptr) {
-    return;
-  }
-
-  std::unique_lock lck(memory_mtx);
-  auto iter = GetAllocMap().find(ptr);
-
-  if (iter == GetAllocMap().end()) {
-#ifdef MATX_DISABLE_MEM_TRACK_CHECK
-    // This error can occur in situations where the user includes MatX in multiple translation units
-    // and a deallocation occurs in a different one than it was allocated. Allow the user to ignore
-    // these cases if they know the issue.
-    MATX_THROW(matxInvalidParameter, "Couldn't find pointer in allocation cache");
-#endif    
-    return;
-  }
-
-  size_t bytes = iter->second.size;
-  matxMemoryStats.currentBytesAllocated -= bytes;
-
-  switch (iter->second.kind) {
-  case MATX_MANAGED_MEMORY:
-    [[fallthrough]];
-  case MATX_DEVICE_MEMORY:
-    cudaFree(ptr);
-    break;
-  case MATX_HOST_MEMORY:
-    cudaFreeHost(ptr);
-    break;
-  case MATX_HOST_MALLOC_MEMORY:
-    free(ptr);
-    break;
-  case MATX_ASYNC_DEVICE_MEMORY:
-    cudaFreeAsync(ptr, iter->second.stream);
-    break;
-  default:
-    MATX_THROW(matxInvalidType, "Invalid memory type");
-  }
-
-  GetAllocMap().erase(iter);
+  return GetAllocMap().deallocate(ptr);
 }
 
 
@@ -330,4 +369,6 @@ struct matx_allocator {
   }  
 };
 
+
+
 } // end namespace matx