add hard limit
rongou committed Sep 10, 2021
1 parent 75ac47b commit 989a43b
Showing 3 changed files with 42 additions and 14 deletions.
16 changes: 12 additions & 4 deletions java/src/main/java/ai/rapids/cudf/Rmm.java
@@ -213,18 +213,26 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
     if (initialized) {
       throw new IllegalStateException("RMM is already initialized");
     }
+
+    boolean isPool = (allocationMode & RmmAllocationMode.POOL) != 0;
+    boolean isArena = (allocationMode & RmmAllocationMode.ARENA) != 0;
+    boolean isAsync = (allocationMode & RmmAllocationMode.CUDA_ASYNC) != 0;
+    boolean isManaged = (allocationMode & RmmAllocationMode.CUDA_MANAGED_MEMORY) != 0;
+
     if (maxPoolSize > 0) {
-      if ((allocationMode & RmmAllocationMode.POOL) == 0 &&
-          (allocationMode & RmmAllocationMode.ARENA) == 0 &&
-          (allocationMode & RmmAllocationMode.CUDA_ASYNC) == 0) {
+      if (!isPool && !isArena && !isAsync) {
         throw new IllegalArgumentException(
-            "Pool limit only supported in POOL or ARENA or CUDA_ASYNC allocation mode");
+            "Pool limit only supported in POOL, ARENA, or CUDA_ASYNC allocation mode");
       }
       if (maxPoolSize < poolSize) {
         throw new IllegalArgumentException("Pool limit of " + maxPoolSize
             + " is less than initial pool size of " + poolSize);
       }
     }
+    if (isAsync && isManaged) {
+      throw new IllegalArgumentException(
+          "CUDA Unified Memory is not supported in CUDA_ASYNC allocation mode");
+    }
     LogLoc loc = LogLoc.NONE;
     String path = null;
     if (logConf != null) {
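For orientation, here is a minimal, hypothetical Java sketch (not part of the commit) of the argument combinations the new checks accept and reject. It assumes the Rmm.initialize(int allocationMode, boolean enableLogging, long poolSize, long maxPoolSize) overload and the byte sizes used by the tests further down, and assumes Rmm.shutdown() as the matching teardown call.

import ai.rapids.cudf.Rmm;
import ai.rapids.cudf.RmmAllocationMode;

public class PoolLimitChecksSketch {
  public static void main(String[] args) {
    // Accepted: CUDA_ASYNC with an initial pool of 1024 bytes and a hard limit of 2048 bytes.
    Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, false, 1024, 2048);
    Rmm.shutdown();

    // Rejected with IllegalArgumentException: a limit smaller than the initial pool size.
    //   Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, false, 2048, 1024);

    // Rejected with IllegalArgumentException: CUDA_ASYNC combined with CUDA_MANAGED_MEMORY.
    //   Rmm.initialize(RmmAllocationMode.CUDA_ASYNC | RmmAllocationMode.CUDA_MANAGED_MEMORY,
    //       false, 1024, 2048);

    // A maxPoolSize is also rejected unless the mode is POOL, ARENA, or CUDA_ASYNC.
  }
}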
15 changes: 11 additions & 4 deletions java/src/main/native/src/RmmJni.cpp
@@ -24,6 +24,7 @@
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/cuda_async_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/limiting_resource_adaptor.hpp>
 #include <rmm/mr/device/logging_resource_adaptor.hpp>
 #include <rmm/mr/device/managed_memory_resource.hpp>
 #include <rmm/mr/device/owning_wrapper.hpp>
@@ -368,10 +369,16 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(
             std::make_shared<rmm::mr::cuda_memory_resource>(), pool_size, pool_limit);
       }
     } else if (use_cuda_async_alloc) {
-      auto pool_limit = (max_pool_size > 0) ? thrust::optional<std::size_t>{max_pool_size} :
-                                              thrust::optional<std::size_t>{};
-      Initialized_resource =
-          std::make_shared<rmm::mr::cuda_async_memory_resource>(pool_size, pool_limit);
+      auto const pool_limit = max_pool_size > 0 ? static_cast<std::size_t>(max_pool_size) :
+                                                  std::numeric_limits<std::size_t>::max();
+      auto const release_threshold = max_pool_size > 0 ?
+                                         thrust::optional<std::size_t>{max_pool_size} :
+                                         thrust::optional<std::size_t>{};
+      // Use `limiting_resource_adaptor` to set a hard limit on the max pool size since
+      // `cuda_async_memory_resource` only has a release threshold.
+      Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::limiting_resource_adaptor>(
+          std::make_shared<rmm::mr::cuda_async_memory_resource>(pool_size, release_threshold),
+          pool_limit);
     } else if (use_managed_mem) {
       Initialized_resource = std::make_shared<rmm::mr::managed_memory_resource>();
     } else {
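The native change carries the actual mechanism: cuda_async_memory_resource only takes a release threshold, which is a soft target for how much memory the driver pool keeps cached, so the commit wraps it in a limiting_resource_adaptor to turn maxPoolSize into a hard cap on outstanding allocations. Below is a hypothetical Java sketch of the resulting behavior; the class name and sizes are illustrative, not from the commit, and it assumes the cap applies only to memory that is currently allocated, so buffers that have been closed free up room under the limit.

import ai.rapids.cudf.DeviceMemoryBuffer;
import ai.rapids.cudf.Rmm;
import ai.rapids.cudf.RmmAllocationMode;

public class HardLimitBehaviorSketch {
  public static void main(String[] args) {
    // Initial async pool of 1024 bytes, hard cap of 2048 bytes.
    Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, false, 1024, 2048);
    try (DeviceMemoryBuffer a = Rmm.alloc(512);
         DeviceMemoryBuffer b = Rmm.alloc(1024)) {
      // 1536 bytes outstanding; a further Rmm.alloc(1024) would exceed the 2048-byte
      // cap and fail with OutOfMemoryError, as the test below asserts.
    }
    try (DeviceMemoryBuffer c = Rmm.alloc(1024);
         DeviceMemoryBuffer d = Rmm.alloc(512)) {
      // Succeeds: the earlier buffers were closed, so nothing counts against the cap.
    }
    Rmm.shutdown();
  }
}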
25 changes: 19 additions & 6 deletions java/src/test/java/ai/rapids/cudf/RmmTest.java
@@ -410,17 +410,30 @@ public void testPoolLimitNonPoolMode() {
   }
 
   @Test
-  public void testCudaAsyncMemoryResource() {
+  public void testCudaAsyncMemoryResourceLimit() {
     try {
-      Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, false, 1024 * 1024L, 1024 * 1024L);
+      Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, false, 1024, 2048);
     } catch (CudfException e) {
       // CUDA 11.2 introduced cudaMallocAsync, older CUDA Toolkit will skip this test.
       assumeFalse(e.getMessage().contains("cudaMallocAsync not supported"));
       throw e;
     }
-    DeviceMemoryBuffer buff = Rmm.alloc(1024);
-    buff.close();
-    buff = Rmm.alloc(2048);
-    buff.close();
+    try (DeviceMemoryBuffer ignored1 = Rmm.alloc(512);
+         DeviceMemoryBuffer ignored2 = Rmm.alloc(1024)) {
+      assertThrows(OutOfMemoryError.class,
+          () -> {
+            DeviceMemoryBuffer ignored3 = Rmm.alloc(1024);
+            ignored3.close();
+          });
+    }
   }
 
+  @Test
+  public void testCudaAsyncIsIncompatibleWithManaged() {
+    assertThrows(IllegalArgumentException.class,
+        () -> Rmm.initialize(
+            RmmAllocationMode.CUDA_ASYNC | RmmAllocationMode.CUDA_MANAGED_MEMORY,
+            false, 1024, 2048));
+  }
+
   private static class AllocFailException extends RuntimeException {
Expand Down
