diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 8a8bafb3147..fa77fb08cab 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -88,68 +88,37 @@ public static LogConf logToStderr() { * {@link RmmAllocationMode#ARENA}, * {@link RmmAllocationMode#CUDA_ASYNC} and * {@link RmmAllocationMode#CUDA_MANAGED_MEMORY} - * @param enableLogging Enable logging memory manager events + * @param logConf How to do logging or null if you don't want to * @param poolSize The initial pool size in bytes * @throws IllegalStateException if RMM has already been initialized */ - public static void initialize(int allocationMode, boolean enableLogging, long poolSize) + public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize) throws RmmException { - initialize(allocationMode, enableLogging, poolSize, 0); - } + if (initialized) { + throw new IllegalStateException("RMM is already initialized"); + } - /** - * Initialize memory manager state and storage. This will always initialize - * the CUDA context for the calling thread if it is not already set. The - * caller is responsible for setting the desired CUDA device prior to this - * call if a specific device is already set. - *

NOTE: All cudf methods will set the chosen CUDA device in the CUDA - * context of the calling thread after this returns. - * @param allocationMode Allocation strategy to use. Bit set using - * {@link RmmAllocationMode#CUDA_DEFAULT}, - * {@link RmmAllocationMode#POOL}, - * {@link RmmAllocationMode#ARENA}, - * {@link RmmAllocationMode#CUDA_ASYNC} and - * {@link RmmAllocationMode#CUDA_MANAGED_MEMORY} - * @param enableLogging Enable logging memory manager events - * @param poolSize The initial pool size in bytes - * @param maxPoolSize The maximum size the pool is allowed to grow. If the specified value - * is <= 0 then the maximum pool size will not be artificially limited. - * @throws IllegalStateException if RMM has already been initialized - */ - public static void initialize(int allocationMode, boolean enableLogging, long poolSize, - long maxPoolSize) throws RmmException { - LogConf lc = null; - if (enableLogging) { - String f = System.getenv("RMM_LOG_FILE"); - if (f != null) { - lc = logTo(new File(f)); - } else { - lc = logToStderr(); + boolean isPool = (allocationMode & RmmAllocationMode.POOL) != 0; + boolean isArena = (allocationMode & RmmAllocationMode.ARENA) != 0; + boolean isAsync = (allocationMode & RmmAllocationMode.CUDA_ASYNC) != 0; + boolean isManaged = (allocationMode & RmmAllocationMode.CUDA_MANAGED_MEMORY) != 0; + + if (isAsync && isManaged) { + throw new IllegalArgumentException( + "CUDA Unified Memory is not supported in CUDA_ASYNC allocation mode"); + } + LogLoc loc = LogLoc.NONE; + String path = null; + if (logConf != null) { + if (logConf.file != null) { + path = logConf.file.getAbsolutePath(); } + loc = logConf.loc; } - initialize(allocationMode, lc, poolSize, maxPoolSize); - } - /** - * Initialize memory manager state and storage. This will always initialize - * the CUDA context for the calling thread if it is not already set. The - * caller is responsible for setting the desired CUDA device prior to this - * call if a specific device is already set. - *

NOTE: All cudf methods will set the chosen CUDA device in the CUDA - * context of the calling thread after this returns. - * @param allocationMode Allocation strategy to use. Bit set using - * {@link RmmAllocationMode#CUDA_DEFAULT}, - * {@link RmmAllocationMode#POOL}, - * {@link RmmAllocationMode#ARENA}, - * {@link RmmAllocationMode#CUDA_ASYNC} and - * {@link RmmAllocationMode#CUDA_MANAGED_MEMORY} - * @param logConf How to do logging or null if you don't want to - * @param poolSize The initial pool size in bytes - * @throws IllegalStateException if RMM has already been initialized - */ - public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize) - throws RmmException { - initialize(allocationMode, logConf, poolSize, 0); + initializeInternal(allocationMode, loc.internalId, path, poolSize); + MemoryCleaner.setDefaultGpu(Cuda.getDevice()); + initialized = true; } /** @@ -175,44 +144,11 @@ public static synchronized void initialize(int allocationMode, LogConf logConf, * {@link RmmAllocationMode#ARENA} or * {@link RmmAllocationMode#CUDA_ASYNC}, or the maximum pool * size is below the initial size. + * @deprecated Use the version without the maxPoolSize parameter instead. */ public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize, long maxPoolSize) throws RmmException { - if (initialized) { - throw new IllegalStateException("RMM is already initialized"); - } - - boolean isPool = (allocationMode & RmmAllocationMode.POOL) != 0; - boolean isArena = (allocationMode & RmmAllocationMode.ARENA) != 0; - boolean isAsync = (allocationMode & RmmAllocationMode.CUDA_ASYNC) != 0; - boolean isManaged = (allocationMode & RmmAllocationMode.CUDA_MANAGED_MEMORY) != 0; - - if (maxPoolSize > 0) { - if (!isPool && !isArena && !isAsync) { - throw new IllegalArgumentException( - "Pool limit only supported in POOL, ARENA, or CUDA_ASYNC allocation mode"); - } - if (maxPoolSize < poolSize) { - throw new IllegalArgumentException("Pool limit of " + maxPoolSize - + " is less than initial pool size of " + poolSize); - } - } - if (isAsync && isManaged) { - throw new IllegalArgumentException( - "CUDA Unified Memory is not supported in CUDA_ASYNC allocation mode"); - } - LogLoc loc = LogLoc.NONE; - String path = null; - if (logConf != null) { - if (logConf.file != null) { - path = logConf.file.getAbsolutePath(); - } - loc = logConf.loc; - } - - initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize); - MemoryCleaner.setDefaultGpu(Cuda.getDevice()); - initialized = true; + initialize(allocationMode, logConf, poolSize); } /** @@ -256,7 +192,7 @@ private static long[] sortThresholds(long[] thresholds) { } private static native void initializeInternal(int allocationMode, int logTo, String path, - long poolSize, long maxPoolSize) throws RmmException; + long poolSize) throws RmmException; /** * Shut down any initialized RMM instance. This should be used very rarely. It does not need to diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index ba1135e7c67..d07b754c8db 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -324,8 +324,7 @@ extern "C" { JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, jclass clazz, jint allocation_mode, jint log_to, - jstring jpath, jlong pool_size, - jlong max_pool_size) { + jstring jpath, jlong pool_size) { try { // make sure the CUDA device is setup in the context cudaError_t cuda_status = cudaFree(0); @@ -339,37 +338,26 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j bool use_arena_alloc = allocation_mode & 4; bool use_cuda_async_alloc = allocation_mode & 8; if (use_pool_alloc) { - auto pool_limit = (max_pool_size > 0) ? - thrust::optional{static_cast(max_pool_size)} : - thrust::nullopt; if (use_managed_mem) { Initialized_resource = rmm::mr::make_owning_wrapper( - std::make_shared(), pool_size, pool_limit); + std::make_shared(), pool_size, pool_size); } else { Initialized_resource = rmm::mr::make_owning_wrapper( - std::make_shared(), pool_size, pool_limit); + std::make_shared(), pool_size, pool_size); } } else if (use_arena_alloc) { - std::size_t pool_limit = (max_pool_size > 0) ? static_cast(max_pool_size) : - std::numeric_limits::max(); if (use_managed_mem) { Initialized_resource = rmm::mr::make_owning_wrapper( - std::make_shared(), pool_size, pool_limit); + std::make_shared(), pool_size, pool_size); } else { Initialized_resource = rmm::mr::make_owning_wrapper( - std::make_shared(), pool_size, pool_limit); + std::make_shared(), pool_size, pool_size); } } else if (use_cuda_async_alloc) { - auto const pool_limit = max_pool_size > 0 ? static_cast(max_pool_size) : - std::numeric_limits::max(); - auto const release_threshold = max_pool_size > 0 ? - thrust::optional{max_pool_size} : - thrust::optional{}; // Use `limiting_resource_adaptor` to set a hard limit on the max pool size since // `cuda_async_memory_resource` only has a release threshold. Initialized_resource = rmm::mr::make_owning_wrapper( - std::make_shared(pool_size, release_threshold), - pool_limit); + std::make_shared(pool_size, pool_size), pool_size); } else if (use_managed_mem) { Initialized_resource = std::make_shared(); } else { diff --git a/java/src/test/java/ai/rapids/cudf/CudfTestBase.java b/java/src/test/java/ai/rapids/cudf/CudfTestBase.java index 66ecd110983..a4450e2869c 100644 --- a/java/src/test/java/ai/rapids/cudf/CudfTestBase.java +++ b/java/src/test/java/ai/rapids/cudf/CudfTestBase.java @@ -43,7 +43,7 @@ public CudfTestBase(int allocationMode, long poolSize) { void beforeEach() { assumeTrue(Cuda.isEnvCompatibleForTesting()); if (!Rmm.isInitialized()) { - Rmm.initialize(rmmAllocationMode, false, rmmPoolSize); + Rmm.initialize(rmmAllocationMode, Rmm.logToStderr(), rmmPoolSize); } } diff --git a/java/src/test/java/ai/rapids/cudf/RmmMemoryAccessorTest.java b/java/src/test/java/ai/rapids/cudf/RmmMemoryAccessorTest.java index 81afdcc1940..a9ee36e9b97 100644 --- a/java/src/test/java/ai/rapids/cudf/RmmMemoryAccessorTest.java +++ b/java/src/test/java/ai/rapids/cudf/RmmMemoryAccessorTest.java @@ -63,7 +63,7 @@ public void init() { Rmm.shutdown(); } assertFalse(Rmm.isInitialized()); - Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, true, -1); + Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, Rmm.logToStderr(), -1); assertTrue(Rmm.isInitialized()); Rmm.shutdown(); assertFalse(Rmm.isInitialized()); @@ -74,7 +74,7 @@ public void shutdown() { if (Rmm.isInitialized()) { Rmm.shutdown(); } - Rmm.initialize(RmmAllocationMode.POOL, false, 2048); + Rmm.initialize(RmmAllocationMode.POOL, Rmm.logToStderr(), 2048); try (DeviceMemoryBuffer buffer = DeviceMemoryBuffer.allocate(1024)) { assertThrows(RmmException.class, () -> Rmm.shutdown(500, 2000, TimeUnit.MILLISECONDS)); } @@ -91,9 +91,9 @@ public void allocate() { @Test public void doubleInitFails() { if (!Rmm.isInitialized()) { - Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, false, 0); + Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, Rmm.logToStderr(), 0); } assertThrows(IllegalStateException.class, - () -> Rmm.initialize(RmmAllocationMode.POOL, false, 1024 * 1024)); + () -> Rmm.initialize(RmmAllocationMode.POOL, Rmm.logToStderr(), 1024 * 1024)); } } diff --git a/java/src/test/java/ai/rapids/cudf/RmmTest.java b/java/src/test/java/ai/rapids/cudf/RmmTest.java index f0ed699c9b9..f9d097158b6 100644 --- a/java/src/test/java/ai/rapids/cudf/RmmTest.java +++ b/java/src/test/java/ai/rapids/cudf/RmmTest.java @@ -57,7 +57,7 @@ public void teardown() { RmmAllocationMode.POOL, RmmAllocationMode.ARENA}) public void testTotalAllocated(int rmmAllocMode) { - Rmm.initialize(rmmAllocMode, false, 512 * 1024 * 1024); + Rmm.initialize(rmmAllocMode, Rmm.logToStderr(), 512 * 1024 * 1024); assertEquals(0, Rmm.getTotalBytesAllocated()); try (DeviceMemoryBuffer ignored = Rmm.alloc(1024)) { assertEquals(1024, Rmm.getTotalBytesAllocated()); @@ -110,7 +110,7 @@ public boolean onAllocFailure(long sizeRequested) { @Test public void testSetEventHandlerTwice() { - Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, false, 0L); + Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, Rmm.logToStderr(), 0L); // installing an event handler the first time should not be an error Rmm.setEventHandler(new BaseRmmEventHandler() { @Override @@ -131,7 +131,7 @@ public boolean onAllocFailure(long sizeRequested) { @Test public void testClearEventHandler() { - Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, false, 0L); + Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, Rmm.logToStderr(), 0L); // clearing the event handler when it isn't set is not an error Rmm.clearEventHandler(); @@ -161,7 +161,7 @@ public void testAllocOnlyThresholds() { final AtomicInteger deallocInvocations = new AtomicInteger(0); final AtomicLong allocated = new AtomicLong(0); - Rmm.initialize(RmmAllocationMode.POOL, false, 1024 * 1024L); + Rmm.initialize(RmmAllocationMode.POOL, Rmm.logToStderr(), 1024 * 1024L); RmmEventHandler handler = new RmmEventHandler() { @Override @@ -304,7 +304,7 @@ public void onDeallocThreshold(long totalAllocSize) { @Test public void testExceptionHandling() { - Rmm.initialize(RmmAllocationMode.POOL, false, 1024 * 1024L); + Rmm.initialize(RmmAllocationMode.POOL, Rmm.logToStderr(), 1024 * 1024L); RmmEventHandler handler = new RmmEventHandler() { @Override @@ -344,7 +344,7 @@ public void onDeallocThreshold(long totalAllocSize) { public void testThreadAutoDeviceSetup() throws Exception { // A smoke-test for automatic CUDA device setup for threads calling // into cudf. Hard to fully test without requiring multiple CUDA devices. - Rmm.initialize(RmmAllocationMode.POOL, false, 1024 * 1024L); + Rmm.initialize(RmmAllocationMode.POOL, Rmm.logToStderr(), 1024 * 1024L); DeviceMemoryBuffer buff = Rmm.alloc(1024); try { ExecutorService executor = Executors.newSingleThreadExecutor(); @@ -368,62 +368,38 @@ public void testThreadAutoDeviceSetup() throws Exception { RmmAllocationMode.POOL, RmmAllocationMode.ARENA}) public void testSetDeviceThrowsAfterRmmInit(int rmmAllocMode) { - Rmm.initialize(rmmAllocMode, false, 1024 * 1024); + Rmm.initialize(rmmAllocMode, Rmm.logToStderr(), 1024 * 1024); assertThrows(CudfException.class, () -> Cuda.setDevice(Cuda.getDevice() + 1)); // Verify that auto set device does not Cuda.autoSetDevice(); } @Test - public void testPoolGrowth() { - Rmm.initialize(RmmAllocationMode.POOL, false, 1024); - try (DeviceMemoryBuffer ignored1 = Rmm.alloc(1024); - DeviceMemoryBuffer ignored2 = Rmm.alloc(2048); - DeviceMemoryBuffer ignored3 = Rmm.alloc(4096)) { - assertEquals(7168, Rmm.getTotalBytesAllocated()); - } - } - - @Test - public void testPoolLimit() { - Rmm.initialize(RmmAllocationMode.POOL, false, 1024, 2048); - try (DeviceMemoryBuffer ignored1 = Rmm.alloc(512); - DeviceMemoryBuffer ignored2 = Rmm.alloc(1024)) { + public void testPoolSize() { + Rmm.initialize(RmmAllocationMode.POOL, Rmm.logToStderr(), 1024); + try (DeviceMemoryBuffer ignored1 = Rmm.alloc(1024)) { assertThrows(OutOfMemoryError.class, () -> { - DeviceMemoryBuffer ignored3 = Rmm.alloc(1024); - ignored3.close(); + DeviceMemoryBuffer ignored2 = Rmm.alloc(1024); + ignored2.close(); }); } } @Test - public void testPoolLimitLessThanInitialSize() { - assertThrows(IllegalArgumentException.class, - () -> Rmm.initialize(RmmAllocationMode.POOL, false, 10240, 1024)); - } - - @Test - public void testPoolLimitNonPoolMode() { - assertThrows(IllegalArgumentException.class, - () -> Rmm.initialize(RmmAllocationMode.CUDA_DEFAULT, false, 1024, 2048)); - } - - @Test - public void testCudaAsyncMemoryResourceLimit() { + public void testCudaAsyncMemoryResourceSize() { try { - Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, false, 1024, 2048); + Rmm.initialize(RmmAllocationMode.CUDA_ASYNC, Rmm.logToStderr(), 1024); } catch (CudfException e) { // CUDA 11.2 introduced cudaMallocAsync, older CUDA Toolkit will skip this test. assumeFalse(e.getMessage().contains("cudaMallocAsync not supported")); throw e; } - try (DeviceMemoryBuffer ignored1 = Rmm.alloc(512); - DeviceMemoryBuffer ignored2 = Rmm.alloc(1024)) { + try (DeviceMemoryBuffer ignored1 = Rmm.alloc(1024)) { assertThrows(OutOfMemoryError.class, () -> { - DeviceMemoryBuffer ignored3 = Rmm.alloc(1024); - ignored3.close(); + DeviceMemoryBuffer ignored2 = Rmm.alloc(1024); + ignored2.close(); }); } } @@ -433,12 +409,12 @@ public void testCudaAsyncIsIncompatibleWithManaged() { assertThrows(IllegalArgumentException.class, () -> Rmm.initialize( RmmAllocationMode.CUDA_ASYNC | RmmAllocationMode.CUDA_MANAGED_MEMORY, - false, 1024, 2048)); + Rmm.logToStderr(), 1024)); } @Test public void testCudaMemoryBuffer() { - Rmm.initialize(RmmAllocationMode.ARENA, false, 1024); + Rmm.initialize(RmmAllocationMode.ARENA, Rmm.logToStderr(), 1024); try (CudaMemoryBuffer one = CudaMemoryBuffer.allocate(512); CudaMemoryBuffer two = CudaMemoryBuffer.allocate(1024)) { assertEquals(512, one.length);