diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 8d63d2aeefc..97813182deb 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -173,6 +173,36 @@ public static synchronized void initialize(int allocationMode, LogConf logConf, */ public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize, long maxPoolSize) throws RmmException { + initialize(allocationMode, logConf, poolSize, maxPoolSize, 0, 0); + } + + /** + * Initialize memory manager state and storage. This will always initialize + * the CUDA context for the calling thread if it is not already set. The + * caller is responsible for setting the desired CUDA device prior to this + * call if a specific device is already set. + *

NOTE: All cudf methods will set the chosen CUDA device in the CUDA + * context of the calling thread after this returns. + * @param allocationMode Allocation strategy to use. Bit set using + * {@link RmmAllocationMode#CUDA_DEFAULT}, + * {@link RmmAllocationMode#POOL}, + * {@link RmmAllocationMode#ARENA} and + * {@link RmmAllocationMode#CUDA_MANAGED_MEMORY} + * @param logConf How to do logging, or null to disable logging + * @param poolSize The initial pool size in bytes + * @param maxPoolSize The maximum size the pool is allowed to grow. If the specified value + * is <= 0 then the pool size will not be artificially limited. + * @param allocationAlignment The size to which allocations are aligned, or 0 for no extra alignment. + * @param alignmentThreshold Only allocations with size greater than or equal to this threshold + * are aligned with {@code allocationAlignment}. + * @throws IllegalStateException if RMM has already been initialized + * @throws IllegalArgumentException if a max pool size is specified but the allocation mode + * is not {@link RmmAllocationMode#POOL} or + * {@link RmmAllocationMode#ARENA}, or the maximum pool size is + * below the initial size. 
+ */ + public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize, + long maxPoolSize, long allocationAlignment, long alignmentThreshold) throws RmmException { if (initialized) { throw new IllegalStateException("RMM is already initialized"); } @@ -195,7 +225,8 @@ public static synchronized void initialize(int allocationMode, LogConf logConf, loc = logConf.loc; } - initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize); + initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize, + allocationAlignment, alignmentThreshold); MemoryCleaner.setDefaultGpu(Cuda.getDevice()); initialized = true; } @@ -241,7 +272,8 @@ private static long[] sortThresholds(long[] thresholds) { } private static native void initializeInternal(int allocationMode, int logTo, String path, - long poolSize, long maxPoolSize) throws RmmException; + long poolSize, long maxPoolSize, long allocationAlignment, long alignmentThreshold) + throws RmmException; /** * Shut down any initialized RMM instance. This should be used very rarely. It does not need to diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index 7f11e19fce8..e604fc7dd46 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -332,7 +333,9 @@ extern "C" { JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, jclass clazz, jint allocation_mode, jint log_to, jstring jpath, jlong pool_size, - jlong max_pool_size) { + jlong max_pool_size, + jlong allocation_alignment, + jlong alignment_threshold) { try { // make sure the CUDA device is setup in the context cudaError_t cuda_status = cudaFree(0); @@ -351,13 +354,9 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j if (use_managed_mem) { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } else { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } } else if (use_arena_alloc) { std::size_t pool_limit = (max_pool_size > 0) ? 
static_cast(max_pool_size) : @@ -365,23 +364,26 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j if (use_managed_mem) { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } else { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } } else if (use_managed_mem) { Initialized_resource = std::make_shared(); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } else { Initialized_resource = std::make_shared(); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } + + if (allocation_alignment != 0) { + Initialized_resource = rmm::mr::make_owning_wrapper( + Initialized_resource, allocation_alignment, alignment_threshold); + } + + auto wrapped = make_tracking_adaptor( + Initialized_resource.get(), + std::max(RMM_ALLOC_SIZE_ALIGNMENT, static_cast(allocation_alignment))); + Tracking_memory_resource.reset(wrapped); + auto resource = Tracking_memory_resource.get(); rmm::mr::set_current_device_resource(resource);