support RMM aligned resource adapter in JNI (#8266)
Depends on rapidsai/rmm#768.

Authors:
  - Rong Ou (https://github.com/rongou)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: #8266
rongou authored May 20, 2021
1 parent 48647aa commit 0ebf7e6
Showing 2 changed files with 51 additions and 17 deletions.
38 changes: 35 additions & 3 deletions java/src/main/java/ai/rapids/cudf/Rmm.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -173,6 +173,36 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
    */
   public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize,
       long maxPoolSize) throws RmmException {
+    initialize(allocationMode, logConf, poolSize, maxPoolSize, 0, 0);
+  }
+
+  /**
+   * Initialize memory manager state and storage. This will always initialize
+   * the CUDA context for the calling thread if it is not already set. The
+   * caller is responsible for setting the desired CUDA device prior to this
+   * call if a specific device is required.
+   * <p>NOTE: All cudf methods will set the chosen CUDA device in the CUDA
+   * context of the calling thread after this returns.
+   * @param allocationMode Allocation strategy to use. Bit set using
+   *                       {@link RmmAllocationMode#CUDA_DEFAULT},
+   *                       {@link RmmAllocationMode#POOL},
+   *                       {@link RmmAllocationMode#ARENA} and
+   *                       {@link RmmAllocationMode#CUDA_MANAGED_MEMORY}
+   * @param logConf        How to do logging or null if you don't want to
+   * @param poolSize       The initial pool size in bytes
+   * @param maxPoolSize    The maximum size the pool is allowed to grow. If the specified value
+   *                       is <= 0 then the pool size will not be artificially limited.
+   * @param allocationAlignment The size to which allocations are aligned.
+   * @param alignmentThreshold  Only allocations with a size larger than or equal to this
+   *                            threshold are aligned with `allocationAlignment`.
+   * @throws IllegalStateException if RMM has already been initialized
+   * @throws IllegalArgumentException if a max pool size is specified but the allocation mode
+   *                                  is not {@link RmmAllocationMode#POOL} or
+   *                                  {@link RmmAllocationMode#ARENA}, or the maximum pool size is
+   *                                  below the initial size.
+   */
+  public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize,
+      long maxPoolSize, long allocationAlignment, long alignmentThreshold) throws RmmException {
     if (initialized) {
       throw new IllegalStateException("RMM is already initialized");
     }
@@ -195,7 +225,8 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
       loc = logConf.loc;
     }
 
-    initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize);
+    initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize,
+        allocationAlignment, alignmentThreshold);
     MemoryCleaner.setDefaultGpu(Cuda.getDevice());
     initialized = true;
   }
@@ -241,7 +272,8 @@ private static long[] sortThresholds(long[] thresholds) {
   }
 
   private static native void initializeInternal(int allocationMode, int logTo, String path,
-      long poolSize, long maxPoolSize) throws RmmException;
+      long poolSize, long maxPoolSize, long allocationAlignment, long alignmentThreshold)
+      throws RmmException;
 
   /**
    * Shut down any initialized RMM instance. This should be used very rarely. It does not need to
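For context (not part of this commit), a minimal sketch of how a JVM application might call the new overload; the class name, allocation mode, pool sizes, alignment, and threshold below are all illustrative. Passing null for logConf disables logging, per the Javadoc above.

    import ai.rapids.cudf.Rmm;
    import ai.rapids.cudf.RmmAllocationMode;

    public class RmmAlignedInitExample {
      public static void main(String[] args) {
        // Pool allocator: 1 GiB initial pool, capped at 4 GiB. With the two new
        // arguments, allocations of 2 MiB or more are aligned to a 4 KiB boundary.
        Rmm.initialize(RmmAllocationMode.POOL, null, 1L << 30, 4L << 30,
            4096, 2L << 20);
        try {
          // ... allocate and compute with cudf ...
        } finally {
          Rmm.shutdown();
        }
      }
    }

Passing 0 for both new arguments, which is what the old five-argument overload now does, skips the aligned adaptor entirely.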
30 changes: 16 additions & 14 deletions java/src/main/native/src/RmmJni.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include <iostream>
 #include <limits>
 
+#include <rmm/mr/device/aligned_resource_adaptor.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
 #include <rmm/mr/device/logging_resource_adaptor.hpp>
@@ -332,7 +333,9 @@ extern "C" {
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, jclass clazz,
                                                                   jint allocation_mode, jint log_to,
                                                                   jstring jpath, jlong pool_size,
-                                                                  jlong max_pool_size) {
+                                                                  jlong max_pool_size,
+                                                                  jlong allocation_alignment,
+                                                                  jlong alignment_threshold) {
   try {
     // make sure the CUDA device is setup in the context
     cudaError_t cuda_status = cudaFree(0);
@@ -351,37 +354,36 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
     if (use_managed_mem) {
       Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
           std::make_shared<rmm::mr::managed_memory_resource>(), pool_size, pool_limit);
-      auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
-      Tracking_memory_resource.reset(wrapped);
     } else {
       Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
           std::make_shared<rmm::mr::cuda_memory_resource>(), pool_size, pool_limit);
-      auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
-      Tracking_memory_resource.reset(wrapped);
     }
   } else if (use_arena_alloc) {
     std::size_t pool_limit = (max_pool_size > 0) ? static_cast<std::size_t>(max_pool_size) :
                                                    std::numeric_limits<std::size_t>::max();
     if (use_managed_mem) {
       Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
           std::make_shared<rmm::mr::managed_memory_resource>(), pool_size, pool_limit);
-      auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
-      Tracking_memory_resource.reset(wrapped);
     } else {
       Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
           std::make_shared<rmm::mr::cuda_memory_resource>(), pool_size, pool_limit);
-      auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
-      Tracking_memory_resource.reset(wrapped);
     }
   } else if (use_managed_mem) {
     Initialized_resource = std::make_shared<rmm::mr::managed_memory_resource>();
-    auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
-    Tracking_memory_resource.reset(wrapped);
   } else {
     Initialized_resource = std::make_shared<rmm::mr::cuda_memory_resource>();
-    auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
-    Tracking_memory_resource.reset(wrapped);
   }
 
+  if (allocation_alignment != 0) {
+    Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::aligned_resource_adaptor>(
+        Initialized_resource, allocation_alignment, alignment_threshold);
+  }
+
+  auto wrapped = make_tracking_adaptor(
+      Initialized_resource.get(),
+      std::max(RMM_ALLOC_SIZE_ALIGNMENT, static_cast<std::size_t>(allocation_alignment)));
+  Tracking_memory_resource.reset(wrapped);
+
   auto resource = Tracking_memory_resource.get();
   rmm::mr::set_current_device_resource(resource);
 
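To make the layering concrete, here is a standalone sketch (not from the commit) of the resource stack the JNI code now builds when an alignment is requested: a base resource wrapped by the aligned adaptor, installed as the current device resource. It uses the RMM C++ API that rapidsai/rmm#768 introduces; the 4 KiB alignment and 2 MiB threshold are illustrative.

    #include <memory>

    #include <rmm/mr/device/aligned_resource_adaptor.hpp>
    #include <rmm/mr/device/cuda_memory_resource.hpp>
    #include <rmm/mr/device/owning_wrapper.hpp>
    #include <rmm/mr/device/per_device_resource.hpp>

    int main() {
      // Base resource: plain cudaMalloc/cudaFree.
      auto base = std::make_shared<rmm::mr::cuda_memory_resource>();
      // Aligned adaptor: requests of 2 MiB or more are padded and aligned to
      // 4 KiB; smaller requests pass through to the base resource unchanged.
      auto aligned = rmm::mr::make_owning_wrapper<rmm::mr::aligned_resource_adaptor>(
          base, std::size_t{4096}, std::size_t{2 << 20});
      // Route all device allocations on the current device through the stack.
      rmm::mr::set_current_device_resource(aligned.get());
      return 0;
    }

The JNI code keeps its tracking adaptor as the outermost layer and widens the tracking alignment to max(RMM_ALLOC_SIZE_ALIGNMENT, allocation_alignment), so the byte counts it reports match what the aligned adaptor actually hands out.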
