Merge pull request #5632 from rongou/switch-pool
switch from CNMeM to `pool_memory_resource`
revans2 authored Jul 10, 2020
2 parents 11ab827 + 19bfbd6 commit 71ef47c
Showing 6 changed files with 14 additions and 49 deletions.
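
For context, a minimal sketch of what the switch amounts to in RMM terms (illustrative code, not taken from the diff below; it assumes the RMM device-resource API of this era): CNMeM was a self-contained pooling allocator, whereas `pool_memory_resource` layers a pool on top of an upstream resource such as `cuda_memory_resource`.

```cpp
#include <cstddef>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

// Sketch: build the replacement resource the way the JNI change below does.
// The pool suballocates from the upstream CUDA resource; passing pool_size
// as both the initial and maximum size gives a fixed-size pool.
rmm::mr::device_memory_resource *make_pool(std::size_t pool_size) {
  using cuda_mr = rmm::mr::cuda_memory_resource;
  using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;
  return new cuda_pool(new cuda_mr(), pool_size, pool_size);
}
```
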
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -39,6 +39,7 @@

## Improvements

+- PR #5632 Switch JNI code to use `pool_memory_resource` instead of CNMeM
- PR #5486 Link Boost libraries statically in the Java build
- PR #5479 Link Arrow libraries statically
- PR #5414 Use new release of Thrust/CUB in the JNI build
19 changes: 5 additions & 14 deletions java/README.md
@@ -56,12 +56,12 @@ When building libcudf, make sure you pass in the cmake options
`-DARROW_STATIC_LIB=ON -DBoost_USE_STATIC_LIBS=ON` so that Apache Arrow and Boost libraries are
linked statically.

-If you use the default cmake options libcudart will be dynamically linked to libcudf and librmm
-which are included. If you do this the resulting jar will have a classifier associated with it
+If you use the default cmake options libcudart will be dynamically linked to libcudf
+which is included. If you do this the resulting jar will have a classifier associated with it
because that jar can only be used with a single version of the CUDA runtime.

There is experimental work to try and remove that requirement, but it is not fully functional;
-you can build RMM and cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
+you can build cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
`-DCUDA_STATIC_RUNTIME=ON` when running maven. This will statically link in the CUDA runtime
and result in a jar with no classifier that should run on any host that has a version of the
driver new enough to support the runtime that this was built with. Unfortunately `libnvrtc` is still
@@ -92,24 +92,15 @@ between different threads (see
[blog post](https://devblogs.nvidia.com/gpu-pro-tip-cuda-7-streams-simplify-concurrency/)).
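
As a rough illustration of what PTDS changes (a hypothetical snippet, not part of this commit): with per-thread default streams compiled in, default-stream work issued by different host threads goes to each thread's own stream and can overlap, instead of serializing on the one legacy default stream.

```cpp
// Illustrative only: assumes compilation with per-thread default streams
// enabled (nvcc --default-stream per-thread, which is what the
// PER_THREAD_DEFAULT_STREAM build option described here turns on).
#include <cstddef>
#include <thread>

#include <cuda_runtime.h>

// cudaStreamPerThread names the calling thread's own default stream, so
// the two copies below are eligible to overlap under PTDS.
void copy_async(void *dst, const void *src, std::size_t n) {
  cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, cudaStreamPerThread);
  cudaStreamSynchronize(cudaStreamPerThread);
}

int main() {
  void *a, *b, *c, *d;
  cudaMalloc(&a, 1 << 20); cudaMalloc(&b, 1 << 20);
  cudaMalloc(&c, 1 << 20); cudaMalloc(&d, 1 << 20);
  std::thread t1(copy_async, b, a, std::size_t(1 << 20));
  std::thread t2(copy_async, d, c, std::size_t(1 << 20));
  t1.join(); t2.join();
  cudaFree(a); cudaFree(b); cudaFree(c); cudaFree(d);
  return 0;
}
```
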

Since the PTDS option is for each compilation unit, it should be done at the same time across the
-whole codebase. To enable PTDS, first build RMM:
-```shell script
-conda activate cudf_dev
-cd src/rmm/build
-cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON
-make -j`nproc`
-make install
-```
-
-then build cuDF:
+whole codebase. To enable PTDS, first build cuDF:
```shell script
cd src/cudf/cpp/build
cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON
make -j`nproc`
make install
```

-and finally build the jar:
+then build the jar:
```shell script
cd src/cudf/java
mvn clean install -DPER_THREAD_DEFAULT_STREAM=ON
```
15 changes: 0 additions & 15 deletions java/pom.xml
@@ -380,14 +380,6 @@
def proc = 'ldd ${native.build.path}/libcudfjni.so'.execute()
proc.consumeProcessOutput(sout, serr)
proc.waitForOrKill(10000)
-def librp = ~/librmm\.so\s+=>\s+(.*)librmm.*\.so\s+.*/
-def m = librp.matcher(sout)
-if (m.find()) {
-  pom.properties['native.deps.path'] = m.group(1)
-} else {
-  fail("Could not find rmm as a dependency of libcudfjni out> $sout err> $serr")
-}
-
def libcudf = ~/libcudf\.so\s+=>\s+(.*)libcudf.*\.so\s+.*/
def cudfm = libcudf.matcher(sout)
if (cudfm.find()) {
@@ -469,13 +461,6 @@
<include>libcudf.so</include>
</includes>
</resource>
-<resource>
-<!--Set by groovy script-->
-<directory>${native.deps.path}</directory>
-<includes>
-<include>librmm.so</include>
-</includes>
-</resource>
</resources>
</configuration>
</execution>
1 change: 0 additions & 1 deletion java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
@@ -32,7 +32,6 @@
public class NativeDepsLoader {
private static final Logger log = LoggerFactory.getLogger(NativeDepsLoader.class);
private static final String[] loadOrder = new String[] {
"rmm",
"cudf",
"cudfjni"
};
13 changes: 1 addition & 12 deletions java/src/main/native/CMakeLists.txt
@@ -149,18 +149,8 @@ find_path(RMM_INCLUDE "rmm"
"$ENV{CONDA_PREFIX}/include/rmm"
"$ENV{CONDA_PREFIX}/include")

-find_library(RMM_LIBRARY "rmm"
-             HINTS "$ENV{RMM_ROOT}/lib"
-                   "$ENV{CONDA_PREFIX}/lib")
-
-message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")
-
-add_library(rmm SHARED IMPORTED ${RMM_LIBRARY})
-if (RMM_INCLUDE AND RMM_LIBRARY)
-  set_target_properties(rmm PROPERTIES IMPORTED_LOCATION ${RMM_LIBRARY})
-endif (RMM_INCLUDE AND RMM_LIBRARY)

###################################################################################################
# - find JNI -------------------------------------------------------------------------------------
find_package(JNI REQUIRED)
@@ -189,7 +179,6 @@ include_directories("${THRUST_INCLUDE}"

link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc
"${CMAKE_BINARY_DIR}/lib"
"${RMM_LIBRARY}"
"${CUDF_LIBRARY}")


@@ -230,6 +219,6 @@ endif(PER_THREAD_DEFAULT_STREAM)
###################################################################################################
# - link libraries --------------------------------------------------------------------------------

-target_link_libraries(cudfjni cudf rmm ${CUDART_LIBRARY} cuda nvrtc)
+target_link_libraries(cudfjni cudf ${CUDART_LIBRARY} cuda nvrtc)


14 changes: 7 additions & 7 deletions java/src/main/native/src/RmmJni.cpp
@@ -14,20 +14,17 @@
* limitations under the License.
*/

#include <atomic>
#include <chrono>
#include <ctime>
#include <fstream>
#include <iostream>
#include <limits>
#include <mutex>

-#include <rmm/mr/device/cnmem_managed_memory_resource.hpp>
-#include <rmm/mr/device/cnmem_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/default_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
#include <unordered_map>

#include "jni_utils.hpp"
@@ -365,14 +362,17 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
bool use_pool_alloc = allocation_mode & 1;
bool use_managed_mem = allocation_mode & 2;
if (use_pool_alloc) {
-std::vector<int> devices; // Just do default devices for now...
if (use_managed_mem) {
-auto tmp = new rmm::mr::cnmem_managed_memory_resource(pool_size, devices);
+using managed_mr = rmm::mr::managed_memory_resource;
+using managed_pool = rmm::mr::pool_memory_resource<managed_mr>;
+auto tmp = new managed_pool(new managed_mr(), pool_size, pool_size);
Initialized_resource.reset(tmp);
auto wrapped = make_tracking_adaptor(tmp, RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
} else {
-auto tmp = new rmm::mr::cnmem_memory_resource(pool_size, devices);
+using cuda_mr = rmm::mr::cuda_memory_resource;
+using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;
+auto tmp = new cuda_pool(new cuda_mr(), pool_size, pool_size);
Initialized_resource.reset(tmp);
auto wrapped = make_tracking_adaptor(tmp, RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
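
To make the new allocation path above concrete, here is a hedged, self-contained sketch of the same pattern (against RMM as of this commit; the headers are the ones included above, and the 1 GiB size is illustrative): construct a pool over a base resource, install it as the default, and allocate through it.

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/default_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main() {
  using cuda_mr = rmm::mr::cuda_memory_resource;
  using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;

  cuda_mr base;                             // plain cudaMalloc/cudaFree
  cuda_pool pool(&base, 1 << 30, 1 << 30);  // fixed 1 GiB pool
  rmm::mr::set_default_resource(&pool);     // route RMM allocations here

  void *p = pool.allocate(256);             // suballocated, no cudaMalloc
  pool.deallocate(p, 256);
  return 0;
}
```

The managed-memory branch in the diff has the same shape, just with `rmm::mr::managed_memory_resource` as the upstream, so the pool then hands out unified memory.
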
