Merge pull request #5632 from rongou/switch-pool
switch from CNMeM to `pool_memory_resource`
revans2 authored Jul 10, 2020
2 parents 11ab827 + 19bfbd6 commit 71ef47c
Showing 6 changed files with 14 additions and 49 deletions.
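
For context, a minimal sketch of what the switch amounts to in RMM terms (illustrative code, not taken from the diff below; it assumes the RMM device-resource API of this era): CNMeM was a self-contained pooling allocator, whereas `pool_memory_resource` layers a pool on top of an upstream resource such as `cuda_memory_resource`.

```cpp
#include <cstddef>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

// Sketch: build the replacement resource the way the JNI change below does.
// The pool suballocates from the upstream CUDA resource; passing pool_size
// as both the initial and maximum size gives a fixed-size pool.
rmm::mr::device_memory_resource *make_pool(std::size_t pool_size) {
  using cuda_mr = rmm::mr::cuda_memory_resource;
  using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;
  return new cuda_pool(new cuda_mr(), pool_size, pool_size);
}
```
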
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -39,6 +39,7 @@

## Improvements

+- PR #5632 Switch JNI code to use `pool_memory_resource` instead of CNMeM
- PR #5486 Link Boost libraries statically in the Java build
- PR #5479 Link Arrow libraries statically
- PR #5414 Use new release of Thrust/CUB in the JNI build
19 changes: 5 additions & 14 deletions java/README.md
@@ -56,12 +56,12 @@ When building libcudf, make sure you pass in the cmake options
`-DARROW_STATIC_LIB=ON -DBoost_USE_STATIC_LIBS=ON` so that Apache Arrow and Boost libraries are
linked statically.

-If you use the default cmake options libcudart will be dynamically linked to libcudf and librmm
-which are included. If you do this the resulting jar will have a classifier associated with it
+If you use the default cmake options libcudart will be dynamically linked to libcudf
+which is included. If you do this the resulting jar will have a classifier associated with it
because that jar can only be used with a single version of the CUDA runtime.

There is experimental work to try and remove that requirement, but it is not fully functional;
-you can build RMM and cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
+you can build cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
`-DCUDA_STATIC_RUNTIME=ON` when running maven. This will statically link in the CUDA runtime
and result in a jar with no classifier that should run on any host that has a version of the
driver new enough to support the runtime that this was built with. Unfortunately `libnvrtc` is still
@@ -92,24 +92,15 @@ between different threads (see
[blog post](https://devblogs.nvidia.com/gpu-pro-tip-cuda-7-streams-simplify-concurrency/)).
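
As a rough illustration of what PTDS changes (a hypothetical snippet, not part of this commit): with per-thread default streams compiled in, default-stream work issued by different host threads goes to each thread's own stream and can overlap, instead of serializing on the one legacy default stream.

```cpp
// Illustrative only: assumes compilation with per-thread default streams
// enabled (nvcc --default-stream per-thread, which is what the
// PER_THREAD_DEFAULT_STREAM build option described here turns on).
#include <cstddef>
#include <thread>

#include <cuda_runtime.h>

// cudaStreamPerThread names the calling thread's own default stream, so
// the two copies below are eligible to overlap under PTDS.
void copy_async(void *dst, const void *src, std::size_t n) {
  cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, cudaStreamPerThread);
  cudaStreamSynchronize(cudaStreamPerThread);
}

int main() {
  void *a, *b, *c, *d;
  cudaMalloc(&a, 1 << 20); cudaMalloc(&b, 1 << 20);
  cudaMalloc(&c, 1 << 20); cudaMalloc(&d, 1 << 20);
  std::thread t1(copy_async, b, a, std::size_t(1 << 20));
  std::thread t2(copy_async, d, c, std::size_t(1 << 20));
  t1.join(); t2.join();
  cudaFree(a); cudaFree(b); cudaFree(c); cudaFree(d);
  return 0;
}
```
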

Since the PTDS option is for each compilation unit, it should be done at the same time across the
-whole codebase. To enable PTDS, first build RMM:
-```shell script
-conda activate cudf_dev
-cd src/rmm/build
-cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON
-make -j`nproc`
-make install
-```
-
-then build cuDF:
+whole codebase. To enable PTDS, first build cuDF:
```shell script
cd src/cudf/cpp/build
cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON
make -j`nproc`
make install
```

-and finally build the jar:
+then build the jar:
```shell script
cd src/cudf/java
mvn clean install -DPER_THREAD_DEFAULT_STREAM=ON
```
15 changes: 0 additions & 15 deletions java/pom.xml
@@ -380,14 +380,6 @@
def proc = 'ldd ${native.build.path}/libcudfjni.so'.execute()
proc.consumeProcessOutput(sout, serr)
proc.waitForOrKill(10000)
-def librp = ~/librmm\.so\s+=>\s+(.*)librmm.*\.so\s+.*/
-def m = librp.matcher(sout)
-if (m.find()) {
-  pom.properties['native.deps.path'] = m.group(1)
-} else {
-  fail("Could not find rmm as a dependency of libcudfjni out> $sout err> $serr")
-}
-
def libcudf = ~/libcudf\.so\s+=>\s+(.*)libcudf.*\.so\s+.*/
def cudfm = libcudf.matcher(sout)
if (cudfm.find()) {
@@ -469,13 +461,6 @@
<include>libcudf.so</include>
</includes>
</resource>
-<resource>
-<!--Set by groovy script-->
-<directory>${native.deps.path}</directory>
-<includes>
-<include>librmm.so</include>
-</includes>
-</resource>
</resources>
</configuration>
</execution>
1 change: 0 additions & 1 deletion java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
@@ -32,7 +32,6 @@
public class NativeDepsLoader {
private static final Logger log = LoggerFactory.getLogger(NativeDepsLoader.class);
private static final String[] loadOrder = new String[] {
"rmm",
"cudf",
"cudfjni"
};
13 changes: 1 addition & 12 deletions java/src/main/native/CMakeLists.txt
@@ -149,18 +149,8 @@ find_path(RMM_INCLUDE "rmm"
"$ENV{CONDA_PREFIX}/include/rmm"
"$ENV{CONDA_PREFIX}/include")

-find_library(RMM_LIBRARY "rmm"
-             HINTS "$ENV{RMM_ROOT}/lib"
-                   "$ENV{CONDA_PREFIX}/lib")
-
-message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")
-
-add_library(rmm SHARED IMPORTED ${RMM_LIBRARY})
-if (RMM_INCLUDE AND RMM_LIBRARY)
-  set_target_properties(rmm PROPERTIES IMPORTED_LOCATION ${RMM_LIBRARY})
-endif (RMM_INCLUDE AND RMM_LIBRARY)

###################################################################################################
# - find JNI -------------------------------------------------------------------------------------
find_package(JNI REQUIRED)
@@ -189,7 +179,6 @@ include_directories("${THRUST_INCLUDE}"

link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc
"${CMAKE_BINARY_DIR}/lib"
"${RMM_LIBRARY}"
"${CUDF_LIBRARY}")


@@ -230,6 +219,6 @@ endif(PER_THREAD_DEFAULT_STREAM)
###################################################################################################
# - link libraries --------------------------------------------------------------------------------

-target_link_libraries(cudfjni cudf rmm ${CUDART_LIBRARY} cuda nvrtc)
+target_link_libraries(cudfjni cudf ${CUDART_LIBRARY} cuda nvrtc)


14 changes: 7 additions & 7 deletions java/src/main/native/src/RmmJni.cpp
@@ -14,20 +14,17 @@
* limitations under the License.
*/

#include <atomic>
#include <chrono>
#include <ctime>
#include <fstream>
#include <iostream>
#include <limits>
#include <mutex>

-#include <rmm/mr/device/cnmem_managed_memory_resource.hpp>
-#include <rmm/mr/device/cnmem_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/default_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>
#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
#include <unordered_map>

#include "jni_utils.hpp"
@@ -365,14 +362,17 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
bool use_pool_alloc = allocation_mode & 1;
bool use_managed_mem = allocation_mode & 2;
if (use_pool_alloc) {
-std::vector<int> devices; // Just do default devices for now...
if (use_managed_mem) {
-auto tmp = new rmm::mr::cnmem_managed_memory_resource(pool_size, devices);
+using managed_mr = rmm::mr::managed_memory_resource;
+using managed_pool = rmm::mr::pool_memory_resource<managed_mr>;
+auto tmp = new managed_pool(new managed_mr(), pool_size, pool_size);
Initialized_resource.reset(tmp);
auto wrapped = make_tracking_adaptor(tmp, RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
} else {
-auto tmp = new rmm::mr::cnmem_memory_resource(pool_size, devices);
+using cuda_mr = rmm::mr::cuda_memory_resource;
+using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;
+auto tmp = new cuda_pool(new cuda_mr(), pool_size, pool_size);
Initialized_resource.reset(tmp);
auto wrapped = make_tracking_adaptor(tmp, RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
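
To make the new allocation path above concrete, here is a hedged, self-contained sketch of the same pattern (against RMM as of this commit; the headers are the ones included above, and the 1 GiB size is illustrative): construct a pool over a base resource, install it as the default, and allocate through it.

```cpp
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/default_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main() {
  using cuda_mr = rmm::mr::cuda_memory_resource;
  using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;

  cuda_mr base;                             // plain cudaMalloc/cudaFree
  cuda_pool pool(&base, 1 << 30, 1 << 30);  // fixed 1 GiB pool
  rmm::mr::set_default_resource(&pool);     // route RMM allocations here

  void *p = pool.allocate(256);             // suballocated, no cudaMalloc
  pool.deallocate(p, 256);
  return 0;
}
```

The managed-memory branch in the diff has the same shape, just with `rmm::mr::managed_memory_resource` as the upstream, so the pool then hands out unified memory.
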
