[REVIEW] switch from CNMeM to pool_memory_resource [skip ci] #5632

Merged · 4 commits · Jul 10, 2020

Changes from all commits
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -36,6 +36,7 @@
 
 ## Improvements
 
+- PR #5632 Switch JNI code to use `pool_memory_resource` instead of CNMeM
 - PR #5486 Link Boost libraries statically in the Java build
 - PR #5479 Link Arrow libraries statically
 - PR #5414 Use new release of Thrust/CUB in the JNI build
19 changes: 5 additions & 14 deletions java/README.md
@@ -56,12 +56,12 @@ When building libcudf, make sure you pass in the cmake options
 `-DARROW_STATIC_LIB=ON -DBoost_USE_STATIC_LIBS=ON` so that Apache Arrow and Boost libraries are
 linked statically.
 
-If you use the default cmake options libcudart will be dynamically linked to libcudf and librmm
-which are included. If you do this the resulting jar will have a classifier associated with it
+If you use the default cmake options libcudart will be dynamically linked to libcudf
+which is included. If you do this the resulting jar will have a classifier associated with it
 because that jar can only be used with a single version of the CUDA runtime.
 
 There is experimental work to try and remove that requirement but it is not fully functional
-you can build RMM and cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
+you can build cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
 `-DCUDA_STATIC_RUNTIME=ON` when running maven. This will statically link in the CUDA runtime
 and result in a jar with no classifier that should run on any host that has a version of the
 driver new enough to support the runtime that this was built with. Unfortunately `libnvrtc` is still
@@ -92,24 +92,15 @@ between different threads (see
 [blog post](https://devblogs.nvidia.com/gpu-pro-tip-cuda-7-streams-simplify-concurrency/)).
 
 Since the PTDS option is for each compilation unit, it should be done at the same time across the
-whole codebase. To enable PTDS, first build RMM:
-```shell script
-conda activate cudf_dev
-cd src/rmm/build
-cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON
-make -j`nproc`
-make install
-```
-
-then build cuDF:
+whole codebase. To enable PTDS, first build cuDF:
 ```shell script
 cd src/cudf/cpp/build
 cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DPER_THREAD_DEFAULT_STREAM=ON
 make -j`nproc`
 make install
 ```
 
-and finally build the jar:
+then build the jar:
 ```shell script
 cd src/cudf/java
 mvn clean install -DPER_THREAD_DEFAULT_STREAM=ON
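
For background (an aside, not part of the diff): PTDS makes each host thread's "default stream" a distinct stream, which is why the `PER_THREAD_DEFAULT_STREAM` option must be applied uniformly across RMM headers, libcudf, and the JNI code. A minimal C++ sketch of the runtime behavior the flag enables; the function name `per_thread_work` and the memcpy workload are illustrative assumptions, while `cudaStreamPerThread` is the CUDA runtime's explicit handle for the calling thread's default stream:

```cpp
#include <cstddef>
#include <cuda_runtime_api.h>

// With every translation unit compiled for per-thread default streams
// (e.g. nvcc --default-stream per-thread), work submitted on the default
// stream goes to a stream private to the calling host thread, so threads
// no longer serialize on the single legacy default stream.
void per_thread_work(void *dst, const void *src, std::size_t n) {
  // cudaStreamPerThread names this thread's default stream explicitly;
  // with PTDS enabled, plain stream 0 resolves to the same stream.
  cudaMemcpyAsync(dst, src, n, cudaMemcpyDefault, cudaStreamPerThread);
  cudaStreamSynchronize(cudaStreamPerThread);
}
```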
15 changes: 0 additions & 15 deletions java/pom.xml
@@ -380,14 +380,6 @@
             def proc = 'ldd ${native.build.path}/libcudfjni.so'.execute()
             proc.consumeProcessOutput(sout, serr)
             proc.waitForOrKill(10000)
-            def librp = ~/librmm\\.so\\s+=>\\s+(.*)librmm.*\\.so\\s+.*/
-            def m = librp.matcher(sout)
-            if (m.find()) {
-              pom.properties['native.deps.path'] = m.group(1)
-            } else {
-              fail("Could not find rmm as a dependency of libcudfjni out> $sout err> $serr")
-            }
-
             def libcudf = ~/libcudf\\.so\\s+=>\\s+(.*)libcudf.*\\.so\\s+.*/
             def cudfm = libcudf.matcher(sout)
             if (cudfm.find()) {
@@ -469,13 +461,6 @@
                   <include>libcudf.so</include>
                 </includes>
               </resource>
-              <resource>
-                <!--Set by groovy script-->
-                <directory>${native.deps.path}</directory>
-                <includes>
-                  <include>librmm.so</include>
-                </includes>
-              </resource>
             </resources>
           </configuration>
         </execution>
1 change: 0 additions & 1 deletion java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java
@@ -32,7 +32,6 @@
 public class NativeDepsLoader {
   private static final Logger log = LoggerFactory.getLogger(NativeDepsLoader.class);
   private static final String[] loadOrder = new String[] {
-      "rmm",
       "cudf",
       "cudfjni"
   };
13 changes: 1 addition & 12 deletions java/src/main/native/CMakeLists.txt
@@ -149,18 +149,8 @@ find_path(RMM_INCLUDE "rmm"
           "$ENV{CONDA_PREFIX}/include/rmm"
           "$ENV{CONDA_PREFIX}/include")
 
-find_library(RMM_LIBRARY "rmm"
-             HINTS "$ENV{RMM_ROOT}/lib"
-             "$ENV{CONDA_PREFIX}/lib")
-
-message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
 message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")
-
-add_library(rmm SHARED IMPORTED ${RMM_LIBRARY})
-if (RMM_INCLUDE AND RMM_LIBRARY)
-  set_target_properties(rmm PROPERTIES IMPORTED_LOCATION ${RMM_LIBRARY})
-endif (RMM_INCLUDE AND RMM_LIBRARY)
 
 ###################################################################################################
 # - find JNI -------------------------------------------------------------------------------------
 find_package(JNI REQUIRED)
@@ -189,7 +179,6 @@ include_directories("${THRUST_INCLUDE}"
 
 link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc
                  "${CMAKE_BINARY_DIR}/lib"
-                 "${RMM_LIBRARY}"
                  "${CUDF_LIBRARY}")
 
 
@@ -230,6 +219,6 @@ endif(PER_THREAD_DEFAULT_STREAM)
 ###################################################################################################
 # - link libraries --------------------------------------------------------------------------------
 
-target_link_libraries(cudfjni cudf rmm ${CUDART_LIBRARY} cuda nvrtc)
+target_link_libraries(cudfjni cudf ${CUDART_LIBRARY} cuda nvrtc)
 
 
14 changes: 7 additions & 7 deletions java/src/main/native/src/RmmJni.cpp
@@ -14,20 +14,17 @@
  * limitations under the License.
  */
 
 #include <atomic>
 #include <chrono>
 #include <ctime>
 #include <fstream>
 #include <iostream>
 #include <limits>
 #include <mutex>
 
-#include <rmm/mr/device/cnmem_managed_memory_resource.hpp>
-#include <rmm/mr/device/cnmem_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
 #include <rmm/mr/device/default_memory_resource.hpp>
 #include <rmm/mr/device/logging_resource_adaptor.hpp>
 #include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
 #include <unordered_map>
 
 #include "jni_utils.hpp"
@@ -365,14 +362,17 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
   bool use_pool_alloc = allocation_mode & 1;
   bool use_managed_mem = allocation_mode & 2;
   if (use_pool_alloc) {
-    std::vector<int> devices; // Just do default devices for now...
     if (use_managed_mem) {
-      auto tmp = new rmm::mr::cnmem_managed_memory_resource(pool_size, devices);
+      using managed_mr = rmm::mr::managed_memory_resource;
+      using managed_pool = rmm::mr::pool_memory_resource<managed_mr>;
+      auto tmp = new managed_pool(new managed_mr(), pool_size, pool_size);
       Initialized_resource.reset(tmp);
       auto wrapped = make_tracking_adaptor(tmp, RMM_ALLOC_SIZE_ALIGNMENT);
       Tracking_memory_resource.reset(wrapped);
     } else {
-      auto tmp = new rmm::mr::cnmem_memory_resource(pool_size, devices);
+      using cuda_mr = rmm::mr::cuda_memory_resource;
+      using cuda_pool = rmm::mr::pool_memory_resource<cuda_mr>;
+      auto tmp = new cuda_pool(new cuda_mr(), pool_size, pool_size);
       Initialized_resource.reset(tmp);
       auto wrapped = make_tracking_adaptor(tmp, RMM_ALLOC_SIZE_ALIGNMENT);
       Tracking_memory_resource.reset(wrapped);
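
For background (an aside, not part of the diff): the hunk above composes `pool_memory_resource` over an upstream resource where CNMeM used to be a single self-contained pool. A minimal standalone sketch of that composition, assuming the RMM API of this era as it appears in the diff; the 1 GiB pool size and the `main` harness are illustrative only:

```cpp
#include <cstddef>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

int main() {
  // Upstream resource that actually calls cudaMalloc/cudaFree.
  rmm::mr::cuda_memory_resource cuda_mr;

  // Pool that reserves pool_size bytes up front and sub-allocates from it,
  // mirroring `new cuda_pool(new cuda_mr(), pool_size, pool_size)` above
  // (upstream pointer, initial pool size, maximum pool size).
  std::size_t pool_size = std::size_t{1} << 30;  // 1 GiB, arbitrary
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr(
      &cuda_mr, pool_size, pool_size);

  // Allocations are now carved out of the pool, not separate cudaMallocs.
  void *p = pool_mr.allocate(4096);
  pool_mr.deallocate(p, 4096);
  return 0;
}
```

Substituting `managed_memory_resource` as the upstream yields the managed variant built in the `use_managed_mem` branch.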