Support statically linking CUDA runtime for Java bindings (#9873)
Fixes #9528.  Adds the ability to statically link the CUDA runtime when building the Java bindings.  This also adds the ability for the Java bindings to link against an archive of libcudf rather than the shared library.  This is recommended when statically linking the CUDA runtime, as it reduces the number of shared libraries that pull in the CUDA runtime.

The Java CI script that builds the official artifact has been updated to build libcudf as an archive and statically link the CUDA runtime by default.
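For illustration, a local build that exercises both options might look like the following sketch (assuming a normal libcudf development environment; exact paths and the usual additional cmake/Maven flags are omitted):

```
# Build libcudf as a static archive with the CUDA runtime statically linked
cd cpp && mkdir -p build && cd build
cmake .. -DBUILD_SHARED_LIBS=OFF -DCUDA_STATIC_RUNTIME=ON
make -j$(nproc) install

# Build the Java bindings, statically linking both the CUDA runtime and libcudf
cd ../../java
mvn clean install -DCUDA_STATIC_RUNTIME=ON -DCUDF_JNI_LIBCUDF_STATIC=ON
```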

Authors:
  - Jason Lowe (https://github.com/jlowe)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: #9873
jlowe authored Dec 9, 2021
1 parent c26779c commit d7ce106
Showing 5 changed files with 86 additions and 51 deletions.
30 changes: 12 additions & 18 deletions java/README.md
@@ -87,24 +87,18 @@ within the libcudf build environment.

## Statically Linking the CUDA Runtime

If you use the default cmake options libcudart will be dynamically linked to libcudf
which is included. If you do this the resulting jar will have a classifier associated with it
because that jar can only be used with a single version of the CUDA runtime.

There is experimental work to try and remove that requirement but it is not fully functional
you can build cuDF with `-DCUDA_STATIC_RUNTIME=ON` when running cmake, and similarly
`-DCUDA_STATIC_RUNTIME=ON` when running Maven. This will statically link in the CUDA runtime
and result in a jar with no classifier that should run on any host that has a version of the
driver new enough to support the runtime that this was built with.

To build the Java bindings with a statically-linked CUDA runtime, use a build command like:
```
mvn clean install -DCUDA_STATIC_RUNTIME=ON
```

You will get errors if the CUDA runtime linking is not consistent. We tried to detect these
up front and stop the build early if there is a mismatch, but there may be some cases we missed
and this can result in some very hard to debug errors.
If you use the default cmake options, libcudart will be dynamically linked to libcudf and libcudfjni.
To build with a static CUDA runtime, build libcudf with `-DCUDA_STATIC_RUNTIME=ON` as a cmake
parameter, and similarly build with `-DCUDA_STATIC_RUNTIME=ON` when building the Java bindings
with Maven.
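For example, a minimal Maven invocation (assuming libcudf itself was configured with `-DCUDA_STATIC_RUNTIME=ON`):
```
mvn clean install -DCUDA_STATIC_RUNTIME=ON
```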

### Building with a libcudf Archive

When statically linking the CUDA runtime, it is recommended to build cuDF as an archive rather than
a shared library, as this allows the Java bindings to only have a single shared library that uses
the CUDA runtime. To build libcudf as an archive, specify `-DBUILD_SHARED_LIBS=OFF` as a cmake
parameter when building libcudf, then specify `-DCUDF_JNI_LIBCUDF_STATIC=ON` when building the Java
bindings with Maven.
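A sketch of the two steps (other cmake and Maven options follow the normal build instructions above):
```
# libcudf: build as a static archive
cmake .. -DBUILD_SHARED_LIBS=OFF -DCUDA_STATIC_RUNTIME=ON

# Java bindings: statically link libcudf.a into the JNI library
mvn clean install -DCUDA_STATIC_RUNTIME=ON -DCUDF_JNI_LIBCUDF_STATIC=ON
```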

## Per-thread Default Stream

19 changes: 17 additions & 2 deletions java/ci/build-in-docker.sh
@@ -22,6 +22,7 @@ gcc --version
PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
SKIP_JAVA_TESTS=${SKIP_JAVA_TESTS:-true}
BUILD_CPP_TESTS=${BUILD_CPP_TESTS:-OFF}
ENABLE_CUDA_STATIC_RUNTIME=${ENABLE_CUDA_STATIC_RUNTIME:-ON}
ENABLE_PTDS=${ENABLE_PTDS:-ON}
RMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL:-OFF}
ENABLE_NVTX=${ENABLE_NVTX:-ON}
@@ -36,6 +37,7 @@ OUT_PATH="$WORKSPACE/$OUT"
echo "SIGN_FILE: $SIGN_FILE,\
SKIP_JAVA_TESTS: $SKIP_JAVA_TESTS,\
BUILD_CPP_TESTS: $BUILD_CPP_TESTS,\
ENABLE_CUDA_STATIC_RUNTIME: $ENABLE_CUDA_STATIC_RUNTIME,\
ENABLED_PTDS: $ENABLE_PTDS,\
ENABLE_NVTX: $ENABLE_NVTX,\
ENABLE_GDS: $ENABLE_GDS,\
@@ -52,13 +54,26 @@ export LIBCUDF_KERNEL_CACHE_PATH=/rapids
rm -rf "$WORKSPACE/cpp/build"
mkdir -p "$WORKSPACE/cpp/build"
cd "$WORKSPACE/cpp/build"
cmake .. -DUSE_NVTX=$ENABLE_NVTX -DCUDF_USE_ARROW_STATIC=ON -DCUDF_ENABLE_ARROW_S3=OFF -DBUILD_TESTS=$BUILD_CPP_TESTS -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL
cmake .. -DUSE_NVTX=$ENABLE_NVTX \
-DCUDF_USE_ARROW_STATIC=ON \
-DCUDF_ENABLE_ARROW_S3=OFF \
-DBUILD_TESTS=$BUILD_CPP_TESTS \
-DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS \
-DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL \
-DBUILD_SHARED_LIBS=OFF

make -j$PARALLEL_LEVEL
make install DESTDIR=$INSTALL_PREFIX

###### Build cudf jar ######
BUILD_ARG="-Dmaven.repo.local=\"$WORKSPACE/.m2\" -DskipTests=$SKIP_JAVA_TESTS -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL -DUSE_GDS=$ENABLE_GDS -Dtest=*,!CuFileTest"
BUILD_ARG="-Dmaven.repo.local=\"$WORKSPACE/.m2\"\
-DskipTests=$SKIP_JAVA_TESTS\
-DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS\
-DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME\
-DCUDF_JNI_LIBCUDF_STATIC=ON\
-DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL\
-DUSE_GDS=$ENABLE_GDS -Dtest=*,!CuFileTest"

if [ "$SIGN_FILE" == true ]; then
# Build javadoc and sources only when SIGN_FILE is true
BUILD_ARG="$BUILD_ARG -Prelease"
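Because the script reads these switches from the environment with `${VAR:-default}` fallbacks, the new static-runtime default can be overridden without editing the script; for instance (a sketch, assuming the script is launched directly the way the CI build container normally runs it):
```
ENABLE_CUDA_STATIC_RUNTIME=OFF ./java/ci/build-in-docker.sh
```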
25 changes: 9 additions & 16 deletions java/pom.xml
@@ -169,6 +169,7 @@
<USE_GDS>OFF</USE_GDS>
<GPU_ARCHS>ALL</GPU_ARCHS>
<CUDF_JNI_ARROW_STATIC>ON</CUDF_JNI_ARROW_STATIC>
<CUDF_JNI_LIBCUDF_STATIC>OFF</CUDF_JNI_LIBCUDF_STATIC>
<native.build.path>${project.build.directory}/cmake-build</native.build.path>
<slf4j.version>1.7.30</slf4j.version>
<arrow.version>0.15.1</arrow.version>
@@ -390,6 +391,7 @@
<arg value="-DCUDF_CPP_BUILD_DIR=${CUDF_CPP_BUILD_DIR}"/>
<arg value="-DGPU_ARCHS=${GPU_ARCHS}"/>
<arg value="-DCUDF_JNI_ARROW_STATIC=${CUDF_JNI_ARROW_STATIC}"/>
<arg value="-DCUDF_JNI_LIBCUDF_STATIC=${CUDF_JNI_LIBCUDF_STATIC}"/>
</exec>
<exec dir="${native.build.path}"
failonerror="true"
@@ -434,26 +436,17 @@
fail("Could not find cudf as a dependency of libcudfjni out> $sout err> $serr")
}

def libcudart = ~/libcudart\\.so\\.(.*)\\s+=>.*/
def cm = libcudart.matcher(sout)
def nvccout = new StringBuffer(), nvccerr = new StringBuffer()
def nvccproc = 'nvcc --version'.execute()
nvccproc.consumeProcessOutput(nvccout, nvccerr)
nvccproc.waitForOrKill(10000)
def cudaPattern = ~/Cuda compilation tools, release ([0-9]+)/
def cm = cudaPattern.matcher(nvccout)
if (cm.find()) {
if (pom.properties['CUDA_STATIC_RUNTIME'] == 'ON') {
fail("found libcudart when we expect to be statically linked to it")
}
def classifier = 'cuda' + cm.group(1)
.replaceFirst(/\\./, '-') // First . becomes a -
.replaceAll(/\\..*$/, '') // Drop all of the subversions from cuda
.replaceAll(/-0$/, '') // If it is a X.0 version, like 10.0 drop the .0
pom.properties['cuda.classifier'] = classifier
println 'WARNING FOUND libcudart this means your jar will only work against a single version of the cuda runtime ' + classifier
} else if (pom.properties['CUDA_STATIC_RUNTIME'] == 'OFF') {
fail('could not find libcudart when we expect to be dynamically linked to it')
} else {
pom.properties['cuda.classifier'] = ''
}

if (pom.properties['CUDA_STATIC_RUNTIME'] == 'ON') {
println 'WARNING RUNNING WITH STATIC LINKING DOES NOT FULLY WORK. USE WITH CAUTION.'
fail('could not find CUDA version')
}
</source>
</configuration>
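For reference, the jar classifier is now derived from the local `nvcc` release rather than from the `ldd` output of libcudfjni. For example, with a CUDA 11.5 toolkit installed, `nvcc --version` reports a line roughly like:
```
Cuda compilation tools, release 11.5, V11.5.119
```
The `([0-9]+)` group captures only the major version, so `cuda.classifier` becomes `cuda11` regardless of the minor toolkit release.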
46 changes: 31 additions & 15 deletions java/src/main/native/CMakeLists.txt
@@ -39,13 +39,15 @@ option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF)
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF)
option(CUDF_JNI_ARROW_STATIC "Statically link Arrow" ON)
option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF)

message(VERBOSE "CUDF_JNI: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF_JNI: Build with per-thread default stream: ${PER_THREAD_DEFAULT_STREAM}")
message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}")
message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}")
message(VERBOSE "CUDF_JNI: Build with static Arrow library: ${CUDF_JNI_ARROW_STATIC}")
message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}")

set(CUDF_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp")
if(DEFINED ENV{CUDF_CPP_BUILD_DIR})
@@ -85,7 +87,7 @@ endif()
rapids_cmake_build_type("Release")

# ##################################################################################################
# * Thrust/CUB/libcudacxx
# * Thrust/CUB
# ------------------------------------------------------------------------------------
find_path(
THRUST_INCLUDE "thrust"
@@ -102,19 +104,6 @@ find_path(

message(STATUS "CUB: CUB_INCLUDE set to ${CUB_INCLUDE}")

find_path(LIBCUDACXX_INCLUDE "cuda" HINTS "$ENV{CUDF_ROOT}/_deps/libcudacxx-src/include"
"${CUDF_CPP_BUILD_DIR}/_deps/libcudacxx-src/include"
)

message(STATUS "LIBCUDACXX: LIBCUDACXX_INCLUDE set to ${LIBCUDACXX_INCLUDE}")

find_path(
SPDLOG_INCLUDE "spdlog"
HINTS "${CUDF_CPP_BUILD_DIR}/_deps/spdlog-src/include" "$ENV{RMM_ROOT}/_deps/spdlog-src/include"
"$ENV{RMM_ROOT}/include" "$ENV{CONDA_PREFIX}/include"
)

message(STATUS "SPDLOG: SPDLOG_INCLUDE set to ${SPDLOG_INCLUDE}")
# ##################################################################################################
# * CUDF ------------------------------------------------------------------------------------------

@@ -139,6 +128,14 @@ find_path(

message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")

find_path(
SPDLOG_INCLUDE "spdlog"
HINTS "${CUDF_CPP_BUILD_DIR}/_deps/spdlog-src/include" "$ENV{RMM_ROOT}/_deps/spdlog-src/include"
"$ENV{RMM_ROOT}/include" "$ENV{CONDA_PREFIX}/include"
)

message(STATUS "SPDLOG: SPDLOG_INCLUDE set to ${SPDLOG_INCLUDE}")

# ##################################################################################################
# * ARROW -----------------------------------------------------------------------------------------

@@ -236,6 +233,18 @@ add_library(
src/check_nvcomp_output_sizes.cu
)

if(CUDF_JNI_LIBCUDF_STATIC)
# When linking against libcudf.a, the JNI library itself contains the libcudf code and takes over
# the role of the old libcudf.so. For backwards compatibility with software that expects to find
# libcudf.so in the JVM environment after cudf has loaded, the JNI code and libcudf.a are combined
# into libcudf.so. A stub library is also created for libcudfjni.so that simply requires libcudf.so,
# for backwards compatibility with software that expects to find libcudfjni.so at runtime.
set_target_properties(cudfjni PROPERTIES OUTPUT_NAME "cudf")
add_library(cudfjnistub SHARED src/emptyfile.cpp)
set_target_properties(cudfjnistub PROPERTIES OUTPUT_NAME "cudfjni")
target_link_libraries(cudfjnistub -Wl,--no-as-needed cudfjni -Wl,--as-needed)
endif()

# ##################################################################################################
# * include paths ---------------------------------------------------------------------------------

@@ -310,7 +319,14 @@ target_compile_definitions(cudfjni PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM
# ##################################################################################################
# * link libraries --------------------------------------------------------------------------------

target_link_libraries(cudfjni PRIVATE ${NVCOMP_LIBRARY} ${CUDF_LIB} ${ARROW_LIBRARY})
set(CUDF_LINK ${CUDF_LIB})
if(CUDF_JNI_LIBCUDF_STATIC)
set(CUDF_LINK -Wl,--whole-archive ${CUDF_LIB} -Wl,--no-whole-archive)
endif()

target_link_libraries(
cudfjni PRIVATE ${CUDF_LINK} ${NVCOMP_LIBRARY} ${ARROW_LIBRARY} CUDA::cuda_driver
)

# ##################################################################################################
# * cudart options --------------------------------------------------------------------------------
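With the static options enabled, a quick way to sanity-check the resulting libraries (a sketch; run from the cmake build directory, and actual file locations may differ):
```
# The stub libcudfjni.so should only carry a NEEDED dependency on libcudf.so
readelf -d libcudfjni.so | grep NEEDED

# With CUDA_STATIC_RUNTIME=ON, libcudf.so should not depend on a shared libcudart
ldd libcudf.so | grep cudart   # expect no output
```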
17 changes: 17 additions & 0 deletions java/src/main/native/src/emptyfile.cpp
@@ -0,0 +1,17 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Intentionally empty
