Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate Java cudf jar build with statically linked dependencies #10578

81 changes: 73 additions & 8 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
cudfjar - build cudf JAR with static libcudf using devtoolset toolchain
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
Expand Down Expand Up @@ -50,7 +51,9 @@ CUDF_KAFKA_BUILD_DIR=${REPODIR}/python/cudf_kafka/build
CUDF_BUILD_DIR=${REPODIR}/python/cudf/build
DASK_CUDF_BUILD_DIR=${REPODIR}/python/dask_cudf/build
CUSTREAMZ_BUILD_DIR=${REPODIR}/python/custreamz/build
BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR}"
CUDF_JAR_JAVA_BUILD_DIR="$REPODIR/java/target"

BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR}"

# Set defaults for vars modified by flags to this script
VERBOSE_FLAG=""
Expand Down Expand Up @@ -101,6 +104,58 @@ function buildAll {
((${NUMARGS} == 0 )) || !(echo " ${ARGS} " | grep -q " [^-]\+ ")
}

function buildLibCudfJniInDocker {
local cudaVersion="11.5.0"
local imageName="cudf-build:${cudaVersion}-devel-centos7"
local CMAKE_GENERATOR="${CMAKE_GENERATOR:-Ninja}"
local workspaceDir="/rapids"
local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
local workspaceRepoDir="$workspaceDir/cudf"
local workspaceMavenRepoDir="$workspaceDir/.m2/repository"
mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build"
nvidia-docker build \
-f java/ci/Dockerfile.centos7 \
--build-arg CUDA_VERSION=${cudaVersion} \
-t $imageName .
nvidia-docker run -it -u $(id -u):$(id -g) --rm \
-v "/etc/group:/etc/group:ro" \
-v "/etc/passwd:/etc/passwd:ro" \
-v "/etc/shadow:/etc/shadow:ro" \
-v "/etc/sudoers.d:/etc/sudoers.d:ro" \
-v "$REPODIR:$workspaceRepoDir:rw" \
-v "$localMavenRepo:$workspaceMavenRepoDir:rw" \
--workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \
${imageName} \
scl enable devtoolset-9 \
"cmake $workspaceRepoDir/cpp \
-G${CMAKE_GENERATOR} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCUDA_STATIC_RUNTIME=ON \
-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_INSTALL_PREFIX==/usr/local/rapids \
-DUSE_NVTX=ON -DCUDF_USE_ARROW_STATIC=ON \
-DCUDF_ENABLE_ARROW_S3=OFF \
-DBUILD_TESTS=OFF \
-DPER_THREAD_DEFAULT_STREAM=ON \
-DRMM_LOGGING_LEVEL=OFF \
-DBUILD_SHARED_LIBS=OFF && \
cmake --build . --parallel ${PARALLEL_LEVEL} && \
cd $workspaceRepoDir/java && \
mvn ${MVN_PHASES:-"package"} \
-Dmaven.repo.local=$workspaceMavenRepoDir \
-DskipTests=${SKIP_TESTS:-false} \
-Dparallel.level=${PARALLEL_LEVEL} \
-DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \
-DCUDA_STATIC_RUNTIME=ON \
-DPER_THREAD_DEFAULT_STREAM=ON \
-DRMM_LOGGING_LEVEL=OFF \
-DUSE_GDS=ON \
-DGPU_ARCHS=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCUDF_JNI_ARROW_STATIC=ON \
-DCUDF_JNI_LIBCUDF_STATIC=ON \
-Dtest=*,!CuFileTest"
}

if hasArg -h || hasArg --h || hasArg --help; then
echo "${HELP}"
exit 0
Expand Down Expand Up @@ -178,15 +233,21 @@ fi
################################################################################
# Configure, build, and install libcudf

if buildAll || hasArg libcudf; then
if buildAll || hasArg libcudf || hasArg cudfjar; then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=NATIVE"
echo "Building for the architecture of the GPU in the system..."
CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}"
if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then
echo "Building for the architecture of the GPU in the system..."
else
echo "Building for the GPU architecture(s) $CUDF_CMAKE_CUDA_ARCHITECTURES ..."
fi
else
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL"
CUDF_CMAKE_CUDA_ARCHITECTURES="ALL"
echo "Building for *ALL* supported GPU architectures..."
fi
fi

if buildAll || hasArg libcudf; then
# get the current count before the compile starts
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
# zero the sccache statistics
Expand All @@ -195,7 +256,7 @@ if buildAll || hasArg libcudf; then

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DUSE_NVTX=${BUILD_NVTX} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
Expand Down Expand Up @@ -262,6 +323,10 @@ if buildAll || hasArg dask_cudf; then
fi
fi

if hasArg cudfjar; then
buildLibCudfJniInDocker
fi

# Build libcudf_kafka library
if hasArg libcudf_kafka; then
cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \
Expand Down
12 changes: 2 additions & 10 deletions java/ci/Dockerfile.centos7
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,16 +33,8 @@ RUN yum install -y git zlib-devel maven tar wget patch ninja-build
## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins
gerashegalov marked this conversation as resolved.
Show resolved Hide resolved
RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids

ARG CMAKE_VERSION=3.20.5
ARG CMAKE_VERSION=3.22.3
RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz
ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH

# get GDS user-space lib
gerashegalov marked this conversation as resolved.
Show resolved Hide resolved
ARG GDS_VERSION=1.0.0
RUN cd /tmp/ && wget https://developer.download.nvidia.com/gds/redist/rel-${GDS_VERSION}/gds-redistrib-${GDS_VERSION}.tgz && \
tar zxf gds-redistrib-${GDS_VERSION}.tgz && \
cp -R ./gds-redistrib-${GDS_VERSION}/targets/x86_64-linux/lib/* /usr/local/cuda/targets/x86_64-linux/lib && \
cp -R ./gds-redistrib-${GDS_VERSION}/targets/x86_64-linux/include/* /usr/local/cuda/targets/x86_64-linux/include && \
rm -rf gds-redistrib-*
9 changes: 7 additions & 2 deletions java/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@
<native.build.path>${project.build.directory}/cmake-build</native.build.path>
<slf4j.version>1.7.30</slf4j.version>
<arrow.version>0.15.1</arrow.version>
<parallel.level>4</parallel.level>
<CUDF_CPP_BUILD_DIR/>
</properties>

<profiles>
Expand Down Expand Up @@ -395,8 +397,11 @@
</exec>
<exec dir="${native.build.path}"
failonerror="true"
executable="make">
<arg value="-j"/>
executable="cmake">
<arg value="--build"/>
<arg value="."/>
<arg value="--parallel"/>
<arg value="${parallel.level}"/>
</exec>
<mkdir dir="${project.build.directory}/extra-resources"/>
<exec executable="bash" output="${project.build.directory}/extra-resources/cudf-java-version-info.properties">
Expand Down
10 changes: 6 additions & 4 deletions java/src/main/native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@ message(VERBOSE "CUDF_JNI: Build with static Arrow library: ${CUDF_JNI_ARROW_STA
message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}")

set(CUDF_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp")
if(DEFINED ENV{CUDF_CPP_BUILD_DIR})
set(CUDF_CPP_BUILD_DIR "$ENV{CUDF_CPP_BUILD_DIR}")
else()
set(CUDF_CPP_BUILD_DIR "${CUDF_SOURCE_DIR}/build")
if(NOT DEFINED CUDF_CPP_BUILD_DIR OR CUDF_CPP_BUILD_DIR STREQUAL "")
if(DEFINED ENV{CUDF_CPP_BUILD_DIR})
set(CUDF_CPP_BUILD_DIR "$ENV{CUDF_CPP_BUILD_DIR}")
else()
set(CUDF_CPP_BUILD_DIR "${CUDF_SOURCE_DIR}/build")
endif()
endif()

set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
Expand Down