diff --git a/build/build-in-docker b/build/build-in-docker index 421cc1a855..49032185ba 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,30 +24,27 @@ set -e SCRIPTDIR=$(cd $(dirname $0); pwd) LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} -CUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM:-ON} USE_GDS=${USE_GDS:-ON} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} +# Make CUDA_VERSION consistent with the file run-in-docker +export CUDA_VERSION=${CUDA_VERSION:-11.8.0} +CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*} +BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} if (( $# == 0 )); then echo "Usage: $0 " exit 1 fi -_CUDF_CLEAN_SKIP="" -# if ccache is enabled and libcudf.clean.skip not provided -# by the user remove the cpp build directory -# -if [[ "$CCACHE_DISABLE" != "1" ]]; then - if [[ ! "$*" =~ " -Dlibcudf.clean.skip=" ]]; then - # Don't skip clean if ccache is enabled - # unless the user overrides - _CUDF_CLEAN_SKIP="-Dlibcudf.clean.skip=false" - fi +# Set env for arm64 build. Possible values of 'uname -m': [x86_64/i386/aarch64/mips/...] +if [ "$(uname -m)" == "aarch64" ]; then + USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit. + BUILD_FAULTINJ="OFF" # libcupti_static.a, which cufaultinj links against, does not exist in the arm64 CUDA toolkit.
fi $SCRIPTDIR/run-in-docker mvn \ -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ - -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$CUDF_USE_PER_THREAD_DEFAULT_STREAM \ -DUSE_GDS=$USE_GDS \ - $_CUDF_CLEAN_SKIP \ + -DBUILD_FAULTINJ=${BUILD_FAULTINJ} \ + -Dcuda.version=$CUDA_CLASSIFIER \ "$@" diff --git a/build/run-in-docker b/build/run-in-docker index 62d40aac48..81152a1d9d 100755 --- a/build/run-in-docker +++ b/build/run-in-docker @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -27,11 +27,16 @@ REPODIR=$SCRIPTDIR/.. CUDA_VERSION=${CUDA_VERSION:-11.8.0} DOCKER_CMD=${DOCKER_CMD:-docker} DOCKER_BUILD_EXTRA_ARGS=${DOCKER_BUILD_EXTRA_ARGS:-""} +if [ "$(uname -m)" == "aarch64" ]; then + DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64 $DOCKER_BUILD_EXTRA_ARGS" +else + DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64 $DOCKER_BUILD_EXTRA_ARGS" +fi DOCKER_RUN_EXTRA_ARGS=${DOCKER_RUN_EXTRA_ARGS:-""} LOCAL_CCACHE_DIR=${LOCAL_CCACHE_DIR:-"$HOME/.ccache"} LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} -SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-centos7" +SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-rockylinux8" # ensure directories exist mkdir -p "$LOCAL_CCACHE_DIR" "$LOCAL_MAVEN_REPO" @@ -74,4 +79,4 @@ $DOCKER_CMD run $DOCKER_GPU_OPTS $DOCKER_RUN_EXTRA_ARGS -u $(id -u):$(id -g) --r -e VERBOSE \ $DOCKER_OPTS \ $SPARK_IMAGE_NAME \ - scl enable devtoolset-11 "$RUN_CMD" + scl enable gcc-toolset-11 "$RUN_CMD" diff --git a/ci/Dockerfile b/ci/Dockerfile old mode 100755 new mode 100644 index e3b703a11e..f36ede2233 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -17,31 +17,29 @@ ### # Build the image for 
spark-rapids-jni development environment. # -# Arguments: CUDA_VERSION=11.8.0 +# Arguments: CUDA_VERSION=[11.X.Y, 12.X.Y], OS_RELEASE=[8, 9], TARGETPLATFORM=[linux/amd64, linux/arm64] # ### ARG CUDA_VERSION=11.8.0 -FROM nvidia/cuda:$CUDA_VERSION-devel-centos7 -ARG DEVTOOLSET_VERSION=11 +ARG OS_RELEASE=8 +ARG TARGETPLATFORM=linux/amd64 +# multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host +# check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) +FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE +ARG TOOLSET_VERSION=11 ### Install basic requirements -RUN yum install -y centos-release-scl -RUN yum install -y devtoolset-${DEVTOOLSET_VERSION} rh-python38 epel-release -RUN yum install -y zlib-devel maven tar wget patch ninja-build -# require git 2.18+ to keep consistent submodule operations -RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm && yum install -y git -# pin urllib3<2.0 for https://github.com/psf/requests/issues/6432 -RUN scl enable rh-python38 "pip install requests 'urllib3<2.0'" - +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} python39 zlib-devel maven tar wget patch ninja-build git ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins -RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids +RUN mkdir -m 777 /usr/local/rapids /rapids # 3.22.3: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache ARG CMAKE_VERSION=3.26.4 - -RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ - tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ - rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz -ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH 
+# default x86_64 from x86 build, aarch64 cmake for arm build +ARG CMAKE_ARCH=x86_64 +RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ + tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ + rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz +ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH # ccache for interactive builds ARG CCACHE_VERSION=4.6 @@ -51,7 +49,7 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v cd ccache-${CCACHE_VERSION} && \ mkdir build && \ cd build && \ - scl enable devtoolset-${DEVTOOLSET_VERSION} \ + scl enable gcc-toolset-${TOOLSET_VERSION} \ "cmake .. \ -DCMAKE_BUILD_TYPE=Release \ -DZSTD_FROM_INTERNET=ON \ diff --git a/ci/Dockerfile.multi b/ci/Dockerfile.multi deleted file mode 100644 index d3b198530b..0000000000 --- a/ci/Dockerfile.multi +++ /dev/null @@ -1,76 +0,0 @@ -# -# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -### -# JNI CI image for multi-platform build -# -# Arguments: CUDA_VERSION=11.8.0 -# -### -ARG CUDA_VERSION=11.8.0 -ARG OS_RELEASE=8 -# multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host -# check available offcial arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) -FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE -ARG TOOLSET_VERSION=11 -### Install basic requirements -RUN dnf install -y scl-utils -RUN dnf install -y gcc-toolset-${TOOLSET_VERSION} python39 -RUN dnf --enablerepo=powertools install -y zlib-devel maven tar wget patch ninja-build -# require git 2.18+ to keep consistent submodule operations -RUN dnf install -y git -## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins -RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids - -# 3.22.3+: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache -ARG CMAKE_VERSION=3.26.4 -# default as arm64 release -ARG CMAKE_ARCH=aarch64 -# aarch64 cmake for arm build -RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ - tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ - rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz -ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}/bin:$PATH - -# ccache for interactive builds -ARG CCACHE_VERSION=4.6 -RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \ - tar zxf ccache-${CCACHE_VERSION}.tar.gz && \ - rm ccache-${CCACHE_VERSION}.tar.gz && \ - cd ccache-${CCACHE_VERSION} && \ - mkdir build && \ - cd build && \ - scl enable gcc-toolset-${TOOLSET_VERSION} \ - "cmake .. 
\ - -DCMAKE_BUILD_TYPE=Release \ - -DZSTD_FROM_INTERNET=ON \ - -DREDIS_STORAGE_BACKEND=OFF && \ - cmake --build . --parallel 4 --target install" && \ - cd ../.. && \ - rm -rf ccache-${CCACHE_VERSION} - -## install a version of boost that is needed for arrow/parquet to work -RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \ - tar -xzf boost_1_79_0.tar.gz && \ - rm boost_1_79_0.tar.gz && \ - cd boost_1_79_0 && \ - ./bootstrap.sh --prefix=/usr/local && \ - ./b2 install --prefix=/usr/local --with-filesystem --with-system && \ - cd /usr/local && \ - rm -rf boost_1_79_0 - -# disable cuda container constraints to allow running w/ elder drivers on data-center GPUs -ENV NVIDIA_DISABLE_REQUIRE="true" diff --git a/ci/Jenkinsfile.premerge b/ci/Jenkinsfile.premerge index a59db1af9a..0a00eb6f1b 100644 --- a/ci/Jenkinsfile.premerge +++ b/ci/Jenkinsfile.premerge @@ -30,7 +30,7 @@ import ipp.blossom.* def githubHelper // blossom github helper def TEMP_IMAGE_BUILD = true -def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:centos7-cuda11.8.0-blossom" +def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:rockylinux8-cuda11.8.0-blossom" def cpuImage = pod.getCPUYAML(IMAGE_PREMERGE) def PREMERGE_DOCKERFILE = 'ci/Dockerfile' def PREMERGE_TAG @@ -150,7 +150,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" } if (TEMP_IMAGE_BUILD) { - PREMERGE_TAG = "centos7-cuda11.8.0-blossom-dev-${BUILD_TAG}" + PREMERGE_TAG = "rockylinux8-cuda11.8.0-blossom-dev-${BUILD_TAG}" IMAGE_PREMERGE = "${ARTIFACTORY_NAME}/sw-spark-docker-local/plugin-jni:${PREMERGE_TAG}" docker.build(IMAGE_PREMERGE, "--network=host -f ${PREMERGE_DOCKERFILE} -t $IMAGE_PREMERGE .") uploadDocker(IMAGE_PREMERGE) @@ -212,7 +212,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" container('gpu') { timeout(time: 3, unit: 'HOURS') { // step only timeout for test run 
common.resolveIncompatibleDriverIssue(this) - sh 'scl enable devtoolset-11 "ci/premerge-build.sh"' + sh 'scl enable gcc-toolset-11 "ci/premerge-build.sh"' sh 'bash ci/fuzz-test.sh' } } diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index 18119dc45d..f591f73a23 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -18,7 +18,7 @@ # NOTE: # this script is for jenkins only, and should not be used for local development # run with ci/Dockerfile in jenkins: -# scl enable devtoolset-11 rh-python38 "ci/submodule-sync.sh" +# scl enable gcc-toolset-11 "ci/submodule-sync.sh" set -ex diff --git a/pom.xml b/pom.xml index 745f8127d1..24daa4635e 100644 --- a/pom.xml +++ b/pom.xml @@ -338,6 +338,11 @@ arm64 + + + aarch64 + + ${cuda.version}-arm64