From 19527d17a1a3fa6b6abde0c5dbf84442311806c3 Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Fri, 3 May 2024 00:00:38 +0800 Subject: [PATCH 1/9] Drop Centos7 support To fix: https://github.com/NVIDIA/spark-rapids-jni/issues/1991 Drop Centos7 support, switch to build in a Rocky 8 Docker image Update the script to support both amd64 and arm64 CPUs Signed-off-by: Tim Liu --- build/build-in-docker | 44 +++++++++++++++--------- build/run-in-docker | 8 ++--- ci/Dockerfile | 74 ----------------------------------------- ci/Jenkinsfile.premerge | 8 ++--- ci/submodule-sync.sh | 4 +-- 5 files changed, 39 insertions(+), 99 deletions(-) delete mode 100755 ci/Dockerfile diff --git a/build/build-in-docker b/build/build-in-docker index 421cc1a855..e785ff76dc 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,30 +24,44 @@ set -e SCRIPTDIR=$(cd $(dirname $0); pwd) LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} -CUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM:-ON} USE_GDS=${USE_GDS:-ON} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} +export DOCKER_BUILD_EXTRA_ARGS="--platform=linux/amd64 --build-arg CMAKE_ARCH=x86_64" +profiles="source-javadoc" +CUDA_VER=${CUDA_VER:-cuda11} +USE_SANITIZER=${USE_SANITIZER:-ON} +BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} if (( $# == 0 )); then echo "Usage: $0 " exit 1 fi -_CUDF_CLEAN_SKIP="" -# if ccache is enabled and libcudf.clean.skip not provided -# by the user remove the cpp build directory -# -if [[ "$CCACHE_DISABLE" != "1" ]]; then - if [[ ! 
"$*" =~ " -Dlibcudf.clean.skip=" ]]; then - # Don't skip clean if ccache is enabled - # unless the user overrides - _CUDF_CLEAN_SKIP="-Dlibcudf.clean.skip=false" - fi +case $(uname -m) in + x86_64|amd64) + arch=amd64;; + aarch64|arm64) + arch=arm64;; + *) + echo "Unsupported CPU architecture"; exit 1;; +esac + +# Set env for arm64 build +if [ "$arch" == "arm64" ]; then + profiles="${profiles},arm64" + USE_GDS="OFF" + USE_SANITIZER="ON" + BUILD_FAULTINJ="OFF" + export DOCKER_BUILD_EXTRA_ARGS="--platform=linux/arm64 --build-arg CMAKE_ARCH=aarch64" fi +git submodule update --init --recursive + $SCRIPTDIR/run-in-docker mvn \ + -P${profiles} \ + -Dlibcudf.build.configure=true \ -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ - -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$CUDF_USE_PER_THREAD_DEFAULT_STREAM \ - -DUSE_GDS=$USE_GDS \ - $_CUDF_CLEAN_SKIP \ + -DUSE_GDS=$USE_GDS -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ + -DBUILD_TESTS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \ + -DUSE_SANITIZER=${USE_SANITIZER} \ "$@" diff --git a/build/run-in-docker b/build/run-in-docker index 62d40aac48..97c1487473 100755 --- a/build/run-in-docker +++ b/build/run-in-docker @@ -1,7 +1,7 @@ #!/bin/bash # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -31,12 +31,12 @@ DOCKER_RUN_EXTRA_ARGS=${DOCKER_RUN_EXTRA_ARGS:-""} LOCAL_CCACHE_DIR=${LOCAL_CCACHE_DIR:-"$HOME/.ccache"} LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} -SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-centos7" +SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-rockylinux8" # ensure directories exist mkdir -p "$LOCAL_CCACHE_DIR" "$LOCAL_MAVEN_REPO" -$DOCKER_CMD build $DOCKER_BUILD_EXTRA_ARGS -f $REPODIR/ci/Dockerfile \ +$DOCKER_CMD build $DOCKER_BUILD_EXTRA_ARGS -f $REPODIR/ci/Dockerfile.multi \ --build-arg CUDA_VERSION=$CUDA_VERSION \ -t $SPARK_IMAGE_NAME \ $REPODIR/build @@ -74,4 +74,4 @@ $DOCKER_CMD run $DOCKER_GPU_OPTS $DOCKER_RUN_EXTRA_ARGS -u $(id -u):$(id -g) --r -e VERBOSE \ $DOCKER_OPTS \ $SPARK_IMAGE_NAME \ - scl enable devtoolset-11 "$RUN_CMD" + scl enable gcc-toolset-11 "$RUN_CMD" diff --git a/ci/Dockerfile b/ci/Dockerfile deleted file mode 100755 index e3b703a11e..0000000000 --- a/ci/Dockerfile +++ /dev/null @@ -1,74 +0,0 @@ -# -# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -### -# Build the image for spark-rapids-jni development environment. 
-# -# Arguments: CUDA_VERSION=11.8.0 -# -### -ARG CUDA_VERSION=11.8.0 -FROM nvidia/cuda:$CUDA_VERSION-devel-centos7 -ARG DEVTOOLSET_VERSION=11 -### Install basic requirements -RUN yum install -y centos-release-scl -RUN yum install -y devtoolset-${DEVTOOLSET_VERSION} rh-python38 epel-release -RUN yum install -y zlib-devel maven tar wget patch ninja-build -# require git 2.18+ to keep consistent submodule operations -RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm && yum install -y git -# pin urllib3<2.0 for https://github.com/psf/requests/issues/6432 -RUN scl enable rh-python38 "pip install requests 'urllib3<2.0'" - -## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins -RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids - -# 3.22.3: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache -ARG CMAKE_VERSION=3.26.4 - -RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ - tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ - rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz -ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH - -# ccache for interactive builds -ARG CCACHE_VERSION=4.6 -RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \ - tar zxf ccache-${CCACHE_VERSION}.tar.gz && \ - rm ccache-${CCACHE_VERSION}.tar.gz && \ - cd ccache-${CCACHE_VERSION} && \ - mkdir build && \ - cd build && \ - scl enable devtoolset-${DEVTOOLSET_VERSION} \ - "cmake .. \ - -DCMAKE_BUILD_TYPE=Release \ - -DZSTD_FROM_INTERNET=ON \ - -DREDIS_STORAGE_BACKEND=OFF && \ - cmake --build . --parallel ${PARALLEL_LEVEL} --target install" && \ - cd ../.. 
&& \ - rm -rf ccache-${CCACHE_VERSION} - -## install a version of boost that is needed for arrow/parquet to work -RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \ - tar -xzf boost_1_79_0.tar.gz && \ - rm boost_1_79_0.tar.gz && \ - cd boost_1_79_0 && \ - ./bootstrap.sh --prefix=/usr/local && \ - ./b2 install --prefix=/usr/local --with-filesystem --with-system && \ - cd /usr/local && \ - rm -rf boost_1_79_0 - -# disable cuda container constraints to allow running w/ elder drivers on data-center GPUs -ENV NVIDIA_DISABLE_REQUIRE="true" diff --git a/ci/Jenkinsfile.premerge b/ci/Jenkinsfile.premerge index a59db1af9a..465635ab8c 100644 --- a/ci/Jenkinsfile.premerge +++ b/ci/Jenkinsfile.premerge @@ -30,9 +30,9 @@ import ipp.blossom.* def githubHelper // blossom github helper def TEMP_IMAGE_BUILD = true -def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:centos7-cuda11.8.0-blossom" +def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:rockylinux8-cuda11.8.0-blossom" def cpuImage = pod.getCPUYAML(IMAGE_PREMERGE) -def PREMERGE_DOCKERFILE = 'ci/Dockerfile' +def PREMERGE_DOCKERFILE = 'ci/Dockerfile.multi' def PREMERGE_TAG def skipped = false def major_ver // major version extracted from project version @@ -150,7 +150,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" } if (TEMP_IMAGE_BUILD) { - PREMERGE_TAG = "centos7-cuda11.8.0-blossom-dev-${BUILD_TAG}" + PREMERGE_TAG = "rockylinux8-cuda11.8.0-blossom-dev-${BUILD_TAG}" IMAGE_PREMERGE = "${ARTIFACTORY_NAME}/sw-spark-docker-local/plugin-jni:${PREMERGE_TAG}" docker.build(IMAGE_PREMERGE, "--network=host -f ${PREMERGE_DOCKERFILE} -t $IMAGE_PREMERGE .") uploadDocker(IMAGE_PREMERGE) @@ -212,7 +212,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" container('gpu') { timeout(time: 3, unit: 'HOURS') { // step only timeout for test run 
common.resolveIncompatibleDriverIssue(this) - sh 'scl enable devtoolset-11 "ci/premerge-build.sh"' + sh 'scl enable gcc-toolset-11 "ci/premerge-build.sh"' sh 'bash ci/fuzz-test.sh' } } diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index 18119dc45d..2013f7b3af 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -17,8 +17,8 @@ # NOTE: # this script is for jenkins only, and should not be used for local development -# run with ci/Dockerfile in jenkins: -# scl enable devtoolset-11 rh-python38 "ci/submodule-sync.sh" +# run with ci/Dockerfile.multi in jenkins: +# scl enable gcc-toolset-11 rh-python38 "ci/submodule-sync.sh" set -ex From 71f33dbac4b9a959feecb6444546856d50212ed5 Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Sat, 4 May 2024 23:14:15 +0800 Subject: [PATCH 2/9] Update for code review Signed-off-by: Tim Liu --- build/build-in-docker | 7 +------ build/run-in-docker | 2 +- ci/{Dockerfile.multi => Dockerfile} | 25 ++++++++++--------------- 3 files changed, 12 insertions(+), 22 deletions(-) rename ci/{Dockerfile.multi => Dockerfile} (74%) diff --git a/build/build-in-docker b/build/build-in-docker index e785ff76dc..bb465f942c 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -27,7 +27,6 @@ LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} USE_GDS=${USE_GDS:-ON} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} export DOCKER_BUILD_EXTRA_ARGS="--platform=linux/amd64 --build-arg CMAKE_ARCH=x86_64" -profiles="source-javadoc" CUDA_VER=${CUDA_VER:-cuda11} USE_SANITIZER=${USE_SANITIZER:-ON} BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} @@ -55,13 +54,9 @@ if [ "$arch" == "arm64" ]; then export DOCKER_BUILD_EXTRA_ARGS="--platform=linux/arm64 --build-arg CMAKE_ARCH=aarch64" fi -git submodule update --init --recursive - $SCRIPTDIR/run-in-docker mvn \ - -P${profiles} \ - -Dlibcudf.build.configure=true \ -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ - -DUSE_GDS=$USE_GDS -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ + 
-DUSE_GDS=$USE_GDS \ -DBUILD_TESTS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \ -DUSE_SANITIZER=${USE_SANITIZER} \ "$@" diff --git a/build/run-in-docker b/build/run-in-docker index 97c1487473..56f10cb862 100755 --- a/build/run-in-docker +++ b/build/run-in-docker @@ -36,7 +36,7 @@ SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-rockylinux8" # ensure directories exist mkdir -p "$LOCAL_CCACHE_DIR" "$LOCAL_MAVEN_REPO" -$DOCKER_CMD build $DOCKER_BUILD_EXTRA_ARGS -f $REPODIR/ci/Dockerfile.multi \ +$DOCKER_CMD build $DOCKER_BUILD_EXTRA_ARGS -f $REPODIR/ci/Dockerfile \ --build-arg CUDA_VERSION=$CUDA_VERSION \ -t $SPARK_IMAGE_NAME \ $REPODIR/build diff --git a/ci/Dockerfile.multi b/ci/Dockerfile similarity index 74% rename from ci/Dockerfile.multi rename to ci/Dockerfile index d3b198530b..2f41d1ecba 100644 --- a/ci/Dockerfile.multi +++ b/ci/Dockerfile @@ -1,5 +1,5 @@ # -# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,31 +15,26 @@ # ### -# JNI CI image for multi-platform build +# Build the image for spark-rapids-jni development environment. 
# -# Arguments: CUDA_VERSION=11.8.0 +# Arguments: CUDA_VERSION=[11.X.Y, 12.X.Y], OS_RELEASE=[8, 9], TARGETPLATFORM=[linux/amd64, linux/amd64] # ### ARG CUDA_VERSION=11.8.0 ARG OS_RELEASE=8 # multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host -# check available offcial arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) +# check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE ARG TOOLSET_VERSION=11 ### Install basic requirements -RUN dnf install -y scl-utils -RUN dnf install -y gcc-toolset-${TOOLSET_VERSION} python39 -RUN dnf --enablerepo=powertools install -y zlib-devel maven tar wget patch ninja-build -# require git 2.18+ to keep consistent submodule operations -RUN dnf install -y git +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} python39 zlib-devel maven tar wget patch ninja-build git ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins -RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids +RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids -# 3.22.3+: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache +# 3.22.3: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache ARG CMAKE_VERSION=3.26.4 -# default as arm64 release -ARG CMAKE_ARCH=aarch64 -# aarch64 cmake for arm build +# default x86_64 from x86 build, aarch64 cmake for arm build +ARG CMAKE_ARCH=x86_64 RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ tar zxf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz && \ rm cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz @@ -58,7 +53,7 @@ RUN cd /tmp && wget 
--quiet https://github.com/ccache/ccache/releases/download/v -DCMAKE_BUILD_TYPE=Release \ -DZSTD_FROM_INTERNET=ON \ -DREDIS_STORAGE_BACKEND=OFF && \ - cmake --build . --parallel 4 --target install" && \ + cmake --build . --parallel ${PARALLEL_LEVEL} --target install" && \ cd ../.. && \ rm -rf ccache-${CCACHE_VERSION} From af8c23c1a8c676b98d1f8a2a6697996f2a9c2ace Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Mon, 6 May 2024 11:20:12 +0800 Subject: [PATCH 3/9] Dockerfile.multi to Dockerfile Signed-off-by: Tim Liu --- ci/Jenkinsfile.premerge | 2 +- ci/submodule-sync.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/Jenkinsfile.premerge b/ci/Jenkinsfile.premerge index 465635ab8c..0a00eb6f1b 100644 --- a/ci/Jenkinsfile.premerge +++ b/ci/Jenkinsfile.premerge @@ -32,7 +32,7 @@ def githubHelper // blossom github helper def TEMP_IMAGE_BUILD = true def IMAGE_PREMERGE = "${common.ARTIFACTORY_NAME}/sw-spark-docker/plugin-jni:rockylinux8-cuda11.8.0-blossom" def cpuImage = pod.getCPUYAML(IMAGE_PREMERGE) -def PREMERGE_DOCKERFILE = 'ci/Dockerfile.multi' +def PREMERGE_DOCKERFILE = 'ci/Dockerfile' def PREMERGE_TAG def skipped = false def major_ver // major version extracted from project version diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index 2013f7b3af..f591f73a23 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -17,7 +17,7 @@ # NOTE: # this script is for jenkins only, and should not be used for local development -# run with ci/Dockerfile.multi in jenkins: +# run with ci/Dockerfile in jenkins: # scl enable gcc-toolset-11 rh-python38 "ci/submodule-sync.sh" set -ex From 329db1b9b3a73f5d2009e2976bca96d6a41728e6 Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Mon, 6 May 2024 19:40:13 +0800 Subject: [PATCH 4/9] Change '--platform' param to be compatible with lower Docker versions Signed-off-by: Tim Liu --- build/build-in-docker | 4 ++-- ci/Dockerfile | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git
a/build/build-in-docker b/build/build-in-docker index bb465f942c..124f50d47c 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -26,7 +26,7 @@ SCRIPTDIR=$(cd $(dirname $0); pwd) LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} USE_GDS=${USE_GDS:-ON} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} -export DOCKER_BUILD_EXTRA_ARGS="--platform=linux/amd64 --build-arg CMAKE_ARCH=x86_64" +export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64" CUDA_VER=${CUDA_VER:-cuda11} USE_SANITIZER=${USE_SANITIZER:-ON} BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} @@ -51,7 +51,7 @@ if [ "$arch" == "arm64" ]; then USE_GDS="OFF" USE_SANITIZER="ON" BUILD_FAULTINJ="OFF" - export DOCKER_BUILD_EXTRA_ARGS="--platform=linux/arm64 --build-arg CMAKE_ARCH=aarch64" + export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64" fi $SCRIPTDIR/run-in-docker mvn \ diff --git a/ci/Dockerfile b/ci/Dockerfile index 2f41d1ecba..e78093b385 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -17,11 +17,12 @@ ### # Build the image for spark-rapids-jni development environment. 
# -# Arguments: CUDA_VERSION=[11.X.Y, 12.X.Y], OS_RELEASE=[8, 9], TARGETPLATFORM=[linux/amd64, linux/amd64] +# Arguments: CUDA_VERSION=[11.X.Y, 12.X.Y], OS_RELEASE=[8, 9], TARGETPLATFORM=[linux/amd64, linux/arm64] # ### ARG CUDA_VERSION=11.8.0 ARG OS_RELEASE=8 +ARG TARGETPLATFORM=linux/amd64 # multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 on either amd64 or arm64 host # check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH) FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE From d22473fb48cdf526fdf6b40c1f7795ddbde8babc Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Tue, 7 May 2024 23:01:03 +0800 Subject: [PATCH 5/9] Update for code review Signed-off-by: Tim Liu --- build/build-in-docker | 24 +++++++----------------- ci/Dockerfile | 2 +- pom.xml | 5 +++++ 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/build/build-in-docker b/build/build-in-docker index 124f50d47c..cd1c405c34 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -27,7 +27,7 @@ LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} USE_GDS=${USE_GDS:-ON} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64" -CUDA_VER=${CUDA_VER:-cuda11} +CUDA_VERSION=${CUDA_VERSION:-cuda11} USE_SANITIZER=${USE_SANITIZER:-ON} BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} @@ -36,27 +36,17 @@ if (( $# == 0 )); then exit 1 fi -case $(uname -m) in - x86_64|amd64) - arch=amd64;; - aarch64|arm64) - arch=arm64;; - *) - echo "Unsupported CPU architecture"; exit 1;; -esac - -# Set env for arm64 build -if [ "$arch" == "arm64" ]; then - profiles="${profiles},arm64" - USE_GDS="OFF" - USE_SANITIZER="ON" - BUILD_FAULTINJ="OFF" +# Set env for arm64 build, The possible values of 'uname -m' : [x86_64/i386/aarch64/mips/...] 
+if [ "$(uname -m)" == "aarch64" ]; then + USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit. + USE_SANITIZER="ON" # OOM failures in rmm occur during arm64 unit tests when USE_SANITIZER=OFF + BUILD_FAULTINJ="OFF" # libcupti_static.a linked by cufaultinj, does not exist in the arm64 CUDA toolkit. export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64" fi $SCRIPTDIR/run-in-docker mvn \ -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ -DUSE_GDS=$USE_GDS \ - -DBUILD_TESTS=ON -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VER \ + -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VERSION \ -DUSE_SANITIZER=${USE_SANITIZER} \ "$@" diff --git a/ci/Dockerfile b/ci/Dockerfile index e78093b385..f36ede2233 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -30,7 +30,7 @@ ARG TOOLSET_VERSION=11 ### Install basic requirements RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} python39 zlib-devel maven tar wget patch ninja-build git ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins -RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids +RUN mkdir -m 777 /usr/local/rapids /rapids # 3.22.3: CUDA architecture 'native' support + flexible CMAKE__*_LAUNCHER for ccache ARG CMAKE_VERSION=3.26.4 diff --git a/pom.xml b/pom.xml index 745f8127d1..24daa4635e 100644 --- a/pom.xml +++ b/pom.xml @@ -338,6 +338,11 @@ arm64 + + + aarch64 + + ${cuda.version}-arm64 From c4957c1612fa3ac8372fd1bf6deb91427c26e13d Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Wed, 8 May 2024 13:07:53 +0800 Subject: [PATCH 6/9] Make cuda version consistent Signed-off-by: Tim Liu --- build/build-in-docker | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build/build-in-docker b/build/build-in-docker index cd1c405c34..7c6bb46aac 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -27,7 +27,8 @@ 
LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} USE_GDS=${USE_GDS:-ON} export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64" -CUDA_VERSION=${CUDA_VERSION:-cuda11} +export CUDA_VERSION=${CUDA_VERSION:-11.8.0} +CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*} USE_SANITIZER=${USE_SANITIZER:-ON} BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} @@ -47,6 +48,6 @@ fi $SCRIPTDIR/run-in-docker mvn \ -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ -DUSE_GDS=$USE_GDS \ - -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_VERSION \ + -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_CLASSIFIER \ -DUSE_SANITIZER=${USE_SANITIZER} \ "$@" From 9d59060f78b27f2faba66620d601cd4502e2f051 Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Thu, 9 May 2024 00:26:21 +0800 Subject: [PATCH 7/9] Update according to the review comments Signed-off-by: Tim Liu --- build/build-in-docker | 10 +++------- build/run-in-docker | 7 ++++++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/build/build-in-docker b/build/build-in-docker index 7c6bb46aac..0231a0cda5 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -25,11 +25,9 @@ SCRIPTDIR=$(cd $(dirname $0); pwd) LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} USE_GDS=${USE_GDS:-ON} -export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} -export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64" +# Make CUDA_VERSION consistent with the file run-in-docker export CUDA_VERSION=${CUDA_VERSION:-11.8.0} CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*} -USE_SANITIZER=${USE_SANITIZER:-ON} BUILD_FAULTINJ=${BUILD_FAULTINJ:-ON} if (( $# == 0 )); then @@ -40,14 +38,12 @@ fi # Set env for arm64 build, The possible values of 'uname -m' : [x86_64/i386/aarch64/mips/...] if [ "$(uname -m)" == "aarch64" ]; then USE_GDS="OFF" # The GDS cuFiles RDMA libraries are not included in the arm64 CUDA toolkit. 
- USE_SANITIZER="ON" # OOM failures in rmm occur during arm64 unit tests when USE_SANITIZER=OFF BUILD_FAULTINJ="OFF" # libcupti_static.a linked by cufaultinj, does not exist in the arm64 CUDA toolkit. - export DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64" fi $SCRIPTDIR/run-in-docker mvn \ -Dmaven.repo.local=$LOCAL_MAVEN_REPO \ -DUSE_GDS=$USE_GDS \ - -DBUILD_FAULTINJ=${BUILD_FAULTINJ} -Dcuda.version=$CUDA_CLASSIFIER \ - -DUSE_SANITIZER=${USE_SANITIZER} \ + -DBUILD_FAULTINJ=${BUILD_FAULTINJ} \ + -Dcuda.version=$CUDA_CLASSIFIER \ "$@" diff --git a/build/run-in-docker b/build/run-in-docker index 56f10cb862..779f29a3a6 100755 --- a/build/run-in-docker +++ b/build/run-in-docker @@ -26,7 +26,12 @@ REPODIR=$SCRIPTDIR/.. CUDA_VERSION=${CUDA_VERSION:-11.8.0} DOCKER_CMD=${DOCKER_CMD:-docker} -DOCKER_BUILD_EXTRA_ARGS=${DOCKER_BUILD_EXTRA_ARGS:-""} +CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} +if [ "$(uname -m)" == "aarch64" ]; then + DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64" +else + DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64" +fi DOCKER_RUN_EXTRA_ARGS=${DOCKER_RUN_EXTRA_ARGS:-""} LOCAL_CCACHE_DIR=${LOCAL_CCACHE_DIR:-"$HOME/.ccache"} LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} From 9e096cb5c1e1111a4d3efead4fe553b56bd143de Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Thu, 9 May 2024 08:04:14 +0800 Subject: [PATCH 8/9] Update build/run-in-docker Co-authored-by: Jason Lowe --- build/run-in-docker | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/build/run-in-docker b/build/run-in-docker index 779f29a3a6..95f5b4e17d 100755 --- a/build/run-in-docker +++ b/build/run-in-docker @@ -27,10 +27,11 @@ REPODIR=$SCRIPTDIR/.. 
CUDA_VERSION=${CUDA_VERSION:-11.8.0} DOCKER_CMD=${DOCKER_CMD:-docker} CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} +DOCKER_BUILD_EXTRA_ARGS=${DOCKER_BUILD_EXTRA_ARGS:-""} if [ "$(uname -m)" == "aarch64" ]; then - DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64" + DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64 $DOCKER_BUILD_EXTRA_ARGS" else - DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64" + DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/amd64 --build-arg CMAKE_ARCH=x86_64 $DOCKER_BUILD_EXTRA_ARGS" fi DOCKER_RUN_EXTRA_ARGS=${DOCKER_RUN_EXTRA_ARGS:-""} LOCAL_CCACHE_DIR=${LOCAL_CCACHE_DIR:-"$HOME/.ccache"} From deab896f44387bfb5e046a226caeff518788328b Mon Sep 17 00:00:00 2001 From: Tim Liu Date: Fri, 10 May 2024 12:49:01 +0800 Subject: [PATCH 9/9] Move CMAKE_GENERATOR back into build-in-docker Signed-off-by: Tim Liu --- build/build-in-docker | 1 + build/run-in-docker | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/build/build-in-docker b/build/build-in-docker index 0231a0cda5..49032185ba 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -25,6 +25,7 @@ SCRIPTDIR=$(cd $(dirname $0); pwd) LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} USE_GDS=${USE_GDS:-ON} +export CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} # Make CUDA_VERSION consistent with the file run-in-docker export CUDA_VERSION=${CUDA_VERSION:-11.8.0} CUDA_CLASSIFIER=cuda${CUDA_VERSION%%.*} diff --git a/build/run-in-docker b/build/run-in-docker index 95f5b4e17d..81152a1d9d 100755 --- a/build/run-in-docker +++ b/build/run-in-docker @@ -26,7 +26,6 @@ REPODIR=$SCRIPTDIR/.. 
CUDA_VERSION=${CUDA_VERSION:-11.8.0} DOCKER_CMD=${DOCKER_CMD:-docker} -CMAKE_GENERATOR=${CMAKE_GENERATOR:-"Ninja"} DOCKER_BUILD_EXTRA_ARGS=${DOCKER_BUILD_EXTRA_ARGS:-""} if [ "$(uname -m)" == "aarch64" ]; then DOCKER_BUILD_EXTRA_ARGS="--build-arg TARGETPLATFORM=linux/arm64 --build-arg CMAKE_ARCH=aarch64 $DOCKER_BUILD_EXTRA_ARGS"