diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0222e04b90..d359f79236 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,9 +49,12 @@ so it should be invoked as one would invoke Maven, e.g.: `build/build-in-docker ### cudf Submodule and Build [RAPIDS cuDF](https://github.com/rapidsai/cudf) is being used as a submodule in this project. -Due to the lengthy build of libcudf, it is **not cleaned** during a normal Maven clean phase. -Use `-Dlibcudf.clean.skip=true` to clean the libcudf build area in addition to the normal clean -of `target/` directories. +Due to the lengthy build of libcudf, it is **not cleaned** during a normal Maven clean phase +unless built using `build/build-in-docker` with `ccache` enabled. `build/build-in-docker` uses +`ccache` by default unless `CCACHE_DISABLE=1` is set in the environment. + +`-Dlibcudf.clean.skip=false` can also be specified on the Maven command line to force +libcudf to be cleaned during the Maven clean phase. Currently libcudf is only configured once and the build relies on cmake to re-configure as needed. This is because libcudf currently is rebuilding almost entirely when it is configured with the same diff --git a/build/build-in-docker b/build/build-in-docker index 462dfdad3b..a017226f8c 100755 --- a/build/build-in-docker +++ b/build/build-in-docker @@ -27,6 +27,7 @@ REPODIR=$SCRIPTDIR/.. 
CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} CUDA_VERSION=${CUDA_VERSION:-11.5.0} DOCKER_CMD=${DOCKER_CMD:-nvidia-docker} +LOCAL_CCACHE_DIR=${LOCAL_CCACHE_DIR:-"$HOME/.ccache"} LOCAL_MAVEN_REPO=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"} PER_THREAD_DEFAULT_STREAM=${PER_THREAD_DEFAULT_STREAM:-ON} USE_GDS=${USE_GDS:-ON} @@ -34,6 +35,7 @@ USE_GDS=${USE_GDS:-ON} SPARK_IMAGE_NAME="spark-rapids-jni-build:${CUDA_VERSION}-devel-centos7" WORKSPACE_DIR=/rapids WORKSPACE_REPODIR="$WORKSPACE_DIR/spark-rapids-jni" +WORKSPACE_CCACHE_REPODIR="$WORKSPACE_DIR/.ccache" WORKSPACE_MAVEN_REPODIR="$WORKSPACE_DIR/.m2/repository" if (( $# == 0 )); then @@ -41,19 +43,41 @@ if (( $# == 0 )); then exit 1 fi +# ensure the host ccache and Maven repository directories exist before they are bind-mounted +mkdir -p "$LOCAL_CCACHE_DIR" "$LOCAL_MAVEN_REPO" + $DOCKER_CMD build -f $REPODIR/ci/Dockerfile \ --build-arg CUDA_VERSION=$CUDA_VERSION \ -t $SPARK_IMAGE_NAME \ $REPODIR/build +_CUDF_CLEAN_SKIP="" +# If ccache is enabled and the user did not pass -Dlibcudf.clean.skip=..., +# ask Maven to clean the libcudf (cpp) build directory as well. +# +if [[ "$CCACHE_DISABLE" != "1" ]]; then + if [[ ! 
" $*" =~ " -Dlibcudf.clean.skip=" ]]; then + # Don't skip clean if ccache is enabled unless the user overrides. + # The leading space added to "$*" lets the flag match even as the first argument. + _CUDF_CLEAN_SKIP="-Dlibcudf.clean.skip=false" + fi +fi + $DOCKER_CMD run -it -u $(id -u):$(id -g) --rm \ -v "/etc/group:/etc/group:ro" \ -v "/etc/passwd:/etc/passwd:ro" \ -v "/etc/shadow:/etc/shadow:ro" \ -v "/etc/sudoers.d:/etc/sudoers.d:ro" \ -v "$REPODIR:$WORKSPACE_REPODIR:rw" \ + -v "$LOCAL_CCACHE_DIR:$WORKSPACE_CCACHE_REPODIR:rw" \ -v "$LOCAL_MAVEN_REPO:$WORKSPACE_MAVEN_REPODIR:rw" \ --workdir "$WORKSPACE_REPODIR" \ + -e CCACHE_DISABLE \ + -e CCACHE_DIR="$WORKSPACE_CCACHE_REPODIR" \ + -e CMAKE_C_COMPILER_LAUNCHER="ccache" \ + -e CMAKE_CXX_COMPILER_LAUNCHER="ccache" \ + -e CMAKE_CUDA_COMPILER_LAUNCHER="ccache" \ + -e CMAKE_CXX_LINKER_LAUNCHER="ccache" \ -e CMAKE_GENERATOR="$CMAKE_GENERATOR" \ -e CUDA_VISIBLE_DEVICES \ -e PARALLEL_LEVEL \ @@ -63,4 +87,5 @@ $DOCKER_CMD run -it -u $(id -u):$(id -g) --rm \ -Dmaven.repo.local=$WORKSPACE_MAVEN_REPODIR \ -DPER_THREAD_DEFAULT_STREAM=$PER_THREAD_DEFAULT_STREAM \ -DUSE_GDS=$USE_GDS \ + $_CUDF_CLEAN_SKIP \ $*" diff --git a/ci/Dockerfile b/ci/Dockerfile index 4814ac4b27..9f8ab88afe 100755 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -22,10 +22,10 @@ ### ARG CUDA_VERSION=11.5.0 FROM gpuci/cuda:$CUDA_VERSION-devel-centos7 - +ARG DEVTOOLSET_VERSION=9 ### Install basic requirements RUN yum install -y centos-release-scl -RUN yum install -y devtoolset-9 rh-python38 epel-release +RUN yum install -y devtoolset-${DEVTOOLSET_VERSION} rh-python38 epel-release RUN yum install -y zlib-devel maven tar wget patch ninja-build # require git 2.18+ to keep consistent submodule operations RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo.x86_64.rpm && yum install -y git @@ -34,14 +34,33 @@ RUN scl enable rh-python38 "pip install requests" ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && 
chmod 777 /rapids -ARG CMAKE_VERSION=3.20.5 -RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ +# 3.22.3: CUDA architecture 'native' support + flexible CMAKE_*_LAUNCHER for ccache +ARG CMAKE_VERSION=3.22.3 + +RUN cd /usr/local && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH +# ccache for interactive builds. NOTE(review): no ARG/ENV for PARALLEL_LEVEL is visible in this hunk -- confirm it is set at image build time +ARG CCACHE_VERSION=4.6 +RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \ + tar zxf ccache-${CCACHE_VERSION}.tar.gz && \ + rm ccache-${CCACHE_VERSION}.tar.gz && \ + cd ccache-${CCACHE_VERSION} && \ + mkdir build && \ + cd build && \ + scl enable devtoolset-${DEVTOOLSET_VERSION} \ + "cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DZSTD_FROM_INTERNET=ON \ + -DREDIS_STORAGE_BACKEND=OFF && \ + cmake --build . --parallel ${PARALLEL_LEVEL} --target install" && \ + cd ../.. && \ + rm -rf ccache-${CCACHE_VERSION} + ## install a version of boost that is needed for arrow/parquet to work -RUN cd /usr/local && wget https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \ +RUN cd /usr/local && wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \ tar -xzf boost_1_79_0.tar.gz && \ rm boost_1_79_0.tar.gz && \ cd boost_1_79_0 && \