Skip to content

Commit

Permalink
Merge pull request #10101 from rapidsai/branch-22.02
Browse files Browse the repository at this point in the history
  • Loading branch information
raydouglass committed Feb 2, 2022
2 parents 06540b9 + a7d88cd commit f39f559
Show file tree
Hide file tree
Showing 679 changed files with 27,842 additions and 13,014 deletions.
27 changes: 27 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
---
Checks:
'modernize-*,
-modernize-use-equals-default,
-modernize-concat-nested-namespaces,
-modernize-use-trailing-return-type'

# -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
# -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
# -modernize-use-trailing-return-type # just a preference

WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
FormatStyle: none
CheckOptions:
- key: modernize-loop-convert.MaxCopySize
value: '16'
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-pass-by-value.IncludeStyle
value: llvm
- key: modernize-replace-auto-ptr.IncludeStyle
value: llvm
- key: modernize-use-nullptr.NullMacros
value: 'NULL'
...
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ repos:
args: ['-fallback-style=none']
- id: cmake-format
name: cmake-format
entry: bash cpp/scripts/run-cmake-format.sh cmake-format
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
Expand All @@ -81,7 +81,7 @@ repos:
- cmake-format==0.6.11
- id: cmake-lint
name: cmake-lint
entry: bash cpp/scripts/run-cmake-format.sh cmake-lint
entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
language: python
types: [cmake]
# Note that pre-commit autoupdate does not update the versions
Expand Down
468 changes: 465 additions & 3 deletions CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ git submodule update --init --remote --recursive
```bash
# create the conda environment (assuming in base `cudf` directory)
# note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.0.yml
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.5.yml
# activate the environment
conda activate cudf_dev
```
Expand Down
49 changes: 47 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

# cuDF build script

Expand All @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h"
VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand All @@ -37,6 +37,8 @@ HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafk
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
--build_metrics - generate build metrics report for libcudf
--incl_cache_stats - include cache statistics in build metrics report
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
-h | --h[elp] - print this text
Expand All @@ -61,6 +63,8 @@ BUILD_NVTX=ON
BUILD_TESTS=OFF
BUILD_DISABLE_DEPRECATION_WARNING=ON
BUILD_PER_THREAD_DEFAULT_STREAM=OFF
BUILD_REPORT_METRICS=OFF
BUILD_REPORT_INCL_CACHE_STATS=OFF

# Set defaults for vars that may not have been defined externally
# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
Expand Down Expand Up @@ -144,6 +148,14 @@ fi
if hasArg --ptds; then
BUILD_PER_THREAD_DEFAULT_STREAM=ON
fi
if hasArg --build_metrics; then
BUILD_REPORT_METRICS=ON
fi

if hasArg --incl_cache_stats; then
BUILD_REPORT_INCL_CACHE_STATS=ON
fi


# If clean given, run it prior to any other steps
if hasArg clean; then
Expand Down Expand Up @@ -172,6 +184,15 @@ if buildAll || hasArg libcudf; then
echo "Building for *ALL* supported GPU architectures..."
fi

# get the current count before the compile starts
FILES_IN_CCACHE=""
if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v ccache)" ]]; then
FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
echo "$FILES_IN_CCACHE"
# zero the ccache statistics
ccache -z
fi

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
${CUDF_CMAKE_CUDA_ARCHITECTURES} \
Expand All @@ -185,7 +206,31 @@ if buildAll || hasArg libcudf; then

cd ${LIB_BUILD_DIR}

compile_start=$(date +%s)
cmake --build . -j${PARALLEL_LEVEL} ${VERBOSE_FLAG}
compile_end=$(date +%s)
compile_total=$(( compile_end - compile_start ))

# Record build times
if [[ "$BUILD_REPORT_METRICS" == "ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
MSG="<p>"
# get some ccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
MSG="${MSG}<br/>$FILES_IN_CCACHE"
HIT_RATE=$(ccache -s | grep "cache hit rate")
MSG="${MSG}<br/>${HIT_RATE}"
fi
MSG="${MSG}<br/>parallel setting: $PARALLEL_LEVEL"
MSG="${MSG}<br/>parallel build time: $compile_total seconds"
if [[ -f "${LIB_BUILD_DIR}/libcudf.so" ]]; then
LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}')
MSG="${MSG}<br/>libcudf.so size: $LIBCUDF_FS"
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
cmake --build . -j${PARALLEL_LEVEL} --target install ${VERBOSE_FLAG}
Expand Down
4 changes: 2 additions & 2 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"

# Dask & Distributed git tag
export DASK_DISTRIBUTED_GIT_TAG='2021.11.2'
export DASK_DISTRIBUTED_GIT_TAG='2022.01.0'

function remove_libcudf_kernel_cache_dir {
EXITCODE=$?
Expand Down Expand Up @@ -98,7 +98,7 @@ conda list --show-channel-urls
################################################################################

logger "Build libcudf..."
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds
Expand Down
8 changes: 8 additions & 0 deletions ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
mkdir -p ${CONDA_BLD_DIR}/libcudf/work
cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work

# Copy libcudf build metrics results
LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
echo "Checking for build metrics log $LIBCUDF_BUILD_DIR/ninja_log.html"
if [[ -f "$LIBCUDF_BUILD_DIR/ninja_log.html" ]]; then
gpuci_logger "Copying build metrics results"
mkdir -p "$WORKSPACE/build-metrics"
cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
fi

gpuci_logger "Build conda pkg for libcudf_kafka"
gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf_kafka $CONDA_BUILD_ARGS
Expand Down
28 changes: 4 additions & 24 deletions ci/cpu/prebuild.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,11 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
set -e

DEFAULT_CUDA_VER="11.5"

#Always upload cudf Python package
#Always upload cudf packages
export UPLOAD_CUDF=1

#Upload libcudf once per CUDA
if [[ "$PYTHON" == "3.7" ]]; then
export UPLOAD_LIBCUDF=1
else
export UPLOAD_LIBCUDF=0
fi

# upload cudf_kafka for all versions of Python
if [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
export UPLOAD_CUDF_KAFKA=1
else
export UPLOAD_CUDF_KAFKA=0
fi

#We only want to upload libcudf_kafka once per python/CUDA combo
if [[ "$PYTHON" == "3.7" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
export UPLOAD_LIBCUDF_KAFKA=1
else
export UPLOAD_LIBCUDF_KAFKA=0
fi
export UPLOAD_LIBCUDF=1
export UPLOAD_CUDF_KAFKA=1
export UPLOAD_LIBCUDF_KAFKA=1

if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
#If project flash is not activate, always build both
Expand Down
2 changes: 1 addition & 1 deletion ci/cpu/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export GPUCI_RETRY_SLEEP=30
export LABEL_OPTION=${LABEL_OPTION:-"--label main"}

# Skip uploads unless BUILD_MODE == "branch"
if [ ${BUILD_MODE} != "branch" ]; then
if [ "${BUILD_MODE}" != "branch" ]; then
echo "Skipping upload"
return 0
fi
Expand Down
49 changes: 41 additions & 8 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
##############################################
# cuDF GPU build and test script for CI #
##############################################
Expand Down Expand Up @@ -31,7 +31,10 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# Dask & Distributed git tag
export DASK_DISTRIBUTED_GIT_TAG='2021.11.2'
export DASK_DISTRIBUTED_GIT_TAG='2022.01.0'

# ucx-py version
export UCX_PY_VERSION='0.24.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down Expand Up @@ -83,10 +86,10 @@ gpuci_mamba_retry install -y \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=0.23.*"
"ucx-py=${UCX_PY_VERSION}"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_mamba_retry install -y "your-pkg=1.0.0"


Expand Down Expand Up @@ -121,7 +124,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
################################################################################

gpuci_logger "Build from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
Expand Down Expand Up @@ -166,16 +169,46 @@ else
gpuci_logger "Check GPU usage"
nvidia-smi

gpuci_logger "GoogleTests"
set -x
cd $LIB_BUILD_DIR

gpuci_logger "GoogleTests"

for gt in gtests/* ; do
test_name=$(basename ${gt})
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
done

# Copy libcudf build time results
echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.xml"
if [[ -f "$LIB_BUILD_DIR/ninja_log.xml" ]]; then
gpuci_logger "Copying build time results"
cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml"
fi

################################################################################
# MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds)
################################################################################
if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then
if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then
gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda"
export GTEST_CUDF_RMM_MODE=cuda
COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
mkdir -p "$WORKSPACE/test-results/"
for gt in gtests/*; do
test_name=$(basename ${gt})
if [[ "$test_name" == "ERROR_TEST" ]]; then
continue
fi
echo "Running GoogleTest $test_name"
${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log"
done
unset GTEST_CUDF_RMM_MODE
# test-results/*.cs.log are processed in gpuci
fi
fi

CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"`
CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension
CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install
Expand All @@ -184,12 +217,12 @@ else
KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install

gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE"
conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"

install_dask

gpuci_logger "Build python libs from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds
else
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
Expand Down
15 changes: 11 additions & 4 deletions ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# ucx-py version
export UCX_PY_VERSION='0.24.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
################################################################################
Expand Down Expand Up @@ -74,19 +77,23 @@ conda config --show-sources
conda list --show-channel-urls

gpuci_logger "Install dependencies"
gpuci_conda_retry install -y \
gpuci_mamba_retry install -y \
"cudatoolkit=$CUDA_REL" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=0.23.*" \
"ucx-py=${UCX_PY_VERSION}" \
"openjdk=8.*" \
"maven"
# "mamba install openjdk" adds an activation script to set JAVA_HOME but this is
# not triggered on installation. Re-activating the conda environment will set
# this environment variable so that CMake can find JNI.
conda activate rapids

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry install -y "your-pkg=1.0.0"
# gpuci_mamba_retry install -y "your-pkg=1.0.0"


gpuci_logger "Check compiler versions"
Expand Down Expand Up @@ -127,7 +134,7 @@ KAFKA_CONDA_FILE=`basename "$KAFKA_CONDA_FILE" .tar.bz2` #get filename without e
KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install

gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE"
conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"

install_dask

Expand Down
Loading

0 comments on commit f39f559

Please sign in to comment.