Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.02' into remove-meth…
Browse files Browse the repository at this point in the history
…od-parameter-from-merge-and-join
  • Loading branch information
bdice committed Jan 12, 2022
2 parents 9590998 + b8c4816 commit b689e4f
Show file tree
Hide file tree
Showing 165 changed files with 4,953 additions and 2,252 deletions.
43 changes: 35 additions & 8 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

# cuDF build script

Expand All @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h"
VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand All @@ -37,6 +37,8 @@ HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafk
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
--build_metrics - generate build metrics report for libcudf
--incl_cache_stats - include cache statistics in build metrics report
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
-h | --h[elp] - print this text
Expand All @@ -61,6 +63,8 @@ BUILD_NVTX=ON
BUILD_TESTS=OFF
BUILD_DISABLE_DEPRECATION_WARNING=ON
BUILD_PER_THREAD_DEFAULT_STREAM=OFF
BUILD_REPORT_METRICS=OFF
BUILD_REPORT_INCL_CACHE_STATS=OFF

# Set defaults for vars that may not have been defined externally
# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
Expand Down Expand Up @@ -144,6 +148,14 @@ fi
if hasArg --ptds; then
BUILD_PER_THREAD_DEFAULT_STREAM=ON
fi
if hasArg --build_metrics; then
BUILD_REPORT_METRICS=ON
fi

if hasArg --incl_cache_stats; then
BUILD_REPORT_INCL_CACHE_STATS=ON
fi


# If clean given, run it prior to any other steps
if hasArg clean; then
Expand Down Expand Up @@ -174,8 +186,11 @@ if buildAll || hasArg libcudf; then

# get the current count before the compile starts
FILES_IN_CCACHE=""
if [ -x "$(command -v ccache)" ]; then
if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
FILES_IN_CCACHE=$(ccache -s | grep "files in cache")
echo "$FILES_IN_CCACHE"
# zero the ccache statistics
ccache -z
fi

cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
Expand All @@ -197,12 +212,24 @@ if buildAll || hasArg libcudf; then
compile_total=$(( compile_end - compile_start ))

# Record build times
if [[ -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build times"
if [[ "$BUILD_REPORT_METRICS"=="ON" && -f "${LIB_BUILD_DIR}/.ninja_log" ]]; then
echo "Formatting build metrics"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt xml > ${LIB_BUILD_DIR}/ninja_log.xml
message="$FILES_IN_CCACHE <p>$PARALLEL_LEVEL parallel build time is $compile_total seconds"
echo "$message"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$message" > ${LIB_BUILD_DIR}/ninja_log.html
MSG="<p>"
# get some ccache stats after the compile
if [[ "$BUILD_REPORT_INCL_CACHE_STATS"=="ON" && -x "$(command -v ccache)" ]]; then
MSG="${MSG}<br/>$FILES_IN_CCACHE"
HIT_RATE=$(ccache -s | grep "cache hit rate")
MSG="${MSG}<br/>${HIT_RATE}"
fi
MSG="${MSG}<br/>parallel setting: $PARALLEL_LEVEL"
MSG="${MSG}<br/>parallel build time: $compile_total seconds"
if [[ -f "${LIB_BUILD_DIR}/libcudf.so" ]]; then
LIBCUDF_FS=$(ls -lh ${LIB_BUILD_DIR}/libcudf.so | awk '{print $5}')
MSG="${MSG}<br/>libcudf.so size: $LIBCUDF_FS"
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
fi

if [[ ${INSTALL_TARGET} != "" ]]; then
Expand Down
4 changes: 2 additions & 2 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"

# Dask & Distributed git tag
export DASK_DISTRIBUTED_GIT_TAG='2021.11.2'
export DASK_DISTRIBUTED_GIT_TAG='main'

function remove_libcudf_kernel_cache_dir {
EXITCODE=$?
Expand Down Expand Up @@ -98,7 +98,7 @@ conda list --show-channel-urls
################################################################################

logger "Build libcudf..."
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf benchmarks tests -l --ptds
Expand Down
8 changes: 8 additions & 0 deletions ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
mkdir -p ${CONDA_BLD_DIR}/libcudf/work
cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work

# Copy libcudf build metrics results
LIBCUDF_BUILD_DIR=$CONDA_BLD_DIR/libcudf/work/cpp/build
echo "Checking for build metrics log $LIBCUDF_BUILD_DIR/ninja_log.html"
if [[ -f "$LIBCUDF_BUILD_DIR/ninja_log.html" ]]; then
gpuci_logger "Copying build metrics results"
mkdir -p "$WORKSPACE/build-metrics"
cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
fi

gpuci_logger "Build conda pkg for libcudf_kafka"
gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf_kafka $CONDA_BUILD_ARGS
Expand Down
2 changes: 1 addition & 1 deletion ci/cpu/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export GPUCI_RETRY_SLEEP=30
export LABEL_OPTION=${LABEL_OPTION:-"--label main"}

# Skip uploads unless BUILD_MODE == "branch"
if [ ${BUILD_MODE} != "branch" ]; then
if [ "${BUILD_MODE}" != "branch" ]; then
echo "Skipping upload"
return 0
fi
Expand Down
14 changes: 6 additions & 8 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# Dask & Distributed git tag
export DASK_DISTRIBUTED_GIT_TAG='2021.11.2'
export DASK_DISTRIBUTED_GIT_TAG='main'

# ucx-py version
export UCX_PY_VERSION='0.24.*'
Expand Down Expand Up @@ -89,7 +89,7 @@ gpuci_mamba_retry install -y \
"ucx-py=${UCX_PY_VERSION}"

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_mamba_retry install -y "your-pkg=1.0.0"


Expand Down Expand Up @@ -124,7 +124,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
################################################################################

gpuci_logger "Build from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
else
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests -l --ptds
Expand Down Expand Up @@ -181,12 +181,10 @@ else
done

# Copy libcudf build time results
echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.html"
if [[ -f "$LIB_BUILD_DIR/ninja_log.html" ]]; then
echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.xml"
if [[ -f "$LIB_BUILD_DIR/ninja_log.xml" ]]; then
gpuci_logger "Copying build time results"
cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml"
mkdir -p "$WORKSPACE/build-metrics"
cp "$LIB_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
fi

################################################################################
Expand Down Expand Up @@ -224,7 +222,7 @@ else
install_dask

gpuci_logger "Build python libs from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
if [[ "${BUILD_MODE}" == "pull-request" ]]; then
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka --ptds
else
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
Expand Down
10 changes: 7 additions & 3 deletions ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ conda config --show-sources
conda list --show-channel-urls

gpuci_logger "Install dependencies"
gpuci_conda_retry install -y \
gpuci_mamba_retry install -y \
"cudatoolkit=$CUDA_REL" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*" \
Expand All @@ -86,10 +86,14 @@ gpuci_conda_retry install -y \
"ucx-py=${UCX_PY_VERSION}" \
"openjdk=8.*" \
"maven"
# "mamba install openjdk" adds an activation script to set JAVA_HOME but this is
# not triggered on installation. Re-activating the conda environment will set
# this environment variable so that CMake can find JNI.
conda activate rapids

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry install -y "your-pkg=1.0.0"
# gpuci_mamba_retry install -y "your-pkg=1.0.0"


gpuci_logger "Check compiler versions"
Expand Down Expand Up @@ -130,7 +134,7 @@ KAFKA_CONDA_FILE=`basename "$KAFKA_CONDA_FILE" .tar.bz2` #get filename without e
KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install

gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE"
conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"

install_dask

Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ dependencies:
- pydocstyle=6.1.1
- typing_extensions
- pre-commit
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- dask>=2021.11.1
- distributed>=2021.11.1
- streamz
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf_kafka/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
./build.sh -v cudf_kafka
16 changes: 9 additions & 7 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version=environ.get('CONDA_PY', 36) %}
{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set cuda_version = '.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set py_version = environ.get('PY_VER', '3.8') %}
{% set py_version_numeric = py_version.replace('.', '') %}

package:
name: cudf_kafka
Expand All @@ -14,7 +15,7 @@ source:

build:
number: {{ GIT_DESCRIBE_NUMBER }}
string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
string: py{{ py_version_numeric }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- CC
- CXX
Expand All @@ -26,14 +27,15 @@ requirements:
build:
- cmake >=3.20.1
host:
- python
- python {{ py_version }}
- cython >=0.29,<0.30
- setuptools
- cudf {{ version }}
- libcudf_kafka {{ version }}
- setuptools
run:
- python {{ py_version }}
- libcudf_kafka {{ version }}
- python-confluent-kafka
- python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version_numeric }}*
- cudf {{ version }}

test: # [linux64]
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/custreamz/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
./build.sh -v custreamz
17 changes: 9 additions & 8 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# Copyright (c) 2018-2019, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version=environ.get('CONDA_PY', 36) %}
{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set cuda_version = '.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set py_version = environ.get('PY_VER', '3.8') %}
{% set py_version_numeric = py_version.replace('.', '') %}

package:
name: custreamz
Expand All @@ -14,7 +15,7 @@ source:

build:
number: {{ GIT_DESCRIBE_NUMBER }}
string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
string: py{{ py_version_numeric }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- VERSION_SUFFIX
- PARALLEL_LEVEL
Expand All @@ -24,16 +25,16 @@ build:

requirements:
host:
- python
- python-confluent-kafka
- python {{ py_version }}
- python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version_numeric }}*
- cudf_kafka {{ version }}
run:
- python
- python {{ py_version }}
- streamz
- cudf {{ version }}
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- python-confluent-kafka
- python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version_numeric }}*
- cudf_kafka {{ version }}

test: # [linux64]
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ requirements:
host:
- python
- cudf {{ version }}
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- dask>=2021.11.1
- distributed>=2021.11.1
- cudatoolkit {{ cuda_version }}
run:
- python
- cudf {{ version }}
- dask>=2021.11.1,<=2021.11.2
- distributed>=2021.11.1,<=2021.11.2
- dask>=2021.11.1
- distributed>=2021.11.1
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}

test: # [linux64]
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
# This assumes the script is executed from the root of the repo directory
./build.sh -v libcudf --allgpuarch --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
else
./build.sh -v libcudf tests --allgpuarch --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
./build.sh -v libcudf tests --allgpuarch --build_metrics --incl_cache_stats --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
fi
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ test:
- test -f $PREFIX/include/cudf/lists/explode.hpp
- test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp
- test -f $PREFIX/include/cudf/lists/extract.hpp
- test -f $PREFIX/include/cudf/lists/filling.hpp
- test -f $PREFIX/include/cudf/lists/contains.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf_kafka/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
# This assumes the script is executed from the root of the repo directory
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/libcudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -26,7 +26,7 @@ requirements:
- cmake >=3.20.1
host:
- libcudf {{version}}
- librdkafka >=1.6.0,<1.7.0a0
- librdkafka >=1.7.0,<1.8.0a0
run:
- {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not

Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ set_target_properties(
)

target_compile_options(
cudftestutil PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
cudftestutil PUBLIC "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
"$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>>"
)

Expand Down
Loading

0 comments on commit b689e4f

Please sign in to comment.