Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.10' into cubinlinker…
Browse files Browse the repository at this point in the history
…-mvc
  • Loading branch information
gmarkall committed Sep 26, 2022
2 parents dd1a687 + a945377 commit d9ad59b
Show file tree
Hide file tree
Showing 205 changed files with 11,250 additions and 1,635 deletions.
File renamed without changes.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
python/cudf/cudf/_version.py export-subst
python/strings_udf/strings_udf/_version.py export-subst
python/cudf_kafka/cudf_kafka/_version.py export-subst
python/custreamz/custreamz/_version.py export-subst
python/dask_cudf/dask_cudf/_version.py export-subst
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ python/cudf_kafka/*/_lib/**/*.cpp
python/cudf_kafka/*/_lib/**/*.h
python/custreamz/*/_lib/**/*.cpp
python/custreamz/*/_lib/**/*.h
python/strings_udf/strings_udf/_lib/*.cpp
python/strings_udf/strings_udf/*.ptx
.Python
env/
develop-eggs/
Expand Down
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ repos:
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
exclude: (__init__.py|setup.py)$
types_or: [python, cython, pyi]
- repo: https://github.com/psf/black
rev: 22.3.0
Expand Down
11 changes: 10 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz strings_udf -v -g -n -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand Down Expand Up @@ -335,6 +335,15 @@ if buildAll || hasArg cudf; then
fi
fi

if buildAll || hasArg strings_udf; then

cd ${REPODIR}/python/strings_udf
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} -DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
fi
fi

# Build and install the dask_cudf Python package
if buildAll || hasArg dask_cudf; then

Expand Down
12 changes: 12 additions & 0 deletions ci/cpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,14 @@ fi
if [ "$BUILD_LIBCUDF" == '1' ]; then
gpuci_logger "Build conda pkg for libcudf"
gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcudf $CONDA_BUILD_ARGS

# BUILD_LIBCUDF == 1 means this job is being run on the cpu_build jobs
# that is where we must also build the strings_udf package
gpuci_logger "Build conda pkg for strings_udf (python 3.8)"
gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.8
gpuci_logger "Build conda pkg for strings_udf (python 3.9)"
gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.9

mkdir -p ${CONDA_BLD_DIR}/libcudf/work
cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work
gpuci_logger "sccache stats"
Expand Down Expand Up @@ -110,6 +118,10 @@ if [ "$BUILD_CUDF" == '1' ]; then

gpuci_logger "Build conda pkg for custreamz"
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL

gpuci_logger "Build conda pkg for strings_udf"
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL

fi
################################################################################
# UPLOAD - Conda packages
Expand Down
13 changes: 13 additions & 0 deletions ci/cpu/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ if [[ "$BUILD_LIBCUDF" == "1" && "$UPLOAD_LIBCUDF" == "1" ]]; then
export LIBCUDF_FILES=$(conda build --no-build-id --croot "${CONDA_BLD_DIR}" conda/recipes/libcudf --output)
LIBCUDF_FILES=$(echo "$LIBCUDF_FILES" | sed 's/.*libcudf-example.*//') # skip libcudf-example pkg upload
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing --no-progress $LIBCUDF_FILES

# since strings_udf compiles libcudf code, we require it be built in the same environment as libcudf
# however since libcudf is agnostic to the python version that is present, we must vary it explicitly
# here if we want packages for both python 3.8 and 3.9
export STRINGS_UDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/strings_udf --python=3.8 --output)
test -e ${STRINGS_UDF_FILE}
echo "Upload strings_udf (python 3.8): ${STRINGS_UDF_FILE}"
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${STRINGS_UDF_FILE} --no-progress

export STRINGS_UDF_FILE=$(conda build --croot "${CONDA_BLD_DIR}" conda/recipes/strings_udf --python=3.9 --output)
test -e ${STRINGS_UDF_FILE}
echo "Upload strings_udf (python 3.9): ${STRINGS_UDF_FILE}"
gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${STRINGS_UDF_FILE} --no-progress
fi

if [[ "$BUILD_CUDF" == "1" && "$UPLOAD_CUDF" == "1" ]]; then
Expand Down
38 changes: 34 additions & 4 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ gpuci_logger "Check conda environment"
conda info
conda config --show-sources
conda list --show-channel-urls

gpuci_logger "Check compiler versions"
python --version

Expand Down Expand Up @@ -123,11 +122,11 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
install_dask

################################################################################
# BUILD - Build libcudf, cuDF, libcudf_kafka, and dask_cudf from source
# BUILD - Build libcudf, cuDF, libcudf_kafka, dask_cudf, and strings_udf from source
################################################################################

gpuci_logger "Build from source"
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka strings_udf benchmarks tests --ptds

################################################################################
# TEST - Run GoogleTest
Expand Down Expand Up @@ -185,7 +184,11 @@ else
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}

gpuci_logger "Installing cudf, dask-cudf, cudf_kafka and custreamz"
# the CUDA component of strings_udf must be built on cuda 11.5 just like libcudf
# but because there is no separate python package, we must also build the python on the 11.5 jobs
# this means that at this point (on the GPU test jobs) the whole package is already built and has been
# copied by CI from the upstream 11.5 jobs into $CONDA_ARTIFACT_PATH
gpuci_logger "Installing cudf, dask-cudf, cudf_kafka, and custreamz"
gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"

gpuci_logger "GoogleTests"
Expand Down Expand Up @@ -249,6 +252,8 @@ fi

cd "$WORKSPACE/python/cudf/cudf"
# It is essential to cd into $WORKSPACE/python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
gpuci_logger "Check conda packages"
conda list
gpuci_logger "Python py.test for cuDF"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests

Expand All @@ -260,6 +265,31 @@ cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz

gpuci_logger "Installing strings_udf"
gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"

cd "$WORKSPACE/python/strings_udf/strings_udf"
gpuci_logger "Python py.test for strings_udf"

# We do not want to exit with a nonzero exit code in the case where no
# strings_udf tests are run because that will always happen when the local CUDA
# version is not 11.5. We need to suppress the exit code because this script is
# run with set -e and we're already setting a trap that we don't want to
# override here.

STRINGS_UDF_PYTEST_RETCODE=0
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests || STRINGS_UDF_PYTEST_RETCODE=$?

if [ ${STRINGS_UDF_PYTEST_RETCODE} -eq 5 ]; then
echo "No strings UDF tests were run, but this script will continue to execute."
elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then
exit ${STRINGS_UDF_PYTEST_RETCODE}
else
cd "$WORKSPACE/python/cudf/cudf"
gpuci_logger "Python py.test retest cuDF UDFs"
py.test tests/test_udf_masked_ops.py -n 8 --cache-clear
fi

# Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
# Therefore, these runs only verify that benchmarks are valid.
Expand Down
1 change: 1 addition & 0 deletions ci/gpu/test-notebooks.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

NOTEBOOKS_DIR="$WORKSPACE/notebooks"
NBTEST="$WORKSPACE/ci/utils/nbtest.sh"
Expand Down
1 change: 1 addition & 0 deletions ci/local/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

GIT_DESCRIBE_TAG=`git describe --tags`
MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
Expand Down
3 changes: 3 additions & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g'
# Python update
sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt

# Strings UDF update
sed_runner 's/'"strings_udf_version .*)"'/'"strings_udf_version ${NEXT_FULL_TAG})"'/g' python/strings_udf/CMakeLists.txt

# cpp libcudf_kafka update
sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt

Expand Down
1 change: 1 addition & 0 deletions ci/utils/nbtest.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

MAGIC_OVERRIDE_CODE="
def my_run_line_magic(*args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ dependencies:
- python>=3.8,<3.10
- numba>=0.56.2
- numpy
- pandas>=1.0,<1.5.0dev0
- pandas>=1.0,<1.6.0dev0
- pyarrow=9
- fastavro>=0.22.9
- python-snappy>=0.6.0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ requirements:
- protobuf>=3.20.1,<3.21.0a0
- python
- typing_extensions
- pandas >=1.0,<1.5.0dev0
- pandas >=1.0,<1.6.0dev0
- cupy >=9.5.0,<12.0.0a0
- numba >=0.56.2
- numpy
Expand Down
4 changes: 4 additions & 0 deletions conda/recipes/strings_udf/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) 2022, NVIDIA CORPORATION.

# This assumes the script is executed from the root of the repo directory
./build.sh strings_udf
14 changes: 14 additions & 0 deletions conda/recipes/strings_udf/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
c_compiler_version:
- 9

cxx_compiler_version:
- 9

sysroot_version:
- "2.17"

cmake_version:
- ">=3.20.1,!=3.23.0"

cuda_compiler:
- nvcc
65 changes: 65 additions & 0 deletions conda/recipes/strings_udf/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2022, NVIDIA CORPORATION.

{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
{% set py_version=environ.get('CONDA_PY', 38) %}
{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set cuda_major=cuda_version.split('.')[0] %}

package:
name: strings_udf
version: {{ version }}

source:
git_url: ../../..

build:
number: {{ GIT_DESCRIBE_NUMBER }}
string: cuda_{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
script_env:
- VERSION_SUFFIX
- PARALLEL_LEVEL
# libcudf's run_exports pinning is looser than we would like
ignore_run_exports:
- libcudf
ignore_run_exports_from:
- {{ compiler('cuda') }}

requirements:
build:
- cmake {{ cmake_version }}
- {{ compiler('c') }}
- {{ compiler('cxx') }}
- {{ compiler('cuda') }} {{ cuda_version }}
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- python
- cython >=0.29,<0.30
- scikit-build>=0.13.1
- setuptools
- numba >=0.54
- libcudf ={{ version }}
- cudf ={{ version }}
- cudatoolkit ={{ cuda_version }}
run:
- python
- typing_extensions
- numba >=0.54
- numpy
- libcudf ={{ version }}
- cudf ={{ version }}
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- cachetools
- ptxcompiler # [linux64] # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler
test: # [linux64]
requires: # [linux64]
- cudatoolkit {{ cuda_version }}.* # [linux64]
imports: # [linux64]
- strings_udf # [linux64]

about:
home: https://rapids.ai/
license: Apache-2.0
license_family: APACHE
license_file: LICENSE
summary: strings_udf library
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ add_library(
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/json_gpu.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
src/io/json/reader_impl.cu
src/io/json/experimental/read_json.cpp
Expand Down
12 changes: 5 additions & 7 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,10 @@ ConfigureBench(MERGE_BENCH merge/merge.cpp)
ConfigureBench(NULLMASK_BENCH null_mask/set_null_mask.cpp)

# ##################################################################################################
# * parquet writer chunks benchmark ---------------------------------------------------------------
ConfigureBench(PARQUET_WRITER_CHUNKS_BENCH io/parquet/parquet_writer_chunks.cpp)
# * parquet writer benchmark ----------------------------------------------------------------------
ConfigureNVBench(
PARQUET_WRITER_NVBENCH io/parquet/parquet_writer.cpp io/parquet/parquet_writer_chunks.cpp
)

# ##################################################################################################
# * parquet reader benchmark ----------------------------------------------------------------------
Expand All @@ -238,11 +240,7 @@ ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reade

# ##################################################################################################
# * csv reader benchmark --------------------------------------------------------------------------
ConfigureBench(CSV_READER_BENCH io/csv/csv_reader.cpp)

# ##################################################################################################
# * parquet writer benchmark ----------------------------------------------------------------------
ConfigureBench(PARQUET_WRITER_BENCH io/parquet/parquet_writer.cpp)
ConfigureNVBench(CSV_READER_NVBENCH io/csv/csv_reader_input.cpp io/csv/csv_reader_options.cpp)

# ##################################################################################################
# * orc writer benchmark --------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit d9ad59b

Please sign in to comment.