
Commit

Merge pull request #4714 from rapidsai/branch-0.13
raydouglass committed Mar 30, 2020
2 parents 8d7bf34 + 6158033 commit 19f5174
Showing 770 changed files with 64,736 additions and 19,703 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -1 +1,2 @@
python/cudf/cudf/_version.py export-subst
CHANGELOG.md merge=union
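
The added `merge=union` attribute tells Git to resolve merge conflicts in `CHANGELOG.md` by keeping the lines from both sides rather than emitting conflict markers, which suits a changelog that release branches append to concurrently. A quick sanity check, as a sketch run from the repo root:

```bash
# Ask Git which merge driver applies to CHANGELOG.md;
# with the attribute above, expected output is "CHANGELOG.md: merge: union"
git check-attr merge CHANGELOG.md
```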
1 change: 1 addition & 0 deletions .github/CODEOWNERS
@@ -3,6 +3,7 @@ cpp/ @rapidsai/cudf-cpp-codeowners

#python code owners
python/ @rapidsai/cudf-python-codeowners
notebooks/ @rapidsai/cudf-python-codeowners
python/dask_cudf/ @rapidsai/cudf-dask-codeowners

#cmake code owners
35 changes: 35 additions & 0 deletions .github/workflows/new-issues-to-triage-projects.yml
@@ -0,0 +1,35 @@
name: Auto Assign New Issues to Triage Project

on:
issues:
types: [opened]

env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
assign_one_project:
runs-on: ubuntu-latest
name: Assign New Issues to Triage Project
steps:
- name: Process bug issues
uses: docker://takanabe/github-actions-automate-projects:v0.0.1
if: contains(github.event.issue.labels.*.name, 'bug') && contains(github.event.issue.labels.*.name, '? - Needs Triage')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_PROJECT_URL: https://github.com/rapidsai/cudf/projects/1
GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
- name: Process feature issues
uses: docker://takanabe/github-actions-automate-projects:v0.0.1
if: contains(github.event.issue.labels.*.name, 'feature request') && contains(github.event.issue.labels.*.name, '? - Needs Triage')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_PROJECT_URL: https://github.com/rapidsai/cudf/projects/9
GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
- name: Process other issues
uses: docker://takanabe/github-actions-automate-projects:v0.0.1
if: contains(github.event.issue.labels.*.name, '? - Needs Triage') && (!contains(github.event.issue.labels.*.name, 'bug') && !contains(github.event.issue.labels.*.name, 'feature request'))
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_PROJECT_URL: https://github.com/rapidsai/cudf/projects/10
GITHUB_PROJECT_COLUMN_NAME: 'Needs prioritizing'
13 changes: 13 additions & 0 deletions .gitignore
@@ -14,12 +14,15 @@ DartConfiguration.tcl
.DS_Store
*.manifest
*.spec
.nfs*

## Python build directories & artifacts
dask-worker-space/
dist/
cudf.egg-info/
python/build
python/*/build
python/cudf/cudf-coverage.xml
python/cudf/*/_lib/**/*.cpp
python/cudf/*/_lib/**/*.h
python/cudf/*/_lib/.nfs*
@@ -28,6 +31,7 @@ python/cudf/*/_libxx/**/*.h
python/cudf/*/_libxx/.nfs*
python/cudf/*.ipynb
python/cudf/.ipynb_checkpoints
python/nvstrings/nvstrings-coverage.xml
python/*/record.txt
.Python
env/
@@ -55,8 +59,11 @@ htmlcov/
.cache
nosetests.xml
coverage.xml
junit-cudf.xml
junit-nvstrings.xml
*.cover
.hypothesis/
test-results

## Patching
*.diff
@@ -142,3 +149,9 @@ ENV/

# mypy
.mypy_cache/

## VSCode IDE
.vscode

# Dask
dask-worker-space/
326 changes: 318 additions & 8 deletions CHANGELOG.md

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions CONTRIBUTING.md
@@ -159,11 +159,12 @@ git submodule update --init --remote --recursive
# create the conda environment (assuming in base `cudf` directory)
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda10.0.yml
# activate the environment
-source activate cudf_dev
+conda activate cudf_dev
```
-- If you're using CUDA 9.2, you will need to create the environment with `conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda9.2.yml` instead.
+- If using CUDA 9.2, create the environment with `conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda9.2.yml` instead.
- For other CUDA versions, check the corresponding cudf_dev_cuda*.yml file in conda/environments

-- Build and install `libcudf`. CMake depends on the `nvcc` executable being on your path or defined in `$CUDACXX`.
+- Build and install `libcudf` after its dependencies. CMake depends on the `nvcc` executable being on your path or defined in `$CUDACXX`.
```bash
$ cd $CUDF_HOME/cpp # navigate to C/C++ CUDA source root directory
$ mkdir build # make a build directory
@@ -173,15 +174,18 @@ $ cd build # ente
# -DCMAKE_INSTALL_PREFIX set to the install path for your libraries or $CONDA_PREFIX if you're using Anaconda, e.g. -DCMAKE_INSTALL_PREFIX=/install/path or -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX
# -DCMAKE_CXX11_ABI set to ON or OFF depending on the ABI version you want, defaults to ON. When turned ON, ABI compatibility for C++11 is used. When OFF, pre-C++11 ABI compatibility is used.
$ cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_CXX11_ABI=ON # configure cmake ...

$ make -j # compile the libraries librmm.so, libcudf.so ... '-j' will start a parallel job using the number of physical cores available on your system
$ make install # install the libraries librmm.so, libcudf.so to the CMAKE_INSTALL_PREFIX
```
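
If `nvcc` is not on your path, you can point CMake at it through `$CUDACXX` before configuring. A minimal sketch, assuming the common (but not guaranteed) install location `/usr/local/cuda`; adjust for your system:

```bash
# /usr/local/cuda is an assumed CUDA install path, not something this repo guarantees.
export CUDACXX=/usr/local/cuda/bin/nvcc
cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_CXX11_ABI=ON
```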

- As a convenience, a `build.sh` script is provided in `$CUDF_HOME`. To execute the same build commands above, run the script as shown below. Note that the libraries will be installed to the location set in `$INSTALL_PREFIX` if set (e.g. `export INSTALL_PREFIX=/install/path`), otherwise to `$CONDA_PREFIX`.
```bash
$ cd $CUDF_HOME
-$ ./build.sh libcudf # compile the cuDF libraries and install them to $INSTALL_PREFIX if set, otherwise $CONDA_PREFIX
+$ ./build.sh # To build both C++ and Python cuDF versions with their dependencies
```
- To build only the C++ component with the script
```bash
$ ./build.sh libnvstrings libcudf # Build only the cuDF C++ components and install them to $INSTALL_PREFIX if set, otherwise $CONDA_PREFIX
```

- To run tests (Optional):
2 changes: 1 addition & 1 deletion README.md
@@ -1,6 +1,6 @@
# <div align="left"><img src="img/rapids_logo.png" width="90px"/>&nbsp;cuDF - GPU DataFrames</div>

-[![Build Status](https://gpuci.gpuopenanalytics.com/buildStatus/icon?job=gpuCI%2Fcudf%2Fbranches%2Fcudf-gpu-branch-0.12)](https://gpuci.gpuopenanalytics.com/job/gpuCI/job/cudf/job/branches/job/cudf-gpu-branch-0.12/)
+[![Build Status](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-branch-pipeline/badge/icon)](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-branch-pipeline/)

**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cudf/blob/master/README.md) ensure you are on the `master` branch.

71 changes: 52 additions & 19 deletions build.sh
@@ -18,21 +18,25 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libnvstrings nvstrings libcudf cudf dask_cudf benchmarks -v -g -n --allgpuarch -h"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [-v] [-g] [-n] [-h]
clean - remove all existing build artifacts and configuration (start
over)
libnvstrings - build the nvstrings C++ code only
nvstrings - build the nvstrings Python package
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
-v - verbose build mode
-g - build for debug
-n - no install step
--allgpuarch - build for all supported GPU architectures
-h - print this text
VALIDARGS="clean libnvstrings nvstrings libcudf cudf dask_cudf benchmarks tests -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn -h"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [-v] [-g] [-n] [-h] [-l]
clean - remove all existing build artifacts and configuration (start
over)
libnvstrings - build the nvstrings C++ code only
nvstrings - build the nvstrings Python package
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
-h - print this text
default action (no args) is to build and install 'libnvstrings' then
'nvstrings' then 'libcudf' then 'cudf' then 'dask_cudf' targets
@@ -49,6 +53,10 @@ BUILD_TYPE=Release
INSTALL_TARGET=install
BENCHMARKS=OFF
BUILD_ALL_GPU_ARCH=0
BUILD_NVTX=ON
BUILD_TESTS=OFF
BUILD_LEGACY_TESTS=OFF
BUILD_DISABLE_DEPRECATION_WARNING=ON

# Set defaults for vars that may not have been defined externally
# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
@@ -88,12 +96,26 @@ if hasArg -g; then
fi
if hasArg -n; then
INSTALL_TARGET=""
LIBCUDF_BUILD_DIR=${LIB_BUILD_DIR}
LIBNVSTRINGS_BUILD_DIR=${LIB_BUILD_DIR}
fi
if hasArg -l; then
BUILD_LEGACY_TESTS=ON
fi
if hasArg --allgpuarch; then
BUILD_ALL_GPU_ARCH=1
fi
if hasArg benchmarks; then
-BENCHMARKS=ON
+BENCHMARKS="ON"
fi
if hasArg tests; then
BUILD_TESTS=ON
fi
if hasArg --disable_nvtx; then
BUILD_NVTX="OFF"
fi
if hasArg --show_depr_warn; then
BUILD_DISABLE_DEPRECATION_WARNING=OFF
fi
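
The flag handling above relies on a `hasArg` helper defined earlier in `build.sh`, outside this hunk. A minimal sketch of such a helper, assuming `NUMARGS` and `ARGS` are captured from `$#` and `$*` near the top of the script (as the `ARGS=$*` line above suggests):

```bash
# Illustrative sketch; the real definition lives outside this hunk.
NUMARGS=$#   # number of command-line arguments
ARGS=$*      # the full argument string
# Succeeds if the word $1 appears in the script's argument list.
function hasArg {
    (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}
```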

# If clean given, run it prior to any other steps
@@ -128,7 +150,10 @@ if buildAll || hasArg libnvstrings || hasArg libcudf; then
cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_CXX11_ABI=ON \
${GPU_ARCH} \
-DUSE_NVTX=${BUILD_NVTX} \
-DBUILD_BENCHMARKS=${BENCHMARKS} \
-DBUILD_LEGACY_TESTS=${BUILD_LEGACY_TESTS} \
-DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} ..
fi

@@ -140,6 +165,10 @@ if buildAll || hasArg libnvstrings; then
else
make -j${PARALLEL_LEVEL} nvstrings VERBOSE=${VERBOSE}
fi

if [[ ${BUILD_TESTS} == "ON" ]]; then
make -j${PARALLEL_LEVEL} build_tests_nvstrings VERBOSE=${VERBOSE}
fi
fi

# Build and install the nvstrings Python package
@@ -150,7 +179,7 @@ if buildAll || hasArg nvstrings; then
python setup.py build_ext
python setup.py install --single-version-externally-managed --record=record.txt
else
-python setup.py build_ext --library-dir=${LIBNVSTRINGS_BUILD_DIR}
+python setup.py build_ext --build-lib=${PWD} --library-dir=${LIBNVSTRINGS_BUILD_DIR}
fi
fi

@@ -163,17 +192,21 @@ if buildAll || hasArg libcudf; then
else
make -j${PARALLEL_LEVEL} cudf VERBOSE=${VERBOSE}
fi

if [[ ${BUILD_TESTS} == "ON" ]]; then
make -j${PARALLEL_LEVEL} build_tests_cudf VERBOSE=${VERBOSE}
fi
fi

# Build and install the cudf Python package
if buildAll || hasArg cudf; then

cd ${REPODIR}/python/cudf
if [[ ${INSTALL_TARGET} != "" ]]; then
-python setup.py build_ext --inplace
+PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace
python setup.py install --single-version-externally-managed --record=record.txt
else
-python setup.py build_ext --inplace --library-dir=${LIBCUDF_BUILD_DIR}
+PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace --library-dir=${LIBCUDF_BUILD_DIR}
fi
fi
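
With the change above, `PARALLEL_LEVEL` is forwarded to `setup.py build_ext`, presumably so the Cython compilation can run in parallel. A hedged usage example (the value 8 is arbitrary):

```bash
# Build only the cudf Python package with up to 8 parallel compile jobs.
PARALLEL_LEVEL=8 ./build.sh cudf
```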

2 changes: 1 addition & 1 deletion ci/cpu/prebuild.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

#Upload cudf once per PYTHON
if [[ "$CUDA" == "9.2" ]]; then
if [[ "$CUDA" == "10.0" ]]; then
export UPLOAD_CUDF=1
else
export UPLOAD_CUDF=0
28 changes: 18 additions & 10 deletions ci/gpu/build.sh
@@ -59,10 +59,10 @@ logger "Activate conda env..."
source activate gdf
conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
"dask>=2.1.0" "distributed>=2.1.0" "numpy>=1.16" "double-conversion" \
"rapidjson" "flatbuffers" "boost-cpp" "fsspec>=0.3.3" "dlpack" \
"rapidjson" "flatbuffers" "boost-cpp" "fsspec>=0.3.3,<0.7.0a0" "dlpack" \
"feather-format" "cupy>=6.6.0,<8.0.0a0,!=7.1.0" "arrow-cpp=0.15.0" "pyarrow=0.15.0" \
"fastavro>=0.22.0" "pandas>=0.25,<0.26" "hypothesis" "s3fs" "gcsfs" \
"boto3" "moto" "httpretty" "streamz"
"boto3" "moto" "httpretty" "streamz" "ipython=7.3*" "jupyterlab"

# Install the master version of dask, distributed, and streamz
logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps"
@@ -83,7 +83,11 @@ conda list
################################################################################

logger "Build libcudf..."
-$WORKSPACE/build.sh clean libnvstrings nvstrings libcudf cudf dask_cudf benchmarks
+if [[ ${BUILD_MODE} == "pull-request" ]]; then
+    $WORKSPACE/build.sh clean libnvstrings nvstrings libcudf cudf dask_cudf benchmarks tests
+else
+    $WORKSPACE/build.sh clean libnvstrings nvstrings libcudf cudf dask_cudf benchmarks tests -l
+fi

################################################################################
# TEST - Run GoogleTest and py.tests for libnvstrings, nvstrings, libcudf, and
@@ -96,20 +100,22 @@ else
logger "Check GPU usage..."
nvidia-smi

logger "GoogleTest for libnvstrings..."
logger "GoogleTests..."
cd $WORKSPACE/cpp/build
GTEST_OUTPUT="xml:${WORKSPACE}/test-results/" make -j${PARALLEL_LEVEL} test_nvstrings

logger "GoogleTest for libcudf..."
cd $WORKSPACE/cpp/build
GTEST_OUTPUT="xml:${WORKSPACE}/test-results/" make -j${PARALLEL_LEVEL} test_cudf
for gt in ${WORKSPACE}/cpp/build/gtests/* ; do
test_name=$(basename ${gt})
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:${WORKSPACE}/test-results/
done


# set environment variable for numpy 1.16
# will be enabled for later versions by default
np_ver=$(python -c "import numpy; print('.'.join(numpy.__version__.split('.')[:-1]))")
if [ "$np_ver" == "1.16" ];then
logger "export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1"
export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1
logger "export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1"
export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1
fi

cd $WORKSPACE/python/nvstrings
@@ -128,4 +134,6 @@ else
logger "Python py.test for cuStreamz..."
py.test --cache-clear --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term

${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log
python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py nbtest.log
fi
47 changes: 47 additions & 0 deletions ci/gpu/test-notebooks.sh
@@ -0,0 +1,47 @@
#!/bin/bash

NOTEBOOKS_DIR=${WORKSPACE}/notebooks
NBTEST=${WORKSPACE}/ci/utils/nbtest.sh
LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache

cd ${NOTEBOOKS_DIR}
TOPLEVEL_NB_FOLDERS=$(find . -name "*.ipynb" | cut -d'/' -f2 | sort -u)

# Add notebooks that should be skipped here
# (space-separated list of filenames without paths)

SKIPNBS=""

## Check env
env

EXITCODE=0

# Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure
# if any run fails

cd ${NOTEBOOKS_DIR}
for nb in $(find . -name "*.ipynb"); do
nbBasename=$(basename ${nb})
# Skip all NBs that use dask (in the code or even in their name)
if ((echo ${nb}|grep -qi dask) || \
(grep -q dask ${nb})); then
echo "--------------------------------------------------------------------------------"
echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)"
echo "--------------------------------------------------------------------------------"
elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then
echo "--------------------------------------------------------------------------------"
echo "SKIPPING: ${nb} (listed in skip list)"
echo "--------------------------------------------------------------------------------"
else
nvidia-smi
${NBTEST} ${nbBasename}
EXITCODE=$((EXITCODE | $?))
rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/*
fi
done


nvidia-smi

exit ${EXITCODE}
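
The `nbtest.sh` harness invoked above lives in `ci/utils/` and is not part of this diff. Purely as an illustration of the kind of wrapper it could be, converting a notebook to a script and running it so a failing cell surfaces as a nonzero exit status (all names below are assumptions):

```bash
#!/bin/bash
# Illustrative sketch only; the real ci/utils/nbtest.sh is not shown in this diff.
# Assumes it is invoked from the directory containing the notebook.
nb=$1
out=$(basename ${nb} .ipynb)
# Convert the notebook's code cells to a plain Python script...
jupyter nbconvert --to script ${nb} --output ${out}-converted
# ...then execute it; an uncaught exception yields a nonzero exit code.
python ${out}-converted.py
```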
