Skip to content

Commit

Permalink
Merge branch 'branch-21.10' of github.com:rapidsai/cudf into fea-enab…
Browse files Browse the repository at this point in the history
…le_compiled_binops
  • Loading branch information
karthikeyann committed Aug 23, 2021
2 parents 62b52d7 + 6cd0167 commit e7e81ce
Show file tree
Hide file tree
Showing 363 changed files with 17,240 additions and 9,107 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,7 @@ dask-worker-space/

# protobuf
**/*_pb2.py

# Sphinx docs & build artifacts
docs/cudf/source/api_docs/generated/*
docs/cudf/source/api_docs/api/*
255 changes: 253 additions & 2 deletions CHANGELOG.md

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids

cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:

For `cudf version == 21.06` :
For `cudf version == 21.08` :
```bash
# for CUDA 11.0
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=21.06 python=3.7 cudatoolkit=11.0
cudf=21.08 python=3.7 cudatoolkit=11.0

# or, for CUDA 11.2
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=21.06 python=3.7 cudatoolkit=11.2
cudf=21.08 python=3.7 cudatoolkit=11.2

```

Expand Down
77 changes: 51 additions & 26 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,27 @@ ARGS=$*
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
libcudf_kafka - build the libcudf_kafka C++ code only
cudf_kafka - build the cudf_kafka Python package
custreamz - build the custreamz Python package
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
-h | --h[elp] - print this text
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\"<args>\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
libcudf_kafka - build the libcudf_kafka C++ code only
cudf_kafka - build the cudf_kafka Python package
custreamz - build the custreamz Python package
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
-h | --h[elp] - print this text
default action (no args) is to build and install 'libcudf' then 'cudf'
then 'dask_cudf' targets
Expand Down Expand Up @@ -71,6 +72,28 @@ function hasArg {
(( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}

function cmakeArgs {
    # Extract an optional --cmake-args="<args>" option from the global ARGS string.
    #
    # Reads:  ARGS       - the space-joined command-line arguments
    # Writes: CMAKE_ARGS - the text between the outer double quotes of the option
    #         ARGS       - the same string with the whole --cmake-args="..." option removed
    # Exits with status 1 if --cmake-args appears more than once.
    #
    # ARGS and CMAKE_ARGS are always expanded inside double quotes here so that
    # whitespace is preserved and no pathname expansion happens; otherwise the
    # extracted option may no longer match its occurrence in ARGS.

    # Check for multiple cmake args options
    if [[ $(printf '%s\n' "${ARGS}" | { grep -Eo "\-\-cmake\-args" || true; } | wc -l ) -gt 1 ]]; then
        echo "Multiple --cmake-args options were provided, please provide only one: ${ARGS}"
        exit 1
    fi

    # Check for cmake args option
    if [[ -n $(printf '%s\n' "${ARGS}" | { grep -E "\-\-cmake\-args" || true; } ) ]]; then
        # There are possible weird edge cases that may cause this regex filter to output nothing and fail silently
        # the true pipe will catch any weird edge cases that may happen and will cause the program to fall back
        # on the invalid option error
        CMAKE_ARGS=$(printf '%s\n' "${ARGS}" | { grep -Eo "\-\-cmake\-args=\".+\"" || true; })
        if [[ -n ${CMAKE_ARGS} ]]; then
            # Remove the full CMAKE_ARGS argument from list of args so that it passes validArgs function.
            # The pattern is quoted so that it is matched literally rather than as a glob.
            ARGS=${ARGS//"${CMAKE_ARGS}"/}
            # Filter the full argument down to just the extra string that will be added to cmake call
            CMAKE_ARGS=$(printf '%s\n' "${CMAKE_ARGS}" | grep -Eo "\".+\"" | sed -e 's/^"//' -e 's/"$//')
        fi
    fi
}

function buildAll {
    # Status 0 when no arguments were passed at all, or when " ${ARGS} " contains
    # no space-delimited run of dash-free characters (the grep below finds none).
    # Status 1 otherwise.
    if (( ${NUMARGS} == 0 )); then
        return 0
    fi
    if echo " ${ARGS} " | grep -q " [^-]\+ "; then
        return 1
    fi
    return 0
}
Expand All @@ -82,9 +105,11 @@ fi

# Check for valid usage
if (( ${NUMARGS} != 0 )); then
# Check for cmake args
cmakeArgs
for a in ${ARGS}; do
if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then
echo "Invalid option: ${a}"
echo "Invalid option or formatting, check --help: ${a}"
exit 1
fi
done
Expand Down Expand Up @@ -139,7 +164,6 @@ fi
# Configure, build, and install libcudf

if buildAll || hasArg libcudf; then

if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES="
echo "Building for the architecture of the GPU in the system..."
Expand All @@ -156,7 +180,8 @@ if buildAll || hasArg libcudf; then
-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
-DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
-DPER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${CMAKE_ARGS}

cd ${LIB_BUILD_DIR}

Expand All @@ -172,8 +197,7 @@ if buildAll || hasArg cudf; then

cd ${REPODIR}/python/cudf
if [[ ${INSTALL_TARGET} != "" ]]; then
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace -j${PARALLEL_LEVEL}
python setup.py install --single-version-externally-managed --record=record.txt
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext -j${PARALLEL_LEVEL} install --single-version-externally-managed --record=record.txt
else
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace -j${PARALLEL_LEVEL} --library-dir=${LIBCUDF_BUILD_DIR}
fi
Expand All @@ -196,7 +220,8 @@ if hasArg libcudf_kafka; then
cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${CMAKE_ARGS}


cd ${KAFKA_LIB_BUILD_DIR}
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -26,7 +26,6 @@ dependencies:
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
- nbsphinx
- numpydoc
Expand All @@ -43,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand All @@ -57,6 +56,7 @@ dependencies:
- nvtx>=0.2.1
- cachetools
- transformers
- pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -26,7 +26,6 @@ dependencies:
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
- nbsphinx
- numpydoc
Expand All @@ -43,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand All @@ -57,6 +56,7 @@ dependencies:
- nvtx>=0.2.1
- cachetools
- transformers
- pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ requirements:
- setuptools
- numba >=0.53.1
- dlpack>=0.5,<0.6.0a0
- pyarrow 4.0.1 *cuda
- pyarrow 5.0.0 *cuda
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
Expand Down
9 changes: 4 additions & 5 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ requirements:
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 4.0.1 *cuda
- arrow-cpp 5.0.0 *cuda
- arrow-cpp-proc * cuda
- dlpack>=0.5,<0.6.0a0
run:
Expand All @@ -51,11 +51,9 @@ test:
- test -f $PREFIX/lib/libcudf.so
- test -f $PREFIX/lib/libcudftestutil.a
- test -f $PREFIX/include/cudf/aggregation.hpp
- test -f $PREFIX/include/cudf/ast/transform.hpp
- test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp
- test -f $PREFIX/include/cudf/ast/detail/expression_parser.hpp
- test -f $PREFIX/include/cudf/ast/detail/operators.hpp
- test -f $PREFIX/include/cudf/ast/nodes.hpp
- test -f $PREFIX/include/cudf/ast/operators.hpp
- test -f $PREFIX/include/cudf/ast/expressions.hpp
- test -f $PREFIX/include/cudf/binaryop.hpp
- test -f $PREFIX/include/cudf/labeling/label_bins.hpp
- test -f $PREFIX/include/cudf/column/column_factories.hpp
Expand Down Expand Up @@ -102,6 +100,7 @@ test:
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
- test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/encode.hpp
- test -f $PREFIX/include/cudf/dictionary/detail/merge.hpp
Expand Down
47 changes: 26 additions & 21 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
set(CUDF_BUILD_FOR_DETECTED_ARCHS TRUE)
endif()

file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.10/RAPIDS.cmake
${CMAKE_BINARY_DIR}/RAPIDS.cmake)
include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)

include(rapids-cmake)
include(rapids-cpm)
include(rapids-cuda)
include(rapids-export)
include(rapids-find)


project(CUDF VERSION 21.10.00 LANGUAGES C CXX)

# Needed because GoogleBenchmark changes the state of FindThreads.cmake,
Expand All @@ -44,6 +55,7 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks"
option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON)
option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON)
option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF)
option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" ON)
Expand Down Expand Up @@ -137,6 +149,9 @@ include(cmake/thirdparty/CUDF_GetArrow.cmake)
include(cmake/thirdparty/CUDF_GetDLPack.cmake)
# find libcu++
include(cmake/thirdparty/CUDF_GetLibcudacxx.cmake)
# find cuCollections
# Should come after including thrust and libcudacxx
include(cmake/thirdparty/CUDF_GetcuCollections.cmake)
# find or install GoogleTest
include(cmake/thirdparty/CUDF_GetGTest.cmake)
# preprocess jitify-able kernels
Expand All @@ -151,8 +166,8 @@ add_library(cudf
src/aggregation/aggregation.cpp
src/aggregation/aggregation.cu
src/aggregation/result_cache.cpp
src/ast/linearizer.cpp
src/ast/transform.cu
src/ast/expression_parser.cpp
src/ast/expressions.cpp
src/binaryop/binaryop.cpp
src/binaryop/compiled/binary_ops.cu
src/binaryop/compiled/Add.cu
Expand Down Expand Up @@ -255,6 +270,7 @@ add_library(cudf
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/to_arrow.cu
src/interop/detail/arrow_allocator.cpp
src/io/avro/avro.cpp
src/io/avro/avro_gpu.cu
src/io/avro/reader_impl.cu
Expand Down Expand Up @@ -283,7 +299,7 @@ add_library(cudf
src/io/orc/writer_impl.cu
src/io/parquet/compact_protocol_writer.cpp
src/io/parquet/page_data.cu
src/io/parquet/page_dict.cu
src/io/parquet/chunk_dict.cu
src/io/parquet/page_enc.cu
src/io/parquet/page_hdr.cu
src/io/parquet/parquet.cpp
Expand All @@ -305,6 +321,7 @@ add_library(cudf
src/join/cross_join.cu
src/join/hash_join.cu
src/join/join.cu
src/join/join_utils.cu
src/join/semi_join.cu
src/lists/contains.cu
src/lists/combine/concatenate_list_elements.cu
Expand Down Expand Up @@ -436,6 +453,7 @@ add_library(cudf
src/text/subword/wordpiece_tokenizer.cu
src/text/tokenize.cu
src/transform/bools_to_mask.cu
src/transform/compute_column.cu
src/transform/encode.cu
src/transform/mask_to_bools.cu
src/transform/nans_to_nulls.cu
Expand Down Expand Up @@ -523,7 +541,8 @@ target_link_libraries(cudf
PUBLIC ZLIB::ZLIB
${ARROW_LIBRARIES}
cudf::Thrust
rmm::rmm)
rmm::rmm
PRIVATE cuco::cuco)

if(CUDA_STATIC_RUNTIME)
# Tell CMake what CUDA language runtime to use
Expand Down Expand Up @@ -628,9 +647,11 @@ endif()
###################################################################################################
# - install targets -------------------------------------------------------------------------------

include(CPack)

include(GNUInstallDirs)

set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/cudf)
set(INSTALL_CONFIGDIR lib/cmake/cudf)
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME cudf)

# install target for cudf_base and the proxy libcudf.so
Expand Down Expand Up @@ -679,22 +700,6 @@ configure_package_config_file(cmake/cudf-build-config.cmake.in ${CUDF_BINARY_DIR
write_basic_package_version_file(${CUDF_BINARY_DIR}/cudf-config-version.cmake
COMPATIBILITY SameMinorVersion)

if(TARGET arrow_shared)
get_target_property(arrow_is_imported arrow_shared IMPORTED)
if(NOT arrow_is_imported)
export(TARGETS arrow_shared arrow_cuda_shared
FILE ${CUDF_BINARY_DIR}/cudf-arrow-targets.cmake
NAMESPACE cudf::)
endif()
elseif(TARGET arrow_static)
get_target_property(arrow_is_imported arrow_static IMPORTED)
if(NOT arrow_is_imported)
export(TARGETS arrow_static arrow_cuda_static
FILE ${CUDF_BINARY_DIR}/cudf-arrow-targets.cmake
NAMESPACE cudf::)
endif()
endif()

if(TARGET gtest)
get_target_property(gtest_is_imported gtest IMPORTED)
if(NOT gtest_is_imported)
Expand Down
Loading

0 comments on commit e7e81ce

Please sign in to comment.