Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Aug 17, 2021
2 parents 364abe1 + 3402fec commit 8b10894
Show file tree
Hide file tree
Showing 177 changed files with 6,213 additions and 2,224 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,7 @@ dask-worker-space/

# protobuf
**/*_pb2.py

# Sphinx docs & build artifacts
docs/cudf/source/api_docs/generated/*
docs/cudf/source/api_docs/api/*
255 changes: 253 additions & 2 deletions CHANGELOG.md

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids

cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:

For `cudf version == 21.06` :
For `cudf version == 21.08` :
```bash
# for CUDA 11.0
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=21.06 python=3.7 cudatoolkit=11.0
cudf=21.08 python=3.7 cudatoolkit=11.0

# or, for CUDA 11.2
conda install -c rapidsai -c nvidia -c numba -c conda-forge \
cudf=21.06 python=3.7 cudatoolkit=11.2
cudf=21.08 python=3.7 cudatoolkit=11.2

```

Expand Down
77 changes: 51 additions & 26 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,27 @@ ARGS=$*
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h"
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
libcudf_kafka - build the libcudf_kafka C++ code only
cudf_kafka - build the cudf_kafka Python package
custreamz - build the custreamz Python package
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
-h | --h[elp] - print this text
HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [-l] [--cmake-args=\"<args>\"]
clean - remove all existing build artifacts and configuration (start
over)
libcudf - build the cudf C++ code only
cudf - build the cudf Python package
dask_cudf - build the dask_cudf Python package
benchmarks - build benchmarks
tests - build tests
libcudf_kafka - build the libcudf_kafka C++ code only
cudf_kafka - build the cudf_kafka Python package
custreamz - build the custreamz Python package
-v - verbose build mode
-g - build for debug
-n - no install step
-l - build legacy tests
--allgpuarch - build for all supported GPU architectures
--disable_nvtx - disable inserting NVTX profiling ranges
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
-h | --h[elp] - print this text
default action (no args) is to build and install 'libcudf' then 'cudf'
then 'dask_cudf' targets
Expand Down Expand Up @@ -71,6 +72,28 @@ function hasArg {
(( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}

function cmakeArgs {
    # Extract an optional --cmake-args="<args>" option from the global ARGS string.
    #
    # Reads:  ARGS
    # Writes: ARGS       - the whole --cmake-args="..." text is removed so that the
    #                      remaining tokens can pass the valid-option check
    #         CMAKE_ARGS - the text found between the outer double quotes
    # Exits with status 1 when --cmake-args is given more than once.
    #
    # All expansions are quoted so that glob characters or repeated whitespace inside
    # the user-supplied CMake options are handled literally.

    # Check for multiple cmake args options
    # (-e lets the pattern start with "--" without backslash escapes)
    local cmake_args_count
    cmake_args_count=$(printf '%s\n' "${ARGS}" | { grep -Eo -e '--cmake-args' || true; } | wc -l)
    if [[ ${cmake_args_count} -gt 1 ]]; then
        echo "Multiple --cmake-args options were provided, please provide only one: ${ARGS}"
        exit 1
    fi

    # Check for cmake args option
    if [[ "${ARGS}" == *--cmake-args* ]]; then
        # There are possible weird edge cases that may cause this regex filter to output nothing and fail silently
        # the true pipe will catch any weird edge cases that may happen and will cause the program to fall back
        # on the invalid option error
        CMAKE_ARGS=$(printf '%s\n' "${ARGS}" | { grep -Eo -e '--cmake-args=".+"' || true; })
        if [[ -n ${CMAKE_ARGS} ]]; then
            # Remove the full CMAKE_ARGS argument from list of args so that it passes the valid-option check.
            # The pattern is quoted so it is matched as a literal string, not as a glob
            # (otherwise values such as -DFOO=[abc] would never be removed).
            ARGS=${ARGS//"${CMAKE_ARGS}"/}
            # Filter the full argument down to just the extra string that will be added to cmake call
            CMAKE_ARGS=$(printf '%s\n' "${CMAKE_ARGS}" | grep -Eo '".+"' | sed -e 's/^"//' -e 's/"$//')
        fi
    fi
}

function buildAll {
    # Succeeds (status 0) when no arguments were given at all, or when the padded
    # ARGS string contains no space-delimited token made only of non-dash characters.
    # Fails (status 1) otherwise.
    if (( ${NUMARGS} == 0 )); then
        return 0
    fi
    if echo " ${ARGS} " | grep -q " [^-]\+ "; then
        return 1
    fi
    return 0
}
Expand All @@ -82,9 +105,11 @@ fi

# Check for valid usage
if (( ${NUMARGS} != 0 )); then
# Check for cmake args
cmakeArgs
for a in ${ARGS}; do
if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then
echo "Invalid option: ${a}"
echo "Invalid option or formatting, check --help: ${a}"
exit 1
fi
done
Expand Down Expand Up @@ -139,7 +164,6 @@ fi
# Configure, build, and install libcudf

if buildAll || hasArg libcudf; then

if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES="
echo "Building for the architecture of the GPU in the system..."
Expand All @@ -156,7 +180,8 @@ if buildAll || hasArg libcudf; then
-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
-DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
-DPER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${CMAKE_ARGS}

cd ${LIB_BUILD_DIR}

Expand All @@ -172,8 +197,7 @@ if buildAll || hasArg cudf; then

cd ${REPODIR}/python/cudf
if [[ ${INSTALL_TARGET} != "" ]]; then
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace -j${PARALLEL_LEVEL}
python setup.py install --single-version-externally-managed --record=record.txt
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext -j${PARALLEL_LEVEL} install --single-version-externally-managed --record=record.txt
else
PARALLEL_LEVEL=${PARALLEL_LEVEL} python setup.py build_ext --inplace -j${PARALLEL_LEVEL} --library-dir=${LIBCUDF_BUILD_DIR}
fi
Expand All @@ -196,7 +220,8 @@ if hasArg libcudf_kafka; then
cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE}
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${CMAKE_ARGS}


cd ${KAFKA_LIB_BUILD_DIR}
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -26,7 +26,6 @@ dependencies:
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
- nbsphinx
- numpydoc
Expand All @@ -43,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand All @@ -57,6 +56,7 @@ dependencies:
- nvtx>=0.2.1
- cachetools
- transformers
- pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -26,7 +26,6 @@ dependencies:
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
- nbsphinx
- numpydoc
Expand All @@ -43,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand All @@ -57,6 +56,7 @@ dependencies:
- nvtx>=0.2.1
- cachetools
- transformers
- pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ requirements:
- setuptools
- numba >=0.53.1
- dlpack>=0.5,<0.6.0a0
- pyarrow 4.0.1 *cuda
- pyarrow 5.0.0 *cuda
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ requirements:
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 4.0.1 *cuda
- arrow-cpp 5.0.0 *cuda
- arrow-cpp-proc * cuda
- dlpack>=0.5,<0.6.0a0
run:
Expand Down
21 changes: 4 additions & 17 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks"
option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON)
option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON)
option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF)
option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" ON)
Expand Down Expand Up @@ -629,9 +630,11 @@ endif()
###################################################################################################
# - install targets -------------------------------------------------------------------------------

include(CPack)

include(GNUInstallDirs)

set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/cudf)
set(INSTALL_CONFIGDIR lib/cmake/cudf)
set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME cudf)

# install target for cudf_base and the proxy libcudf.so
Expand Down Expand Up @@ -680,22 +683,6 @@ configure_package_config_file(cmake/cudf-build-config.cmake.in ${CUDF_BINARY_DIR
write_basic_package_version_file(${CUDF_BINARY_DIR}/cudf-config-version.cmake
COMPATIBILITY SameMinorVersion)

if(TARGET arrow_shared)
get_target_property(arrow_is_imported arrow_shared IMPORTED)
if(NOT arrow_is_imported)
export(TARGETS arrow_shared arrow_cuda_shared
FILE ${CUDF_BINARY_DIR}/cudf-arrow-targets.cmake
NAMESPACE cudf::)
endif()
elseif(TARGET arrow_static)
get_target_property(arrow_is_imported arrow_static IMPORTED)
if(NOT arrow_is_imported)
export(TARGETS arrow_static arrow_cuda_static
FILE ${CUDF_BINARY_DIR}/cudf-arrow-targets.cmake
NAMESPACE cudf::)
endif()
endif()

if(TARGET gtest)
get_target_property(gtest_is_imported gtest IMPORTED)
if(NOT gtest_is_imported)
Expand Down
3 changes: 3 additions & 0 deletions cpp/cmake/cudf-build-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ else()
if (NOT DEFINED CUDF_ENABLE_ARROW_S3)
set(CUDF_ENABLE_ARROW_S3 OFF)
endif()
if (NOT DEFINED CUDF_ENABLE_ARROW_ORC)
set(CUDF_ENABLE_ARROW_ORC OFF)
endif()
if (NOT DEFINED CUDF_ENABLE_ARROW_PYTHON)
set(CUDF_ENABLE_ARROW_PYTHON OFF)
endif()
Expand Down
48 changes: 40 additions & 8 deletions cpp/cmake/thirdparty/CUDF_GetArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,25 @@
# limitations under the License.
#=============================================================================

function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_PYTHON ENABLE_PARQUET)
function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON ENABLE_PARQUET)

if(BUILD_STATIC)
if(TARGET arrow_static AND TARGET arrow_cuda_static)
list(APPEND ARROW_LIBRARIES arrow_static)
list(APPEND ARROW_LIBRARIES arrow_cuda_static)
set(ARROW_FOUND TRUE PARENT_SCOPE)
set(ARROW_LIBRARIES ${ARROW_LIBRARIES} PARENT_SCOPE)
return()
endif()
else()
if(TARGET arrow_shared AND TARGET arrow_cuda_shared)
list(APPEND ARROW_LIBRARIES arrow_shared)
list(APPEND ARROW_LIBRARIES arrow_cuda_shared)
set(ARROW_FOUND TRUE PARENT_SCOPE)
set(ARROW_LIBRARIES ${ARROW_LIBRARIES} PARENT_SCOPE)
return()
endif()
endif()

set(ARROW_BUILD_SHARED ON)
set(ARROW_BUILD_STATIC OFF)
Expand All @@ -40,12 +58,8 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_PYTHON E
list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON")
# Arrow's logic to build Boost from source is busted, so we have to get it from the system.
list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM")
# Arrow's logic to find Thrift is busted, so we have to build it from
# source. Why can't we use `THRIFT_SOURCE BUNDLED` you might ask?
# Because that's _also_ busted. The only thing that seems to work is to set
# _all_ dependencies to bundled, then optionally un-set BOOST_SOURCE to
# SYSTEM.
list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE BUNDLED")
list(APPEND ARROW_PYTHON_OPTIONS "Thrift_SOURCE BUNDLED")
list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO")
endif()

# Set this so Arrow correctly finds the CUDA toolkit when the build machine
Expand All @@ -68,6 +82,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_PYTHON E
"ARROW_CXXFLAGS -w"
"ARROW_JEMALLOC OFF"
"ARROW_S3 ${ENABLE_S3}"
"ARROW_ORC ${ENABLE_ORC}"
# e.g. needed by blazingsql-io
"ARROW_PARQUET ${ENABLE_PARQUET}"
${ARROW_PYTHON_OPTIONS}
Expand Down Expand Up @@ -144,14 +159,31 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_PYTHON E
set(ARROW_FOUND "${ARROW_FOUND}" PARENT_SCOPE)
set(ARROW_LIBRARIES "${ARROW_LIBRARIES}" PARENT_SCOPE)

if(TARGET arrow_shared)
get_target_property(arrow_is_imported arrow_shared IMPORTED)
if(NOT arrow_is_imported)
export(TARGETS arrow_shared arrow_cuda_shared
FILE ${CUDF_BINARY_DIR}/cudf-arrow-targets.cmake
NAMESPACE cudf::)
endif()
elseif(TARGET arrow_static)
get_target_property(arrow_is_imported arrow_static IMPORTED)
if(NOT arrow_is_imported)
export(TARGETS arrow_static arrow_cuda_static
FILE ${CUDF_BINARY_DIR}/cudf-arrow-targets.cmake
NAMESPACE cudf::)
endif()
endif()

endfunction()

set(CUDF_VERSION_Arrow 4.0.1)
set(CUDF_VERSION_Arrow 5.0.0)

find_and_configure_arrow(
${CUDF_VERSION_Arrow}
${CUDF_USE_ARROW_STATIC}
${CUDF_ENABLE_ARROW_S3}
${CUDF_ENABLE_ARROW_ORC}
${CUDF_ENABLE_ARROW_PYTHON}
${CUDF_ENABLE_ARROW_PARQUET}
)
Loading

0 comments on commit 8b10894

Please sign in to comment.