Skip to content

Commit

Permalink
Merge branch 'branch-22.10' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…json-tree
  • Loading branch information
karthikeyann committed Aug 11, 2022
2 parents 2b59b04 + a67b718 commit 3b2acb2
Show file tree
Hide file tree
Showing 264 changed files with 11,867 additions and 3,528 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ repos:
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: cmake-lint
name: cmake-lint
entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
Expand All @@ -69,6 +71,8 @@ repos:
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: copyright-check
name: copyright-check
# This hook's use of Git tools appears to conflict with
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# cuDF 22.10.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch.

# cuDF 22.08.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.08.00a for the latest changes to this development branch.
Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,13 @@ Now code linters and formatters will be run each time you commit changes.

You can skip these checks with `git commit --no-verify` or with the short version `git commit -n`.

## Developer Guidelines

The [C++ Developer Guide](cpp/docs/DEVELOPER_GUIDE.md) includes details on contributing to libcudf C++ code.

The [Python Developer Guide](https://docs.rapids.ai/api/cudf/stable/developer_guide/index.html) includes details on contributing to cuDF Python code.


## Attribution

Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md
Expand Down
4 changes: 2 additions & 2 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
gpuci_logger "gpuci_mamba_retry update dask"
gpuci_mamba_retry update dask
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall
fi

# Install the master version of streamz
Expand Down
2 changes: 1 addition & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LANG=C.UTF-8
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids

FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.08/cmake-format-rapids-cmake.json
FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
Expand Down
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG
export INSTALL_DASK_MAIN=1

# ucx-py version
export UCX_PY_VERSION='0.27.*'
export UCX_PY_VERSION='0.28.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down Expand Up @@ -92,8 +92,8 @@ function install_dask {
gpuci_mamba_retry update dask
conda list
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall
fi
# Install the main version of streamz
gpuci_logger "Install the main version of streamz"
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# ucx-py version
export UCX_PY_VERSION='0.27.*'
export UCX_PY_VERSION='0.28.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
13 changes: 7 additions & 6 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<11.0.0a0
- rmm=22.08.*
- rmm=22.10.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
- scikit-build>=0.13.1
- python>=3.8,<3.10
- numba>=0.54
- numpy
- pandas>=1.0,<1.5.0dev0
- pyarrow=8.0.0
- pyarrow=9
- fastavro>=0.22.9
- python-snappy>=0.6.0
- notebook>=0.5.0
Expand All @@ -48,10 +48,10 @@ dependencies:
- pydocstyle=6.1.1
- typing_extensions
- pre-commit
- dask>=2022.05.2
- distributed>=2022.05.2
- dask>=2022.7.1
- distributed>=2022.7.1
- streamz
- arrow-cpp=8.0.0
- arrow-cpp=9
- dlpack>=0.5,<0.6.0a0
- double-conversion
- rapidjson
Expand All @@ -61,7 +61,7 @@ dependencies:
- sphinx-autobuild
- myst-nb
- scipy
- dask-cuda=22.08.*
- dask-cuda=22.10.*
- mimesis<4.1
- packaging
- protobuf>=3.20.1,<3.21.0a0
Expand All @@ -76,6 +76,7 @@ dependencies:
- botocore>=1.24.21
- aiobotocore>=2.2.0
- s3fs>=2022.3.0
- werkzeug<2.2.0 # Temporarily pin this transitive dependency to avoid urllib3 + moto timeouts
- pytorch<1.12.0
- pip:
- git+https://github.com/python-streamz/streamz.git@master
Expand Down
5 changes: 2 additions & 3 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,11 @@ requirements:
- protobuf>=3.20.1,<3.21.0a0
- python
- cython >=0.29,<0.30
- cmake>=3.20.1,!=3.23.0
- scikit-build>=0.13.1
- setuptools
- numba >=0.54
- dlpack>=0.5,<0.6.0a0
- pyarrow =8.0.0
- pyarrow =9
- libcudf ={{ version }}
- rmm ={{ minor_version }}
- cudatoolkit ={{ cuda_version }}
Expand All @@ -53,7 +52,7 @@ requirements:
- cupy >=9.5.0,<11.0.0a0
- numba >=0.54
- numpy
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- libcudf {{ version }}
- fastavro >=0.22.0
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ requirements:
- python
- streamz
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- dask>=2022.7.1
- distributed>=2022.7.1
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf_kafka ={{ version }}

Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ requirements:
host:
- python
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- dask>=2022.7.1
- distributed>=2022.7.1
- cudatoolkit ={{ cuda_version }}
run:
- python
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- dask>=2022.7.1
- distributed>=2022.7.1
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}

test: # [linux64]
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

export cudf_ROOT="$(realpath ./cpp/build)"
./build.sh -n -v libcudf libcudf_kafka benchmarks tests --build_metrics --incl_cache_stats --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\"
./build.sh -n -v libcudf libcudf_kafka benchmarks tests --build_metrics --incl_cache_stats --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib -DCUDF_ENABLE_ARROW_S3=ON\"
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ gtest_version:
- "=1.10.0"

arrow_cpp_version:
- "=8.0.0"
- "=9"

dlpack_version:
- ">=0.5,<0.6.0a0"
Expand Down
5 changes: 4 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ outputs:
- test -f $PREFIX/include/cudf/detail/transpose.hpp
- test -f $PREFIX/include/cudf/detail/unary.hpp
- test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
- test -f $PREFIX/include/cudf/detail/utilities/column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
Expand Down Expand Up @@ -166,9 +166,11 @@ outputs:
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/contains.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/detail/dremel.hpp
- test -f $PREFIX/include/cudf/lists/detail/extract.hpp
- test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
- test -f $PREFIX/include/cudf/lists/detail/scatter_helper.cuh
- test -f $PREFIX/include/cudf/lists/detail/set_operations.hpp
- test -f $PREFIX/include/cudf/lists/detail/sorting.hpp
- test -f $PREFIX/include/cudf/lists/detail/stream_compaction.hpp
- test -f $PREFIX/include/cudf/lists/explode.hpp
Expand All @@ -178,6 +180,7 @@ outputs:
- test -f $PREFIX/include/cudf/lists/list_view.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
- test -f $PREFIX/include/cudf/lists/set_operations.hpp
- test -f $PREFIX/include/cudf/lists/sorting.hpp
- test -f $PREFIX/include/cudf/lists/stream_compaction.hpp
- test -f $PREFIX/include/cudf/merge.hpp
Expand Down
14 changes: 12 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,15 @@ rapids_cuda_init_architectures(CUDF)

project(
CUDF
VERSION 22.08.00
VERSION 22.10.00
LANGUAGES C CXX CUDA
)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
message(
FATAL_ERROR
"libcudf requires CUDA Toolkit 11.5+ to compile (nvcc ${CMAKE_CUDA_COMPILER_VERSION} provided)"
)
endif()

# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
# have different values for the `Threads::Threads` target. Setting this flag ensures
Expand All @@ -47,7 +53,7 @@ option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF)
option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" ON)
option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" OFF)
option(
CUDF_USE_PER_THREAD_DEFAULT_STREAM
"Build cuDF with per-thread default stream, including passing the per-thread default
Expand Down Expand Up @@ -299,6 +305,7 @@ add_library(
src/hash/hashing.cu
src/hash/md5_hash.cu
src/hash/murmur_hash.cu
src/hash/spark_murmur_hash.cu
src/interop/dlpack.cpp
src/interop/from_arrow.cu
src/interop/to_arrow.cu
Expand All @@ -323,6 +330,7 @@ add_library(
src/io/json/json_gpu.cu
src/io/json/nested_json_gpu.cu
src/io/json/reader_impl.cu
src/io/json/experimental/read_json.cpp
src/io/orc/aggregate_orc_metadata.cpp
src/io/orc/dict_enc.cu
src/io/orc/orc.cpp
Expand Down Expand Up @@ -379,13 +387,15 @@ add_library(
src/lists/copying/segmented_gather.cu
src/lists/copying/scatter_helper.cu
src/lists/count_elements.cu
src/lists/dremel.cu
src/lists/explode.cu
src/lists/extract.cu
src/lists/interleave_columns.cu
src/lists/lists_column_factories.cu
src/lists/lists_column_view.cu
src/lists/segmented_sort.cu
src/lists/sequences.cu
src/lists/set_operations.cu
src/lists/stream_compaction/apply_boolean_mask.cu
src/lists/stream_compaction/distinct.cu
src/lists/utilities.cu
Expand Down
10 changes: 8 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,9 @@ ConfigureBench(
REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp
reduction/reduce.cpp reduction/scan.cpp
)
ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu reduction/rank.cpp)
ConfigureNVBench(
REDUCTION_NVBENCH reduction/distinct_count.cpp reduction/rank.cpp reduction/segment_reduce.cu
)

# ##################################################################################################
# * reduction benchmark ---------------------------------------------------------------------------
Expand All @@ -199,7 +201,10 @@ ConfigureBench(
groupby/group_struct_values.cpp groupby/group_no_requests.cpp groupby/group_scan.cpp
)

ConfigureNVBench(GROUPBY_NVBENCH groupby/group_rank.cpp groupby/group_struct_keys.cpp)
ConfigureNVBench(
GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_nunique.cpp groupby/group_rank.cpp
groupby/group_struct_keys.cpp
)

# ##################################################################################################
# * hashing benchmark -----------------------------------------------------------------------------
Expand Down Expand Up @@ -288,6 +293,7 @@ ConfigureBench(
# ##################################################################################################
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cu)
ConfigureNVBench(FST_NVBENCH io/fst.cu)

# ##################################################################################################
# * io benchmark ---------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <rmm/device_uvector.hpp>

#include <thrust/binary_search.h>
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include <thrust/execution_policy.h>
#include <thrust/fill.h>
Expand Down
Loading

0 comments on commit 3b2acb2

Please sign in to comment.