Skip to content

Commit

Permalink
Merge branch 'branch-21.10' of github.com:rapidsai/cudf into bug-orc-…
Browse files Browse the repository at this point in the history
…statistics-default
  • Loading branch information
ayushdg committed Sep 23, 2021
2 parents 2e3fa15 + c431650 commit f7f8808
Show file tree
Hide file tree
Showing 237 changed files with 8,728 additions and 4,568 deletions.
4 changes: 2 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ function install_dask {
# Install the main version of dask, distributed, and streamz
gpuci_logger "Install the main version of dask, distributed, and streamz"
set -x
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
pip install "git+https://github.com/dask/distributed.git@2021.07.1" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@2021.07.1" --upgrade --no-deps
# Need to uninstall streamz that is already in the env.
pip uninstall -y streamz
pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<1.4.0dev0
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down Expand Up @@ -58,7 +58,7 @@ dependencies:
- transformers
- pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
- git+https://github.com/dask/dask.git@2021.07.1
- git+https://github.com/dask/distributed.git@2021.07.1
- git+https://github.com/python-streamz/streamz.git@master
- pyorc
6 changes: 3 additions & 3 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<1.4.0dev0
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down Expand Up @@ -58,7 +58,7 @@ dependencies:
- transformers
- pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
- git+https://github.com/dask/dask.git@2021.07.1
- git+https://github.com/dask/distributed.git@2021.07.1
- git+https://github.com/python-streamz/streamz.git@master
- pyorc
12 changes: 6 additions & 6 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ requirements:
- protobuf
- python
- typing_extensions
- pandas >=1.0,<1.3.0dev0
- pandas >=1.0,<1.4.0dev0
- cupy >7.1.0,<10.0.0a0
- numba >=0.53.1
- numpy
Expand All @@ -51,11 +51,11 @@ requirements:
- packaging
- cachetools

test:
requires:
- cudatoolkit {{ cuda_version }}.*
imports:
- cudf
test: # [linux64]
requires: # [linux64]
- cudatoolkit {{ cuda_version }}.* # [linux64]
imports: # [linux64]
- cudf # [linux64]

about:
home: http://rapids.ai/
Expand Down
10 changes: 5 additions & 5 deletions conda/recipes/cudf_kafka/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ requirements:
- python-confluent-kafka
- cudf {{ version }}

test:
requires:
- cudatoolkit {{ cuda_version }}.*
imports:
- cudf_kafka
test: # [linux64]
requires: # [linux64]
- cudatoolkit {{ cuda_version }}.* # [linux64]
imports: # [linux64]
- cudf_kafka # [linux64]

about:
home: http://rapids.ai/
Expand Down
10 changes: 5 additions & 5 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ requirements:
- python-confluent-kafka
- cudf_kafka {{ version }}

test:
requires:
- cudatoolkit {{ cuda_version }}.*
imports:
- custreamz
test: # [linux64]
requires: # [linux64]
- cudatoolkit {{ cuda_version }}.* # [linux64]
imports: # [linux64]
- custreamz # [linux64]

about:
home: http://rapids.ai/
Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ requirements:
- cudf {{ version }}
- dask>=2021.6.0
- distributed>=2021.6.0
test:
requires:
- cudatoolkit {{ cuda_version }}.*

test: # [linux64]
requires: # [linux64]
- cudatoolkit {{ cuda_version }}.* # [linux64]


about:
Expand Down
9 changes: 9 additions & 0 deletions conda/recipes/dask-cudf/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ function logger() {
echo -e "\n>>>> $@\n"
}

# Importing cudf on arm64 CPU-only nodes currently does not work due to a
# difference in reported GPU devices between arm64 and amd64
ARCH=$(arch)

if [ "${ARCH}" = "aarch64" ]; then
logger "Skipping tests on arm64"
exit 0
fi

# Install the latest version of dask and distributed
logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ test:
- test -f $PREFIX/include/cudf_test/cudf_gtest.hpp
- test -f $PREFIX/include/cudf_test/cxxopts.hpp
- test -f $PREFIX/include/cudf_test/file_utilities.hpp
- test -f $PREFIX/include/cudf_test/io_metadata_utilities.hpp
- test -f $PREFIX/include/cudf_test/iterator_utilities.hpp
- test -f $PREFIX/include/cudf_test/table_utilities.hpp
- test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh
Expand Down
37 changes: 13 additions & 24 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
# - dependencies ----------------------------------------------------------------------------------

# find zlib
rapids_find_package(ZLIB REQUIRED
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports)
rapids_find_package(ZLIB REQUIRED)

# find Threads (needed by cudftestutil)
rapids_find_package(Threads REQUIRED
BUILD_EXPORT_SET cudf-exports
Expand All @@ -114,6 +113,8 @@ rapids_find_package(Threads REQUIRED
rapids_cpm_init()
# find jitify
include(cmake/thirdparty/get_jitify.cmake)
# find nvCOMP
include(cmake/thirdparty/get_nvcomp.cmake)
# find thrust/cub
include(cmake/thirdparty/get_thrust.cmake)
# find rmm
Expand Down Expand Up @@ -232,7 +233,6 @@ add_library(cudf
src/groupby/sort/group_sum.cu
src/groupby/sort/scan.cpp
src/groupby/sort/group_count_scan.cu
src/groupby/sort/group_dense_rank_scan.cu
src/groupby/sort/group_max_scan.cu
src/groupby/sort/group_min_scan.cu
src/groupby/sort/group_rank_scan.cu
Expand Down Expand Up @@ -505,11 +505,12 @@ add_dependencies(cudf jitify_preprocess_run)

# Specify the target module library dependencies
target_link_libraries(cudf
PUBLIC ZLIB::ZLIB
${ARROW_LIBRARIES}
PUBLIC ${ARROW_LIBRARIES}
cudf::Thrust
rmm::rmm
PRIVATE cuco::cuco)
PRIVATE cuco::cuco
ZLIB::ZLIB
nvcomp::nvcomp)

# Add Conda library, and include paths if specified
if(TARGET conda_env)
Expand Down Expand Up @@ -564,6 +565,7 @@ add_library(cudftestutil STATIC
tests/utilities/base_fixture.cpp
tests/utilities/column_utilities.cu
tests/utilities/table_utilities.cu
tests/io/metadata_utilities.cpp
tests/strings/utilities.cu)

set_target_properties(cudftestutil
Expand Down Expand Up @@ -662,8 +664,6 @@ rapids_export_write_dependencies(INSTALL cudf-testing-exports

set(doc_string
[=[
#[=======================================================================[

Provide targets for the cudf library.

Built based on the Apache Arrow columnar memory format, cuDF is a GPU DataFrame
Expand All @@ -687,19 +687,6 @@ This module offers an optional testing component which defines the
following IMPORTED GLOBAL targets:

cudf::cudftestutil - The main cudf testing library

Result Variables
^^^^^^^^^^^^^^^^

This module will set the following variables in your project::

CUDF_FOUND
CUDF_VERSION
CUDF_VERSION_MAJOR
CUDF_VERSION_MINOR
CUDF_VERSION_PATCH

#]=======================================================================]
]=])


Expand All @@ -726,6 +713,7 @@ rapids_export(INSTALL cudf
EXPORT_SET cudf-exports
GLOBAL_TARGETS cudf
NAMESPACE cudf::
DOCUMENTATION doc_string
FINAL_CODE_BLOCK install_code_string)

################################################################################################
Expand All @@ -748,10 +736,11 @@ rapids_export(BUILD cudf
EXPORT_SET cudf-exports
GLOBAL_TARGETS cudf
NAMESPACE cudf::
FINAL_CODE_BLOCK code_string)
DOCUMENTATION doc_string
FINAL_CODE_BLOCK build_code_string)

export(EXPORT cudf-testing-exports
FILE ${CUDF_BINARY_DIR}/cudf-testing.cmake
FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake
NAMESPACE cudf::)
rapids_export_write_dependencies(BUILD cudf-testing-exports
"${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake")
Expand Down
14 changes: 9 additions & 5 deletions cpp/benchmarks/join/conditional_join_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ template <typename key_type, typename payload_type>
class ConditionalJoin : public cudf::benchmark {
};

// For compatibility with the shared logic for equality (hash) joins, all of
// the join lambdas defined by these macros accept a null_equality parameter
// but ignore it (don't forward it to the underlying join implementation)
// because conditional joins do not use this parameter.
#define CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \
(::benchmark::State & st) \
Expand All @@ -28,7 +32,7 @@ class ConditionalJoin : public cudf::benchmark {
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_inner_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_inner_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand All @@ -47,7 +51,7 @@ CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit_nulls, int6
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_left_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_left_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand All @@ -66,7 +70,7 @@ CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit_nulls, int64_
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_inner_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_inner_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand All @@ -85,7 +89,7 @@ CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit_nulls, int64_
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_left_anti_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_left_anti_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand Down Expand Up @@ -116,7 +120,7 @@ CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit_nul
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_left_semi_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_left_semi_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand Down
56 changes: 28 additions & 28 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -101,34 +101,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
"ARROW_PROTOBUF_USE_SHARED ${ARROW_BUILD_SHARED}"
"ARROW_ZSTD_USE_SHARED ${ARROW_BUILD_SHARED}")

if(Arrow_ADDED)
rapids_export(BUILD Arrow
VERSION ${VERSION}
EXPORT_SET arrow_targets
GLOBAL_TARGETS arrow_shared arrow_static
NAMESPACE cudf::)

rapids_export(BUILD ArrowCUDA
VERSION ${VERSION}
EXPORT_SET arrow_cuda_targets
GLOBAL_TARGETS arrow_cuda_shared arrow_cuda_static
NAMESPACE cudf::)
endif()
# We generate the arrow-config and arrowcuda-config files
# when we build Arrow locally, so always do `find_dependency`
rapids_export_package(BUILD Arrow cudf-exports)
rapids_export_package(INSTALL Arrow cudf-exports)

# We have to generate the find_dependency(ArrowCUDA) ourselves
# because Arrow packages ArrowCUDA.config in a non-standard
# location, so ArrowCUDA_DIR must be set to where Arrow
# was found
rapids_export_package(BUILD ArrowCUDA cudf-exports)

include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports)
rapids_export_find_package_root(BUILD ArrowCUDA [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports)

set(ARROW_FOUND TRUE)
set(ARROW_LIBRARIES "")

Expand Down Expand Up @@ -184,6 +156,34 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
message(FATAL_ERROR "CUDF: Arrow library not found or downloaded.")
endif()

if(Arrow_ADDED)
rapids_export(BUILD Arrow
VERSION ${VERSION}
EXPORT_SET arrow_targets
GLOBAL_TARGETS arrow_shared arrow_static
NAMESPACE cudf::)

rapids_export(BUILD ArrowCUDA
VERSION ${VERSION}
EXPORT_SET arrow_cuda_targets
GLOBAL_TARGETS arrow_cuda_shared arrow_cuda_static
NAMESPACE cudf::)
endif()
# We generate the arrow-config and arrowcuda-config files
# when we build Arrow locally, so always do `find_dependency`
rapids_export_package(BUILD Arrow cudf-exports)
rapids_export_package(INSTALL Arrow cudf-exports)

# We have to generate the find_dependency(ArrowCUDA) ourselves
# because Arrow packages ArrowCUDA.config in a non-standard
# location, so ArrowCUDA_DIR must be set to where Arrow
# was found
rapids_export_package(BUILD ArrowCUDA cudf-exports)

include("${rapids-cmake-dir}/export/find_package_root.cmake")
rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports)
rapids_export_find_package_root(BUILD ArrowCUDA [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports)

set(ARROW_FOUND "${ARROW_FOUND}" PARENT_SCOPE)
set(ARROW_LIBRARIES "${ARROW_LIBRARIES}" PARENT_SCOPE)

Expand Down
6 changes: 1 addition & 5 deletions cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@

function(find_and_configure_cucollections)

if(TARGET cuco::cuco)
return()
endif()

# Find or install cuCollections
CPMFindPackage(NAME cuco
rapids_cpm_find(cuco 0.0
GLOBAL_TARGETS cuco::cuco
CPM_ARGS
GITHUB_REPOSITORY NVIDIA/cuCollections
Expand Down
Loading

0 comments on commit f7f8808

Please sign in to comment.