Merge branch 'branch-21.12' into enh-groupby_cache_hashed

rapidsai · Oct 4, 2021 · 7d7eda5 · 7d7eda5
2 parents ece8279 + d68e626
commit 7d7eda5
Show file tree

Hide file tree

Showing 156 changed files with 3,177 additions and 2,430 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -59,6 +59,7 @@ repos:
         hooks:
               - id: mypy
                 args: ["--config-file=python/cudf/setup.cfg", "python/cudf/cudf"]
+                pass_filenames: false
       - repo: https://github.com/pycqa/pydocstyle
         rev: 6.0.0
         hooks:

diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh
@@ -3,6 +3,16 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 set -e
 
+ARCH=$(arch)
+if [ "${ARCH}" = "x86_64" ]; then
+    DEFAULT_CUDA_VER="11.0"
+elif [ "${ARCH}" = "aarch64" ]; then
+    DEFAULT_CUDA_VER="11.2"
+else
+    echo "Unsupported arch ${ARCH}"
+    exit 1
+fi
+
 #Always upload cudf Python package
 export UPLOAD_CUDF=1
 
@@ -14,14 +24,14 @@ else
 fi
 
 # upload cudf_kafka for all versions of Python
-if [[ "$CUDA" == "11.0" ]]; then
+if [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
     export UPLOAD_CUDF_KAFKA=1
 else
     export UPLOAD_CUDF_KAFKA=0
 fi
 
 #We only want to upload libcudf_kafka once per python/CUDA combo
-if [[ "$PYTHON" == "3.7" ]] && [[ "$CUDA" == "11.0" ]]; then
+if [[ "$PYTHON" == "3.7" ]] && [[ "$CUDA" == "${DEFAULT_CUDA_VER}" ]]; then
     export UPLOAD_LIBCUDF_KAFKA=1
 else
     export UPLOAD_LIBCUDF_KAFKA=0
@@ -31,4 +41,4 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
     #If project flash is not active, always build both
     export BUILD_LIBCUDF=1
     export BUILD_CUDF=1
-fi
+fi
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
@@ -56,7 +56,7 @@ dependencies:
   - protobuf
   - nvtx>=0.2.1
   - cachetools
-  - transformers
+  - transformers<=4.10.3
   - pydata-sphinx-theme
   - pip:
       - git+https://github.com/dask/dask.git@main

diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
@@ -56,7 +56,7 @@ dependencies:
   - protobuf
   - nvtx>=0.2.1
   - cachetools
-  - transformers
+  - transformers<=4.10.3
   - pydata-sphinx-theme
   - pip:
       - git+https://github.com/dask/dask.git@main

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -473,8 +473,7 @@ target_include_directories(cudf
                        "$<BUILD_INTERFACE:${CUDF_GENERATED_INCLUDE_DIR}/include>"
            PRIVATE     "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
            INTERFACE   "$<INSTALL_INTERFACE:include>"
-                       "$<INSTALL_INTERFACE:include/libcudf/libcudacxx>"
-                       "$<INSTALL_INTERFACE:include/libcudf/Thrust>")
+                       "$<INSTALL_INTERFACE:include/libcudf/libcudacxx>")
 
 target_compile_definitions(cudf
             PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_DEFINITIONS}>"
@@ -511,7 +510,7 @@ target_link_libraries(cudf
                   cudf::Thrust
                   rmm::rmm
            PRIVATE cuco::cuco
-	             ZLIB::ZLIB
+                   ZLIB::ZLIB
                    nvcomp::nvcomp)
 
 # Add Conda library, and include paths if specified
@@ -692,6 +691,40 @@ following IMPORTED GLOBAL  targets:
     ]=])
 
 
+set(common_code_string
+    [=[
+if(NOT TARGET cudf::Thrust)
+  thrust_create_target(cudf::Thrust FROM_OPTIONS)
+endif()
+
+# nvcc automatically adds the CUDA Toolkit system include paths before any
+# system include paths that CMake adds.
+#
+# CMake implicitly treats all includes on import targets as 'SYSTEM' includes.
+#
+# To get the cudacxx shipped with cudf to be picked up by consumers instead of the
+# version shipped with the CUDA Toolkit we need to make sure it is a non-SYSTEM
+# include on the CMake side.
+#
+# To do this currently, we move the includes from the cudf::cudf target to a
+# non-import target to ensure they are `-I` instead of `-isystem`
+
+add_library(cudf_non_system_includes INTERFACE)
+target_link_libraries(cudf::cudf INTERFACE cudf_non_system_includes)
+
+get_target_property(all_includes cudf::cudf INTERFACE_INCLUDE_DIRECTORIES)
+set(system_includes )
+set(normal_includes )
+foreach(include IN LISTS all_includes)
+  if(include MATCHES "/include/libcudf/")
+    list(APPEND normal_includes "${include}")
+  else()
+    list(APPEND system_includes "${include}")
+  endif()
+endforeach()
+set_target_properties(cudf::cudf PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${system_includes}")
+set_target_properties(cudf_non_system_includes PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${normal_includes}")
+]=])
 set(install_code_string
     [=[
 set(ArrowCUDA_DIR "${Arrow_DIR}")
@@ -705,11 +738,8 @@ if(testing IN_LIST cudf_FIND_COMPONENTS)
     include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
   endif()
 endif()
-
-if(NOT TARGET cudf::Thrust)
-  thrust_create_target(cudf::Thrust FROM_OPTIONS)
-endif()
 ]=])
+string(APPEND install_code_string "${common_code_string}")
 
 rapids_export(INSTALL cudf
     EXPORT_SET cudf-exports
@@ -728,11 +758,8 @@ endif()
 if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
   include("${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
 endif()
-
-if(NOT TARGET cudf::Thrust)
-  thrust_create_target(cudf::Thrust FROM_OPTIONS)
-endif()
 ]=])
+string(APPEND build_code_string "${common_code_string}")
 
 rapids_export(BUILD cudf
     EXPORT_SET cudf-exports

diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -157,17 +157,40 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
     endif()
 
     if(Arrow_ADDED)
+        set(arrow_code_string
+        [=[
+        if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared))
+            add_library(arrow_shared ALIAS cudf::arrow_shared)
+        endif()
+        if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static))
+            add_library(arrow_static ALIAS cudf::arrow_static)
+        endif()
+        ]=]
+        )
+        set(arrow_cuda_code_string
+        [=[
+        if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared))
+            add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared)
+        endif()
+        if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static))
+            add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static)
+        endif()
+        ]=]
+        )
+
         rapids_export(BUILD Arrow
           VERSION ${VERSION}
           EXPORT_SET arrow_targets
-          GLOBAL_TARGETS arrow_shared arrow_static
-          NAMESPACE cudf::)
+          GLOBAL_TARGETS arrow_shared arrow_static
+          NAMESPACE cudf::
+          FINAL_CODE_BLOCK arrow_code_string)
 
         rapids_export(BUILD ArrowCUDA
           VERSION ${VERSION}
           EXPORT_SET arrow_cuda_targets
           GLOBAL_TARGETS arrow_cuda_shared arrow_cuda_static
-          NAMESPACE cudf::)
+          NAMESPACE cudf::
+          FINAL_CODE_BLOCK arrow_cuda_code_string)
     endif()
     # We generate the arrow-config and arrowcuda-config files
     # when we built arrow locally, so always do `find_dependency`

diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
         GLOBAL_TARGETS cuco::cuco
         CPM_ARGS
             GITHUB_REPOSITORY NVIDIA/cuCollections
-            GIT_TAG           0d602ae21ea4f38d23ed816aa948453d97b2ee4e
+            GIT_TAG           729857a5698a0e8d8f812e0464f65f37854ae17b
             OPTIONS           "BUILD_TESTS OFF"
                               "BUILD_BENCHMARKS OFF"
                               "BUILD_EXAMPLES OFF"

diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/copy.hpp>
-#include <cudf/detail/gather.cuh>
+#include <cudf/detail/gather.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/utilities/device_atomics.cuh>
@@ -36,12 +36,15 @@
 #include <rmm/device_buffer.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
 
 #include <algorithm>
 
-namespace {
+namespace cudf {
+namespace detail {
+
 // Compute the count of elements that pass the mask within each block
 template <typename Filter, int block_size>
 __global__ void compute_block_counts(cudf::size_type* __restrict__ block_counts,
@@ -293,9 +296,9 @@ struct scatter_gather_functor {
                     filter);
 
     auto output_table = cudf::detail::gather(cudf::table_view{{input}},
-                                             indices.begin(),
-                                             indices.end(),
+                                             indices,
                                              cudf::out_of_bounds_policy::DONT_CHECK,
+                                             cudf::detail::negative_index_policy::NOT_ALLOWED,
                                              stream,
                                              mr);
 
@@ -304,10 +307,6 @@ struct scatter_gather_functor {
   }
 };
 
-}  // namespace
-
-namespace cudf {
-namespace detail {
 /**
  * @brief Filters `input` using a Filter function object
  *

diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -152,8 +152,8 @@ __launch_bounds__(block_size) __global__
  * @param filter      Function of type `FilterFn` which determines for index `i` where to get the
  *                    corresponding output value from
  * @param out_type    `cudf::data_type` of the returned column
- * @param mr          Device memory resource used to allocate the returned column's device memory
  * @param stream      CUDA stream used for device memory operations and kernel launches.
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return            A new column that contains the values from either `lhs` or `rhs` as determined
  *                    by `filter[i]`
  */

diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
@@ -640,8 +640,8 @@ void gather_bitmask(table_view const& source,
  * use `DONT_CHECK` when they are certain that the gather_map contains only valid indices for
  * better performance. In case there are out-of-bound indices in the gather map, the behavior
  * is undefined. Defaults to `DONT_CHECK`.
- * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return cudf::table Result of the gather
  */
 template <typename MapIterator>

diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,10 +16,10 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/table/table_view.hpp>
-
 #include <cudf/copying.hpp>
 #include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -55,10 +55,10 @@ enum class negative_index_policy : bool { ALLOWED, NOT_ALLOWED };
  * indices. If `policy` is set to `DONT_CHECK` and there are out-of-bounds indices in `gather_map`,
  * the behavior is undefined.
  * @param[in] negative_index_policy Interpret each negative index `i` in the
- * gathermap as the positive index `i+num_source_rows`.
- * @param[in] mr Device memory resource used to allocate the returned table's device memory
+ * `gather_map` as the positive index `i+num_source_rows`.
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- * @return cudf::table Result of the gather
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
+ * @return Result of the gather
  */
 std::unique_ptr<table> gather(
   table_view const& source_table,
@@ -67,5 +67,21 @@ std::unique_ptr<table> gather(
   negative_index_policy neg_indices,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::detail::gather(table_view const&,column_view const&,
+ * cudf::out_of_bounds_policy,cudf::detail::negative_index_policy,rmm::cuda_stream_view,
+ * rmm::mr::device_memory_resource*)
+ *
+ * @throws cudf::logic_error if `gather_map` span size is larger than max of `size_type`.
+ */
+std::unique_ptr<table> gather(
+  table_view const& source_table,
+  device_span<size_type const> const gather_map,
+  out_of_bounds_policy bounds_policy,
+  negative_index_policy neg_indices,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 }  // namespace detail
 }  // namespace cudf