Merge branch 'branch-21.06' into struct_scalar

rapidsai · May 17, 2021 · a242363 · a242363
2 parents d58938d + 8406522
commit a242363
Show file tree

Hide file tree

Showing 38 changed files with 2,899 additions and 1,861 deletions.
diff --git a/README.md b/README.md
@@ -65,15 +65,15 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids
 
 cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:
 
-For `cudf version == 0.19` :
+For `cudf version == 21.06` :
 ```bash
 # for CUDA 10.1
 conda install -c rapidsai -c nvidia -c numba -c conda-forge \
-    cudf=0.19 python=3.7 cudatoolkit=10.1
+    cudf=21.06 python=3.7 cudatoolkit=10.1
 
 # or, for CUDA 10.2
 conda install -c rapidsai -c nvidia -c numba -c conda-forge \
-    cudf=0.19 python=3.7 cudatoolkit=10.2
+    cudf=21.06 python=3.7 cudatoolkit=10.2
 
 ```
 

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
@@ -87,6 +87,7 @@ test:
     - test -f $PREFIX/include/cudf/detail/reshape.hpp
     - test -f $PREFIX/include/cudf/detail/rolling.hpp
     - test -f $PREFIX/include/cudf/detail/round.hpp
+    - test -f $PREFIX/include/cudf/detail/scan.hpp
     - test -f $PREFIX/include/cudf/detail/scatter.hpp
     - test -f $PREFIX/include/cudf/detail/search.hpp
     - test -f $PREFIX/include/cudf/detail/sequence.hpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -28,7 +28,7 @@ elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
   set(CUDF_BUILD_FOR_DETECTED_ARCHS TRUE)
 endif()
 
-project(CUDF VERSION 0.20.0 LANGUAGES C CXX)
+project(CUDF VERSION 21.06.00 LANGUAGES C CXX)
 
 # Needed because GoogleBenchmark changes the state of FindThreads.cmake,
 # causing subsequent runs to have different values for the `Threads::Threads` target.
@@ -293,7 +293,9 @@ add_library(cudf
     src/reductions/nth_element.cu
     src/reductions/product.cu
     src/reductions/reductions.cpp
-    src/reductions/scan.cu
+    src/reductions/scan/scan.cpp
+    src/reductions/scan/scan_exclusive.cu
+    src/reductions/scan/scan_inclusive.cu
     src/reductions/std.cu
     src/reductions/sum.cu
     src/reductions/sum_of_squares.cu

diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch
@@ -42,6 +42,45 @@ index 1ffeef0..5e80800 100644
            for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM)
            {
              if (ITEMS_PER_THREAD * tid + ITEM < num_remaining)
+diff a/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/device/dispatch/dispatch_radix_sort.cuh
+index 41eb1d2..f2893b4 100644
+--- a/cub/device/dispatch/dispatch_radix_sort.cuh
++++ b/cub/device/dispatch/dispatch_radix_sort.cuh
+@@ -723,7 +723,7 @@ struct DeviceRadixSortPolicy
+
+
+     /// SM60 (GP100)
+-    struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
++    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+     {
+         enum {
+             PRIMARY_RADIX_BITS      = (sizeof(KeyT) > 1) ? 7 : 5,    // 6.9B 32b keys/s (Quadro P100)
+diff a/cub/device/dispatch/dispatch_reduce.cuh b/cub/device/dispatch/dispatch_reduce.cuh
+index f6aee45..dd64301 100644
+--- a/cub/device/dispatch/dispatch_reduce.cuh
++++ b/cub/device/dispatch/dispatch_reduce.cuh
+@@ -284,7 +284,7 @@ struct DeviceReducePolicy
+     };
+
+     /// SM60
+-    struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
++    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+     {
+         // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
+         typedef AgentReducePolicy<
+diff a/cub/device/dispatch/dispatch_scan.cuh b/cub/device/dispatch/dispatch_scan.cuh
+index c0c6d59..937ee31 100644
+--- a/cub/device/dispatch/dispatch_scan.cuh
++++ b/cub/device/dispatch/dispatch_scan.cuh
+@@ -178,7 +178,7 @@ struct DeviceScanPolicy
+     };
+
+     /// SM600
+-    struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
++    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+     {
+         typedef AgentScanPolicy<
+                 128, 15,                                        ///< Threads per block, items per thread
 diff --git a/thrust/system/cuda/detail/scan_by_key.h b/thrust/system/cuda/detail/scan_by_key.h
 index fe4b321c..b3974c69 100644
 --- a/thrust/system/cuda/detail/scan_by_key.h

diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "libcudf"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 0.20.0
+PROJECT_NUMBER         = 21.06.00
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -2167,7 +2167,7 @@ SKIP_FUNCTION_MACROS   = YES
 # the path). If a tag file is not located in the directory in which doxygen is
 # run, you must also specify the path to the tagfile here.
 
-TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/0.20
+TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/21.06
 
 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a
 # tag file that is based on the input files it reads. See section "Linking to

diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace detail {
+
+/**
+ * @brief Computes the exclusive scan of a column.
+ *
+ * The null values are skipped for the operation, and if an input element
+ * at `i` is null, then the output element at `i` will also be null.
+ *
+ * The identity value for the column type as per the aggregation type
+ * is used for the value of the first element in the output column.
+ *
+ * @throws cudf::logic_error if column data_type is not an arithmetic type.
+ *
+ * @param input The input column view for the scan
+ * @param agg unique_ptr to aggregation operator applied by the scan
+ * @param null_handling Exclude null values when computing the result if
+ *                      null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE.
+ *                      Any operation with a null results in a null.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @returns Column with scan results
+ */
+std::unique_ptr<column> scan_exclusive(column_view const& input,
+                                       std::unique_ptr<aggregation> const& agg,
+                                       null_policy null_handling,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Computes the inclusive scan of a column.
+ *
+ * The null values are skipped for the operation, and if an input element
+ * at `i` is null, then the output element at `i` will also be null.
+ *
+ * String columns are allowed with aggregation types Min and Max.
+ *
+ * @throws cudf::logic_error if column data_type is neither an arithmetic type nor a string type,
+ *                           or if it is a string type but `agg` is not Min or Max
+ *
+ * @param input The input column view for the scan
+ * @param agg unique_ptr to aggregation operator applied by the scan
+ * @param null_handling Exclude null values when computing the result if
+ *                      null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE.
+ *                      Any operation with a null results in a null.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @returns Column with scan results
+ */
+std::unique_ptr<column> scan_inclusive(column_view const& input,
+                                       std::unique_ptr<aggregation> const& agg,
+                                       null_policy null_handling,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr);
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp
@@ -184,28 +184,27 @@ std::unique_ptr<column> concatenate(
  * s = [ {'aa', 'bb', 'cc'}, null, {'', 'dd'}, {'ee', null}, {'ff', 'gg'} ]
  * sep  = ['::', '%%',  '!',  '*',  null]
  *
- * r1 = concatenate(s, sep)
+ * r1 = strings::concatenate_list_elements(s, sep)
  * r1 is ['aa::bb::cc', null, '!dd', null, null]
  *
- * r2 = concatenate(s, sep, ':', '_')
+ * r2 = strings::concatenate_list_elements(s, sep, ':', '_')
  * r2 is ['aa::bb::cc', null,  '!dd', 'ee*_', 'ff:gg']
  * @endcode
  *
  * @throw cudf::logic_error if input column is not lists of strings column.
  * @throw cudf::logic_error if the number of rows from `separators` and `lists_strings_column` do
- * not match
- *
- * @param lists_strings_column   Column containing lists of strings to concatenate
- * @param separators             Strings column that provides separators for concatenation
- * @param separator_narep        String that should be used to replace null separator, default is an
- * invalid-scalar denoting that rows containing null separator will result in null string in the
- * corresponding output rows
- * @param string_narep           String that should be used to replace null strings in any
- * non-null list row, default is an invalid-scalar denoting that list rows containing null strings
- * will result in null string in the corresponding output rows
- * @param mr                     Device memory resource used to allocate the returned column's
- * device memory
- * @return                       New strings column with concatenated results
+ *        not match
+ *
+ * @param lists_strings_column Column containing lists of strings to concatenate.
+ * @param separators Strings column that provides separators for concatenation.
+ * @param separator_narep String that should be used to replace null separator, default is an
+ *        invalid-scalar denoting that rows containing null separator will result in null string in
+ *        the corresponding output rows.
+ * @param string_narep String that should be used to replace null strings in any non-null list row,
+ *        default is an invalid-scalar denoting that list rows containing null strings will result
+ *        in null string in the corresponding output rows.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return New strings column with concatenated results.
  */
 std::unique_ptr<column> concatenate_list_elements(
   const lists_column_view& lists_strings_column,
@@ -229,25 +228,24 @@ std::unique_ptr<column> concatenate_list_elements(
  * Example:
  * s = [ {'aa', 'bb', 'cc'}, null, {'', 'dd'}, {'ee', null}, {'ff'} ]
  *
- * r1 = concatenate(s)
+ * r1 = strings::concatenate_list_elements(s)
  * r1 is ['aabbcc', null, 'dd', null, 'ff']
  *
- * r2 = concatenate(s, ':', '_')
+ * r2 = strings::concatenate_list_elements(s, ':', '_')
  * r2 is ['aa:bb:cc', null,  ':dd', 'ee:_', 'ff']
  * @endcode
  *
  * @throw cudf::logic_error if input column is not lists of strings column.
  * @throw cudf::logic_error if separator is not valid.
  *
- * @param lists_strings_column   Column containing lists of strings to concatenate
- * @param separator              String that should inserted between strings of each list row,
- * default is an empty string
- * @param narep                  String that should be used to replace null strings in any non-null
- * list row, default is an invalid-scalar denoting that list rows containing null strings will
- * result in null string in the corresponding output rows
- * @param mr                     Device memory resource used to allocate the returned column's
- * device memory
- * @return                       New strings column with concatenated results
+ * @param lists_strings_column Column containing lists of strings to concatenate.
+ * @param separator String that should be inserted between strings of each list row, default is an
+ *        empty string.
+ * @param narep String that should be used to replace null strings in any non-null list row, default
+ *        is an invalid-scalar denoting that list rows containing null strings will result in null
+ *        string in the corresponding output rows.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return New strings column with concatenated results.
  */
 std::unique_ptr<column> concatenate_list_elements(
   const lists_column_view& lists_strings_column,

diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt
@@ -15,7 +15,7 @@
 #=============================================================================
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
 
-project(CUDA_KAFKA VERSION 0.20.0 LANGUAGES CXX)
+project(CUDA_KAFKA VERSION 21.06.00 LANGUAGES CXX)
 
 ###################################################################################################
 # - Build options

diff --git a/cpp/src/lists/concatenate_rows.cu b/cpp/src/lists/concatenate_rows.cu
@@ -37,6 +37,10 @@ namespace cudf {
 namespace lists {
 namespace detail {
 namespace {
+/**
+ * @brief Concatenate lists within the same row into one list, ignoring any null list during
+ * concatenation.
+ */
 std::unique_ptr<column> concatenate_rows_ignore_null(table_view const& input,
                                                      bool has_null_mask,
                                                      rmm::cuda_stream_view stream,
@@ -57,7 +61,7 @@ std::unique_ptr<column> concatenate_rows_ignore_null(table_view const& input,
   auto const d_offsets = list_offsets->mutable_view().template begin<offset_type>();
 
   // The array of int8_t to store validities for list elements.
-  // Since we combine multiple lists, we need to recompute list validities.
+  // Since we combine multiple lists, we may need to recompute list validities.
   auto validities = rmm::device_uvector<int8_t>(has_null_mask ? num_output_lists : 0, stream);
 
   // For an input table of `n` columns, if after interleaving we have the list offsets are
@@ -169,8 +173,8 @@ generate_list_offsets_and_validities(table_view const& input,
  *
  * This functor is called only when (has_null_mask == true and null_policy == NULLIFY_OUTPUT_ROW).
  * It is executed twice. In the first pass, the sizes and validities of the output strings will be
- * computed. In the second pass, this will concatenate the lists of strings of the given table of
- * lists columns in a row-wise manner.
+ * computed. In the second pass, this will concatenate the lists of strings on the same row from the
+ * given input table.
  */
 struct compute_string_sizes_and_concatenate_lists_fn {
   table_device_view const table_dv;
@@ -182,16 +186,15 @@ struct compute_string_sizes_and_concatenate_lists_fn {
   offset_type* d_offsets{nullptr};
 
   // If d_chars == nullptr: only compute sizes and validities of the output strings.
-  // If d_chars != nullptr: only concatenate strings.
+  // If d_chars != nullptr: only concatenate lists of strings.
   char* d_chars{nullptr};
 
   // We need to set `1` or `0` for the validities of the strings in the child column.
   int8_t* d_validities{nullptr};
 
   __device__ void operator()(size_type const idx)
   {
-    // The current row contain null, which has been identified during `dst_list_offsets`
-    // computation.
+    // The current row contains a null, which has been identified during offsets computation.
     if (dst_list_offsets[idx + 1] == dst_list_offsets[idx]) { return; }
 
     // read_idx and write_idx are indices of string elements.
@@ -205,7 +208,7 @@ struct compute_string_sizes_and_concatenate_lists_fn {
         auto const str_offsets =
           str_col.child(strings_column_view::offsets_column_index).template data<offset_type>();
 
-        // The indices of the strings within the source list.
+        // The range of indices of the strings within the source list.
         auto const start_str_idx = list_offsets[idx];
         auto const end_str_idx   = list_offsets[idx + 1];
 
@@ -305,7 +308,7 @@ struct concatenate_lists_fn {
                                       lists_col.offset();
             auto const& data_col = lists_col.child(lists_column_view::child_column_index);
 
-            // The indices of the entries within the source list.
+            // The range of indices of the entries within the source list.
             auto const start_idx = list_offsets[idx];
             auto const end_idx   = list_offsets[idx + 1];
 

diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu
@@ -106,7 +106,7 @@ struct compute_string_sizes_and_interleave_lists_fn {
   offset_type* d_offsets{nullptr};
 
   // If d_chars == nullptr: only compute sizes and validities of the output strings.
-  // If d_chars != nullptr: only concatenate strings.
+  // If d_chars != nullptr: only interleave lists of strings.
   char* d_chars{nullptr};
 
   // We need to set `1` or `0` for the validities of the strings in the child column.
@@ -128,7 +128,7 @@ struct compute_string_sizes_and_interleave_lists_fn {
     auto const str_offsets =
       str_col.child(strings_column_view::offsets_column_index).template data<offset_type>();
 
-    // The indices of the strings within the source list.
+    // The range of indices of the strings within the source list.
     auto const start_str_idx = list_offsets[list_id];
     auto const end_str_idx   = list_offsets[list_id + 1];
 
@@ -243,9 +243,10 @@ struct interleave_list_entries_fn {
           lists_col.offset();
         auto const& data_col = lists_col.child(lists_column_view::child_column_index);
 
-        // The indices of the entries within the source list.
-        auto const start_idx   = list_offsets[list_id];
-        auto const end_idx     = list_offsets[list_id + 1];
+        // The range of indices of the entries within the source list.
+        auto const start_idx = list_offsets[list_id];
+        auto const end_idx   = list_offsets[list_id + 1];
+
         auto const write_start = d_offsets[idx];
 
         // Fill the validities array if necessary.