Commit ed6d025

Merge branch 'branch-0.18' into cs_nested_empty_strings

nvdbaranec committed Jan 4, 2021
2 parents 523aef7 + fc92bb9
Showing 249 changed files with 5,919 additions and 4,018 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -2,22 +2,30 @@

## New Features
- PR #6856 Add groupby idxmin, idxmax aggregation
- PR #6847 Add a cmake find module for cuFile in JNI code
- PR #6902 Implement `DataFrame.quantile` for `datetime` and `timedelta` data types
- PR #6814 Implement `cudf::reduce` for `decimal32` and `decimal64` (part 1)
- PR #6929 Add `Index.set_names` api
- PR #6907 Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support
- PR #6885 Share `factorize` implementation with Index and cudf module
- PR #6775 Implement cudf.DateOffset for months

## Improvements

- PR #6938 Pass numeric scalars of the same dtype through numeric binops
- PR #6275 Update to official libcu++ on Github
- PR #6838 Fix `columns` & `index` handling in dataframe constructor
- PR #6750 Remove `**kwargs` from string/categorical methods
- PR #6909 Support reading byte array backed decimal columns from parquet files
- PR #6939 Use simplified `rmm::exec_policy`
- PR #6512 Refactor rolling.cu to reduce compile time
- PR #6982 Disable some pragma unroll statements in thrust `sort.h`

## Bug Fixes

- PR #6884 Correct the sampling range when sampling with replacement
- PR #6903 Add null count test for apply_boolean_mask
- PR #6922 Fix N/A detection for empty fields in CSV reader
- PR #6912 Fix rmm_mode=managed parameter for gtests
- PR #6943 Fix join with nulls not equal performance
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
@@ -205,15 +205,15 @@ fi

cd $WORKSPACE/python/cudf
gpuci_logger "Python py.test for cuDF"
py.test --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-cudf.xml -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:${WORKSPACE}/python/cudf/cudf-coverage.xml --cov-report term

cd $WORKSPACE/python/dask_cudf
gpuci_logger "Python py.test for dask-cudf"
py.test --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/dask-cudf-cuda-tmp --junitxml=${WORKSPACE}/junit-dask-cudf.xml -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:${WORKSPACE}/python/dask_cudf/dask-cudf-coverage.xml --cov-report term

cd $WORKSPACE/python/custreamz
gpuci_logger "Python py.test for cuStreamz"
py.test --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term
py.test -n 6 --cache-clear --basetemp=${WORKSPACE}/custreamz-cuda-tmp --junitxml=${WORKSPACE}/junit-custreamz.xml -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:${WORKSPACE}/python/custreamz/custreamz-coverage.xml --cov-report term

gpuci_logger "Test notebooks"
${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda10.1.yml
@@ -23,6 +23,7 @@ dependencies:
- fsspec>=0.6.0
- pytest
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda10.2.yml
@@ -23,6 +23,7 @@ dependencies:
- fsspec>=0.6.0
- pytest
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
1 change: 1 addition & 0 deletions conda/environments/cudf_dev_cuda11.0.yml
@@ -23,6 +23,7 @@ dependencies:
- fsspec>=0.6.0
- pytest
- pytest-benchmark
- pytest-xdist
- sphinx
- sphinx_rtd_theme
- sphinxcontrib-websupport
2 changes: 1 addition & 1 deletion conda/recipes/libcudf_kafka/meta.yaml
@@ -26,7 +26,7 @@ requirements:
- cmake >=3.17.0
host:
- libcudf {{ version }}
- librdkafka 1.5
- librdkafka >=1.5.0,<1.5.3
run:
- {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not

3 changes: 2 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
@@ -164,7 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}")
# - sort benchmark --------------------------------------------------------------------------------

set(SORT_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu")
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cu")

ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}")

2 changes: 1 addition & 1 deletion cpp/benchmarks/common/generate_benchmark_input.cpp
@@ -307,7 +307,7 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
*/
struct string_column_data {
std::vector<char> chars;
std::vector<int32_t> offsets;
std::vector<cudf::size_type> offsets;
std::vector<cudf::bitmask_type> null_mask;
explicit string_column_data(cudf::size_type rows, cudf::size_type size)
{
49 changes: 49 additions & 0 deletions cpp/benchmarks/sort/sort_strings_benchmark.cu
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/sorting.hpp>
#include <cudf/types.hpp>

class SortStrings : public cudf::benchmark {
};

static void BM_sort(benchmark::State& state)
{
cudf::size_type const n_rows{(cudf::size_type)state.range(0)};

auto const table = create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows});

for (auto _ : state) {
cuda_event_timer raii(state, true, 0);
cudf::sort(table->view());
}
}

#define SORT_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(SortStrings, name) \
(::benchmark::State & st) { BM_sort(st); } \
BENCHMARK_REGISTER_F(SortStrings, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 24}}) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

SORT_BENCHMARK_DEFINE(stringssort)
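
For readers less familiar with Google Benchmark's fixture macros, the final `SORT_BENCHMARK_DEFINE(stringssort)` line expands to roughly the following registration code (a sketch reconstructed from the macro body above; it is not additional code in this commit):

```cpp
// Approximate expansion of SORT_BENCHMARK_DEFINE(stringssort); it uses only
// names already defined in this file (the SortStrings fixture and BM_sort).
BENCHMARK_DEFINE_F(SortStrings, stringssort)
(::benchmark::State & st) { BM_sort(st); }
BENCHMARK_REGISTER_F(SortStrings, stringssort)
  ->RangeMultiplier(8)
  ->Ranges({{1 << 10, 1 << 24}})  // string column sizes from 1,024 to 16,777,216 rows
  ->UseManualTime()
  ->Unit(benchmark::kMillisecond);
```

The benchmark therefore reports manually timed (CUDA-event based) string sort times in milliseconds across that range of row counts.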
6 changes: 3 additions & 3 deletions cpp/benchmarks/synchronization/synchronization.hpp
@@ -17,7 +17,7 @@
/**
* @file synchronization.hpp
* @brief This is the header file for `cuda_event_timer`.
**/
*/

/**
* @brief This class serves as a wrapper for using `cudaEvent_t` as the user
@@ -54,7 +54,7 @@
BENCHMARK(sample_cuda_benchmark)->UseManualTime();
**/
*/

#ifndef CUDF_BENCH_SYNCHRONIZATION_H
#define CUDF_BENCH_SYNCHRONIZATION_H
@@ -79,7 +79,7 @@ class cuda_event_timer {
* @param[in] flush_l2_cache_ whether or not to flush the L2 cache before
* every iteration.
* @param[in] stream_ The CUDA stream we are measuring time on.
**/
*/
cuda_event_timer(benchmark::State& state,
bool flush_l2_cache,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
2 changes: 1 addition & 1 deletion cpp/docs/TRANSITIONGUIDE.md
@@ -777,7 +777,7 @@ namespace experimental{
* @param mr Memory resource used to allocate device memory for the returned
* output column
* @return std::unique_ptr<column> Newly allocated output column
**/
*/
std::unique_ptr<column> new_function(cudf::column_view input,
cudf::mutable_column_view in_out,
cudf::table_view input_table,
4 changes: 0 additions & 4 deletions cpp/include/cudf/ast/detail/linearizer.hpp
@@ -39,7 +39,6 @@ namespace detail {
*
* This enum is device-specific. For instance, intermediate data references are generated by the
* linearization process but cannot be explicitly created by the user.
*
*/
enum class device_data_reference_type {
COLUMN, // A value in a table column
@@ -52,7 +51,6 @@ enum class device_data_reference_type {
*
* This is a POD class used to create references describing data type and locations for consumption
* by the `row_evaluator`.
*
*/
struct alignas(8) device_data_reference {
device_data_reference(device_data_reference_type reference_type,
@@ -85,7 +83,6 @@ class linearizer;
*
* This class is a part of a "visitor" pattern with the `linearizer` class.
* Nodes inheriting from this class can accept visitors.
*
*/
class node {
friend class detail::linearizer;
@@ -104,7 +101,6 @@ class node {
* the nodes and constructing vectors of information that are later used by the device for
* evaluating the abstract syntax tree as a "linear" list of operators whose input dependencies are
* resolved into intermediate data storage in shared memory.
*
*/
class linearizer {
friend class literal;
3 changes: 0 additions & 3 deletions cpp/include/cudf/ast/detail/operators.hpp
@@ -931,7 +931,6 @@ struct dispatch_unary_operator_types {

/**
* @brief Functor performing a type dispatch for a unary operator.
*
*/
struct type_dispatch_unary_op {
template <ast_operator op, typename F, typename... Ts>
@@ -968,7 +967,6 @@ CUDA_HOST_DEVICE_CALLABLE constexpr void unary_operator_dispatcher(ast_operator

/**
* @brief Functor to determine the return type of an operator from its input types.
*
*/
struct return_type_functor {
/**
@@ -1057,7 +1055,6 @@ inline cudf::data_type ast_operator_return_type(ast_operator op,

/**
* @brief Functor to determine the arity (number of operands) of an operator.
*
*/
struct arity_functor {
template <ast_operator op>
1 change: 0 additions & 1 deletion cpp/include/cudf/ast/detail/transform.cuh
@@ -126,7 +126,6 @@ struct binary_row_output : public row_output {
* This class is designed for n-ary transform evaluation. Currently this class assumes that there's
* only one relevant "row index" in its methods, which corresponds to a row in a single input table
* and the same row index in an output column.
*
*/
struct row_evaluator {
friend struct row_output;
4 changes: 0 additions & 4 deletions cpp/include/cudf/ast/linearizer.hpp
@@ -32,7 +32,6 @@ namespace ast {
* @brief Enum of table references.
*
* This determines which table to use in cases with two tables (e.g. joins).
*
*/
enum class table_reference {
LEFT, // Column index in the left table
@@ -47,7 +46,6 @@ class expression;

/**
* @brief A literal value used in an abstract syntax tree.
*
*/
class literal : public detail::node {
friend class detail::linearizer;
@@ -114,7 +112,6 @@ class literal : public detail::node {

/**
* @brief A node referring to data from a column in a table.
*
*/
class column_reference : public detail::node {
friend class detail::linearizer;
@@ -194,7 +191,6 @@ class column_reference : public detail::node {

/**
* @brief An expression node holds an operator and zero or more operands.
*
*/
class expression : public detail::node {
friend class detail::linearizer;
1 change: 0 additions & 1 deletion cpp/include/cudf/ast/operators.hpp
@@ -21,7 +21,6 @@ namespace ast {

/**
* @brief Enum of supported operators.
*
*/
enum class ast_operator {
// Binary operators
6 changes: 3 additions & 3 deletions cpp/include/cudf/column/column.hpp
@@ -53,7 +53,7 @@ class column {
* @brief Construct a new column by deep copying the contents of `other`.
*
* @param other The column to copy
**/
*/
column(column const& other);

/**
@@ -77,7 +77,7 @@
* After the move, `other.size() == 0` and `other.type() = {EMPTY}`
*
* @param other The column whose contents will be moved into the new column
**/
*/
column(column&& other) noexcept;

/**
@@ -95,7 +95,7 @@
* `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on
* the first invocation of `null_count()`.
* @param children Optional, vector of child columns
**/
*/
template <typename B1, typename B2 = rmm::device_buffer>
column(data_type dtype,
size_type size,
24 changes: 23 additions & 1 deletion cpp/include/cudf/column/column_device_view.cuh
@@ -50,7 +50,6 @@ namespace detail {
* not-obvious computation of null count, which could lead to undesirable performance issues.
* This information is also generally not needed in device code, and on the host-side
* is easily accessible from the associated column_view.
*
*/
class alignas(16) column_device_view_base {
public:
@@ -795,6 +794,29 @@ __device__ inline numeric::decimal64 const column_device_view::element<numeric::
}

namespace detail {

/**
* @brief Convenience function to get offset word from a bitmask
*
* @see copy_offset_bitmask
* @see offset_bitmask_and
*/
__device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source,
size_type destination_word_index,
size_type source_begin_bit,
size_type source_end_bit)
{
size_type source_word_index = destination_word_index + word_index(source_begin_bit);
bitmask_type curr_word = source[source_word_index];
bitmask_type next_word = 0;
if (word_index(source_end_bit) >
word_index(source_begin_bit +
destination_word_index * detail::size_in_bits<bitmask_type>())) {
next_word = source[source_word_index + 1];
}
return __funnelshift_r(curr_word, next_word, source_begin_bit);
}

/**
* @brief value accessor of column without null bitmask
* A unary functor returns scalar value at `id`.
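
The new `get_mask_offset_word` device helper above realigns a null-mask word when the source bitmask begins at an arbitrary bit offset: it loads the word containing the requested bits, conditionally loads the following word when the requested range extends into it, and funnel-shifts the pair so the returned word starts at `source_begin_bit`. As a rough host-side model of the CUDA `__funnelshift_r` intrinsic it relies on (the function name below is ours, for illustration only):

```cpp
#include <cstdint>

// Host-side model of __funnelshift_r(lo, hi, shift): concatenate hi:lo into a
// 64-bit value, shift right by (shift & 31) bits, and return the low 32 bits.
// Illustrative only -- the device code above uses the hardware intrinsic.
inline uint32_t funnelshift_r_model(uint32_t lo, uint32_t hi, uint32_t shift)
{
  uint64_t const combined = (static_cast<uint64_t>(hi) << 32) | lo;
  return static_cast<uint32_t>(combined >> (shift & 31u));
}
```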
1 change: 0 additions & 1 deletion cpp/include/cudf/column/column_factories.hpp
@@ -583,7 +583,6 @@ std::unique_ptr<cudf::column> make_lists_column(
* @param[in] null_mask The bits specifying the null struct values in the column.
* @param[in] stream Optional stream for use with all memory allocation and device kernels.
* @param[in] mr Optional resource to use for device memory allocation.
*
*/
std::unique_ptr<cudf::column> make_structs_column(
size_type num_rows,