Merge branch 'branch-22.02' of https://github.com/rapidsai/cudf into fea-parquet-dec128
rapidsai · Dec 7, 2021 · 8486647
2 parents df8a8e3 + a72f19e
commit 8486647
Show file tree

Hide file tree

Showing 111 changed files with 1,819 additions and 1,610 deletions.
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
@@ -30,13 +30,13 @@ function sed_runner() {
 }
 
 # cpp update
-sed_runner 's/'"CUDF VERSION .* LANGUAGES"'/'"CUDF VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/CMakeLists.txt
+sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt
 
 # cpp libcudf_kafka update
-sed_runner 's/'"CUDA_KAFKA VERSION .* LANGUAGES"'/'"CUDA_KAFKA VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' cpp/libcudf_kafka/CMakeLists.txt
+sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/libcudf_kafka/CMakeLists.txt
 
 # cpp cudf_jni update
-sed_runner 's/'"CUDF_JNI VERSION .* LANGUAGES"'/'"CUDF_JNI VERSION ${NEXT_FULL_TAG} LANGUAGES"'/g' java/src/main/native/CMakeLists.txt
+sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' java/src/main/native/CMakeLists.txt
 
 # rapids-cmake version
 sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cmake"'/g' fetch_rapids.cmake

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF)
 
 project(
   CUDF
-  VERSION 21.12.00
+  VERSION 22.02.00
   LANGUAGES C CXX CUDA
 )
 
@@ -185,6 +185,7 @@ add_library(
   src/binaryop/compiled/LogicalOr.cu
   src/binaryop/compiled/Mod.cu
   src/binaryop/compiled/Mul.cu
+  src/binaryop/compiled/NullEquals.cu
   src/binaryop/compiled/NullMax.cu
   src/binaryop/compiled/NullMin.cu
   src/binaryop/compiled/PMod.cu

diff --git a/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp
@@ -50,14 +50,14 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
 }
 
 // TODO tparam boolean for null.
-#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut)                    \
+#define BINARYOP_BENCHMARK_DEFINE(name, TypeLhs, TypeRhs, binop, TypeOut)              \
   BENCHMARK_TEMPLATE_DEFINE_F(                                                         \
-    COMPILED_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \
+    COMPILED_BINARYOP, name, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop)  \
   (::benchmark::State & st)                                                            \
   {                                                                                    \
     BM_compiled_binaryop<TypeLhs, TypeRhs, TypeOut>(st, cudf::binary_operator::binop); \
   }                                                                                    \
-  BENCHMARK_REGISTER_F(COMPILED_BINARYOP, binop)                                       \
+  BENCHMARK_REGISTER_F(COMPILED_BINARYOP, name)                                        \
     ->Unit(benchmark::kMicrosecond)                                                    \
     ->UseManualTime()                                                                  \
     ->Arg(10000)      /* 10k */                                                        \
@@ -70,30 +70,36 @@ using namespace cudf;
 using namespace numeric;
 
 // clang-format off
-BINARYOP_BENCHMARK_DEFINE(float,        int64_t,      ADD,                  int32_t);
-BINARYOP_BENCHMARK_DEFINE(duration_s,   duration_D,   SUB,                  duration_ms);
-BINARYOP_BENCHMARK_DEFINE(float,        float,        MUL,                  int64_t);
-BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      DIV,                  int64_t);
-BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      TRUE_DIV,             int64_t);
-BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      FLOOR_DIV,            int64_t);
-BINARYOP_BENCHMARK_DEFINE(double,       double,       MOD,                  double);
-BINARYOP_BENCHMARK_DEFINE(int32_t,      int64_t,      PMOD,                 double);
-BINARYOP_BENCHMARK_DEFINE(int32_t,      uint8_t,      PYMOD,                int64_t);
-BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      POW,                  double);
-BINARYOP_BENCHMARK_DEFINE(float,        double,       LOG_BASE,             double);
-BINARYOP_BENCHMARK_DEFINE(float,        double,       ATAN2,                double);
-BINARYOP_BENCHMARK_DEFINE(int,          int,          SHIFT_LEFT,           int);
-BINARYOP_BENCHMARK_DEFINE(int16_t,      int64_t,      SHIFT_RIGHT,          int);
-BINARYOP_BENCHMARK_DEFINE(int64_t,      int32_t,      SHIFT_RIGHT_UNSIGNED, int64_t);
-BINARYOP_BENCHMARK_DEFINE(int64_t,      int32_t,      BITWISE_AND,          int16_t);
-BINARYOP_BENCHMARK_DEFINE(int16_t,      int32_t,      BITWISE_OR,           int64_t);
-BINARYOP_BENCHMARK_DEFINE(int16_t,      int64_t,      BITWISE_XOR,          int32_t);
-BINARYOP_BENCHMARK_DEFINE(double,       int8_t,       LOGICAL_AND,          bool);
-BINARYOP_BENCHMARK_DEFINE(int16_t,      int64_t,      LOGICAL_OR,           bool);
-BINARYOP_BENCHMARK_DEFINE(duration_ms,  duration_ns,  EQUAL,                bool);
-BINARYOP_BENCHMARK_DEFINE(decimal32,    decimal32,    NOT_EQUAL,            bool);
-BINARYOP_BENCHMARK_DEFINE(timestamp_s,  timestamp_s,  LESS,                 bool);
-BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s,  GREATER,              bool);
-BINARYOP_BENCHMARK_DEFINE(duration_ms,  duration_ns,  NULL_EQUALS,          bool);
-BINARYOP_BENCHMARK_DEFINE(decimal32,    decimal32,    NULL_MAX,             decimal32);
-BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s,   NULL_MIN,             timestamp_s);
+BINARYOP_BENCHMARK_DEFINE(ADD_1,          float,        float,        ADD,                  float);
+BINARYOP_BENCHMARK_DEFINE(ADD_2,          timestamp_s,  duration_s,   ADD,                  timestamp_s);
+BINARYOP_BENCHMARK_DEFINE(SUB_1,          duration_s,   duration_D,   SUB,                  duration_ms);
+BINARYOP_BENCHMARK_DEFINE(SUB_2,          int64_t,      int64_t,      SUB,                  int64_t);
+BINARYOP_BENCHMARK_DEFINE(MUL_1,          float,        float,        MUL,                  int64_t);
+BINARYOP_BENCHMARK_DEFINE(MUL_2,          duration_s,   int64_t,      MUL,                  duration_s);
+BINARYOP_BENCHMARK_DEFINE(DIV_1,          int64_t,      int64_t,      DIV,                  int64_t);
+BINARYOP_BENCHMARK_DEFINE(DIV_2,          duration_ms,  int32_t,      DIV,                  duration_ms);
+BINARYOP_BENCHMARK_DEFINE(TRUE_DIV,       int64_t,      int64_t,      TRUE_DIV,             int64_t);
+BINARYOP_BENCHMARK_DEFINE(FLOOR_DIV,      int64_t,      int64_t,      FLOOR_DIV,            int64_t);
+BINARYOP_BENCHMARK_DEFINE(MOD_1,          double,       double,       MOD,                  double);
+BINARYOP_BENCHMARK_DEFINE(MOD_2,          duration_ms,  int64_t,      MOD,                  duration_ms);
+BINARYOP_BENCHMARK_DEFINE(PMOD,           int32_t,      int64_t,      PMOD,                 double);
+BINARYOP_BENCHMARK_DEFINE(PYMOD,          int32_t,      uint8_t,      PYMOD,                int64_t);
+BINARYOP_BENCHMARK_DEFINE(POW,            int64_t,      int64_t,      POW,                  double);
+BINARYOP_BENCHMARK_DEFINE(LOG_BASE,       float,        double,       LOG_BASE,             double);
+BINARYOP_BENCHMARK_DEFINE(ATAN2,          float,        double,       ATAN2,                double);
+BINARYOP_BENCHMARK_DEFINE(SHIFT_LEFT,     int,          int,          SHIFT_LEFT,           int);
+BINARYOP_BENCHMARK_DEFINE(SHIFT_RIGHT,    int16_t,      int64_t,      SHIFT_RIGHT,          int);
+BINARYOP_BENCHMARK_DEFINE(USHIFT_RIGHT,   int64_t,      int32_t,      SHIFT_RIGHT_UNSIGNED, int64_t);
+BINARYOP_BENCHMARK_DEFINE(BITWISE_AND,    int64_t,      int32_t,      BITWISE_AND,          int16_t);
+BINARYOP_BENCHMARK_DEFINE(BITWISE_OR,     int16_t,      int32_t,      BITWISE_OR,           int64_t);
+BINARYOP_BENCHMARK_DEFINE(BITWISE_XOR,    int16_t,      int64_t,      BITWISE_XOR,          int32_t);
+BINARYOP_BENCHMARK_DEFINE(LOGICAL_AND,    double,       int8_t,       LOGICAL_AND,          bool);
+BINARYOP_BENCHMARK_DEFINE(LOGICAL_OR,     int16_t,      int64_t,      LOGICAL_OR,           bool);
+BINARYOP_BENCHMARK_DEFINE(EQUAL_1,        int32_t,      int64_t,      EQUAL,                bool);
+BINARYOP_BENCHMARK_DEFINE(EQUAL_2,        duration_ms,  duration_ns,  EQUAL,                bool);
+BINARYOP_BENCHMARK_DEFINE(NOT_EQUAL,      decimal32,    decimal32,    NOT_EQUAL,            bool);
+BINARYOP_BENCHMARK_DEFINE(LESS,           timestamp_s,  timestamp_s,  LESS,                 bool);
+BINARYOP_BENCHMARK_DEFINE(GREATER,        timestamp_ms, timestamp_s,  GREATER,              bool);
+BINARYOP_BENCHMARK_DEFINE(NULL_EQUALS,    duration_ms,  duration_ns,  NULL_EQUALS,          bool);
+BINARYOP_BENCHMARK_DEFINE(NULL_MAX,       decimal32,    decimal32,    NULL_MAX,             decimal32);
+BINARYOP_BENCHMARK_DEFINE(NULL_MIN,       timestamp_D,  timestamp_s,  NULL_MIN,             timestamp_s);
diff --git a/cpp/benchmarks/groupby/group_sum_benchmark.cu b/cpp/benchmarks/groupby/group_sum_benchmark.cu
@@ -44,7 +44,6 @@ void BM_basic_sum(benchmark::State& state)
 {
   using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;
 
-  // const cudf::size_type num_columns{(cudf::size_type)state.range(0)};
   const cudf::size_type column_size{(cudf::size_type)state.range(0)};
 
   auto data_it = cudf::detail::make_counting_transform_iterator(
@@ -53,7 +52,7 @@ void BM_basic_sum(benchmark::State& state)
   wrapper keys(data_it, data_it + column_size);
   wrapper vals(data_it, data_it + column_size);
 
-  cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
+  cudf::groupby::groupby gb_obj(cudf::table_view({keys, keys, keys}));
 
   std::vector<cudf::groupby::aggregation_request> requests;
   requests.emplace_back(cudf::groupby::aggregation_request());
@@ -73,7 +72,9 @@ BENCHMARK_REGISTER_F(Groupby, Basic)
   ->UseManualTime()
   ->Unit(benchmark::kMillisecond)
   ->Arg(10000)
-  ->Arg(10000000);
+  ->Arg(1000000)
+  ->Arg(10000000)
+  ->Arg(100000000);
 
 void BM_pre_sorted_sum(benchmark::State& state)
 {

diff --git a/cpp/benchmarks/hashing/hash_benchmark.cpp b/cpp/benchmarks/hashing/hash_benchmark.cpp
@@ -25,27 +25,40 @@
 class HashBenchmark : public cudf::benchmark {
 };
 
-static void BM_hash(benchmark::State& state, cudf::hash_id hid)
+enum contains_nulls { no_nulls, nulls };
+
+static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls has_nulls)
 {
   cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
   auto const data = create_random_table({cudf::type_id::INT64}, 1, row_count{n_rows});
+  if (has_nulls == contains_nulls::no_nulls)
+    data->get_column(0).set_null_mask(rmm::device_buffer{}, 0);
 
   for (auto _ : state) {
     cuda_event_timer raii(state, true, rmm::cuda_stream_default);
     cudf::hash(data->view(), hid);
   }
 }
 
-#define HASH_BENCHMARK_DEFINE(name)                               \
-  BENCHMARK_DEFINE_F(HashBenchmark, name)                         \
-  (::benchmark::State & st) { BM_hash(st, cudf::hash_id::name); } \
-  BENCHMARK_REGISTER_F(HashBenchmark, name)                       \
-    ->RangeMultiplier(4)                                          \
-    ->Ranges({{1 << 14, 1 << 24}})                                \
-    ->UseManualTime()                                             \
+#define concat(a, b, c) a##b##c
+
+#define H_BENCHMARK_DEFINE(name, hid, n)                                            \
+  BENCHMARK_DEFINE_F(HashBenchmark, name)                                           \
+  (::benchmark::State & st) { BM_hash(st, cudf::hash_id::hid, contains_nulls::n); } \
+  BENCHMARK_REGISTER_F(HashBenchmark, name)                                         \
+    ->RangeMultiplier(4)                                                            \
+    ->Ranges({{1 << 14, 1 << 24}})                                                  \
+    ->UseManualTime()                                                               \
     ->Unit(benchmark::kMillisecond);
 
-HASH_BENCHMARK_DEFINE(HASH_MURMUR3)
-HASH_BENCHMARK_DEFINE(HASH_MD5)
-HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3)
-HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3)
+#define HASH_BENCHMARK_DEFINE(hid, n) H_BENCHMARK_DEFINE(concat(hid, _, n), hid, n)
+
+HASH_BENCHMARK_DEFINE(HASH_MURMUR3, nulls)
+HASH_BENCHMARK_DEFINE(HASH_MD5, nulls)
+HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3, nulls)
+HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3, nulls)
+
+HASH_BENCHMARK_DEFINE(HASH_MURMUR3, no_nulls)
+HASH_BENCHMARK_DEFINE(HASH_MD5, no_nulls)
+HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3, no_nulls)
+HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3, no_nulls)
diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
     cuco 0.0
     GLOBAL_TARGETS cuco::cuco
     CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
-    GIT_TAG f0eecb203590f1f4ac4a9f1700229f4434ac64dc
+    GIT_TAG 6433e8ad7571f14cc5384051b049029c60dd1ce0
     OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
   )
 

diff --git a/cpp/docs/DOCUMENTATION.md b/cpp/docs/DOCUMENTATION.md
@@ -9,7 +9,7 @@ The following is the license header comment that should appear at the beginning
 
 ```c++
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@ The comment should start with `/*` and not `/**` so it is not processed by doxyg
 Also, here are the rules for the copyright year.
 
 - A new file should have the year in which it was created
-- A modified file should span the year it was created and the year it was modified (e.g. `2019-2020`)
+- A modified file should span the year it was created and the year it was modified (e.g. `2019-2021`)
 
 Changing the copyright year may not be necessary if no content has changed (e.g. reformatting only).
 
@@ -48,7 +48,7 @@ Here are some of the custom options in the Doxyfile for libcudf.
 | Option | Setting | Description |
 | ------ | ------- | ----------- |
 | PROJECT_NAME | libcudf | Title used on the main page |
-| PROJECT_NUMBER | 0.14 | Version number |
+| PROJECT_NUMBER | 22.02.00 | Version number |
 | EXTENSION_MAPPING | cu=C++ cuh=C++ | Process `cu` and `cuh` as C++ |
 | INPUT | main_page.md regex.md unicode.md ../include | Embedded markdown files and source code directories to process |
 | FILE_PATTERNS | *.cpp *.hpp *.h *.c *.cu *.cuh | File extensions to process |
@@ -459,17 +459,14 @@ We recommend installing Doxygen using conda (`conda install doxygen`) or a Linux
 Alternatively you can [build and install doxygen from source](http://www.doxygen.nl/manual/install.html).
 
 To build the libcudf HTML documentation simply run the `doxygen` command from the `cpp/doxygen` directory containing the `Doxyfile`.
+The libcudf documentation can also be built using `make docs_cudf` from the cmake build directory (e.g. `cpp/build`).
 Doxygen reads and processes all appropriate source files under the `cpp/include/` directory.
 The output is generated in the `cpp/doxygen/html/` directory.
 You can load the local `index.html` file generated there into any web browser to view the result.
 
 To view docs built on a remote server, you can run a simple HTTP server using Python: `cd html && python -m http.server`.
 Then open `http://<IP address>:8000` in your local web browser, inserting the IP address of the machine on which you ran the HTTP server.
 
-By default, doxygen uses the `graphviz dot` tool to build diagrams of the class, namespace, and module relationships.
-If the `dot` tool cannot be found then doxygen generates output without diagrams.
-The doxygen installation page does not include instructions for downloading and installing `graphviz dot`.
-
 The doxygen output is intended for building documentation only for the public APIs and classes.
 For example, the output should not include documentation for `detail` or `/src` files, and these directories are excluded in the `Doxyfile` configuration.
 When published by the build/CI system, the doxygen output will appear on our external [RAPIDS web site](https://docs.rapids.ai/api/libcudf/stable/index.html).