Merge branch 'branch-0.20' into remove-boost

rapidsai · May 10, 2021 · db60e37 · db60e37
2 parents c7c3272 + 99df69f
commit db60e37
Show file tree

Hide file tree

Showing 268 changed files with 26,384 additions and 13,810 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -41,6 +41,12 @@ repos:
                 entry: mypy --config-file=python/cudf/setup.cfg python/cudf/cudf
                 language: system
                 types: [python]
+      -   repo: https://github.com/pycqa/pydocstyle
+          rev: 6.0.0
+          hooks:
+              - id: pydocstyle
+                args: ["--config=python/.flake8"]
+
 
 default_language_version:
       python: python3
diff --git a/ci/checks/style.sh b/ci/checks/style.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2018, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 #####################
 # cuDF Style Tester #
 #####################
@@ -33,6 +33,10 @@ FLAKE_CYTHON_RETVAL=$?
 MYPY_CUDF=`mypy --config=python/cudf/setup.cfg python/cudf/cudf`
 MYPY_CUDF_RETVAL=$?
 
+# Run pydocstyle and get results/return code
+PYDOCSTYLE=`pydocstyle --config=python/.flake8 python`
+PYDOCSTYLE_RETVAL=$?
+
 # Run clang-format and check for a consistent code format
 CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1`
 CLANG_FORMAT_RETVAL=$?
@@ -78,6 +82,14 @@ else
   echo -e "\n\n>>>> PASSED: mypy style check\n\n"
 fi
 
+if [ "$PYDOCSTYLE_RETVAL" != "0" ]; then
+  echo -e "\n\n>>>> FAILED: pydocstyle style check; begin output\n\n"
+  echo -e "$PYDOCSTYLE"
+  echo -e "\n\n>>>> FAILED: pydocstyle style check; end output\n\n"
+else
+  echo -e "\n\n>>>> PASSED: pydocstyle style check\n\n"
+fi
+
 if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then
   echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n"
   echo -e "$CLANG_FORMAT"
@@ -91,7 +103,7 @@ HEADER_META=`ci/checks/headers_test.sh`
 HEADER_META_RETVAL=$?
 echo -e "$HEADER_META"
 
-RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL)
+RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $PYDOCSTYLE_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL)
 IFS=$'\n'
 RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1`
 

diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
@@ -42,6 +42,11 @@ gpuci_logger "Activate conda env"
 . /opt/conda/etc/profile.d/conda.sh
 conda activate rapids
 
+# Remove the rapidsai-nightly channel when building the main branch
+if [ "$SOURCE_BRANCH" = "main" ]; then
+  conda config --system --remove channels rapidsai-nightly
+fi
+
 gpuci_logger "Check compiler versions"
 python --version
 $CC --version

diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
@@ -6,7 +6,6 @@ channels:
   - nvidia
   - rapidsai-nightly
   - conda-forge
-  - defaults
 dependencies:
   - clang=8.0.1
   - clang-tools=8.0.1
@@ -59,6 +58,7 @@ dependencies:
   - protobuf
   - nvtx>=0.2.1
   - cachetools
+  - transformers
   - pip:
       - git+https://github.com/dask/dask.git@main
       - git+https://github.com/dask/distributed.git@main

diff --git a/conda/environments/cudf_dev_cuda11.1.yml b/conda/environments/cudf_dev_cuda11.1.yml
@@ -6,7 +6,6 @@ channels:
   - nvidia
   - rapidsai-nightly
   - conda-forge
-  - defaults
 dependencies:
   - clang=8.0.1
   - clang-tools=8.0.1
@@ -59,6 +58,7 @@ dependencies:
   - protobuf
   - nvtx>=0.2.1
   - cachetools
+  - transformers
   - pip:
       - git+https://github.com/dask/dask.git@main
       - git+https://github.com/dask/distributed.git@main

diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
@@ -6,7 +6,6 @@ channels:
   - nvidia
   - rapidsai-nightly
   - conda-forge
-  - defaults
 dependencies:
   - clang=8.0.1
   - clang-tools=8.0.1
@@ -59,6 +58,7 @@ dependencies:
   - protobuf
   - nvtx>=0.2.1
   - cachetools
+  - transformers
   - pip:
       - git+https://github.com/dask/dask.git@main
       - git+https://github.com/dask/distributed.git@main

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
@@ -76,6 +76,7 @@ test:
     - test -f $PREFIX/include/cudf/detail/groupby/sort_helper.hpp
     - test -f $PREFIX/include/cudf/detail/hashing.hpp
     - test -f $PREFIX/include/cudf/detail/interop.hpp
+    - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp
     - test -f $PREFIX/include/cudf/detail/null_mask.hpp
     - test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp
     - test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp
@@ -84,6 +85,7 @@ test:
     - test -f $PREFIX/include/cudf/detail/repeat.hpp
     - test -f $PREFIX/include/cudf/detail/replace.hpp
     - test -f $PREFIX/include/cudf/detail/reshape.hpp
+    - test -f $PREFIX/include/cudf/detail/rolling.hpp
     - test -f $PREFIX/include/cudf/detail/round.hpp
     - test -f $PREFIX/include/cudf/detail/scatter.hpp
     - test -f $PREFIX/include/cudf/detail/search.hpp
@@ -133,7 +135,9 @@ test:
     - test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
     - test -f $PREFIX/include/cudf/lists/detail/copying.hpp
     - test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp
+    - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
     - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp
+    - test -f $PREFIX/include/cudf/lists/concatenate_rows.hpp
     - test -f $PREFIX/include/cudf/lists/count_elements.hpp
     - test -f $PREFIX/include/cudf/lists/explode.hpp
     - test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp
@@ -150,6 +154,7 @@ test:
     - test -f $PREFIX/include/cudf/replace.hpp
     - test -f $PREFIX/include/cudf/reshape.hpp
     - test -f $PREFIX/include/cudf/rolling.hpp
+    - test -f $PREFIX/include/cudf/rolling/range_window_bounds.hpp
     - test -f $PREFIX/include/cudf/round.hpp
     - test -f $PREFIX/include/cudf/scalar/scalar_factories.hpp
     - test -f $PREFIX/include/cudf/scalar/scalar.hpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -153,6 +153,7 @@ add_library(cudf
     src/binaryop/compiled/binary_ops.cu
     src/labeling/label_bins.cu
     src/bitmask/null_mask.cu
+    src/bitmask/is_element_valid.cpp
     src/column/column.cu
     src/column/column_device_view.cu
     src/column/column_factories.cpp
@@ -209,6 +210,8 @@ add_library(cudf
     src/groupby/sort/group_sum_scan.cu
     src/groupby/sort/sort_helper.cu
     src/hash/hashing.cu
+    src/hash/md5_hash.cu
+    src/hash/murmur_hash.cu
     src/interop/dlpack.cpp
     src/interop/from_arrow.cu
     src/interop/to_arrow.cu
@@ -252,6 +255,7 @@ add_library(cudf
     src/io/utilities/datasource.cpp
     src/io/utilities/file_io_utilities.cpp
     src/io/utilities/parsing_utils.cu
+    src/io/utilities/trie.cu
     src/io/utilities/type_conversion.cpp
     src/jit/cache.cpp
     src/jit/parser.cpp
@@ -261,14 +265,16 @@ add_library(cudf
     src/join/join.cu
     src/join/semi_join.cu
     src/lists/contains.cu
+    src/lists/concatenate_rows.cu
     src/lists/copying/concatenate.cu
     src/lists/copying/copying.cu
     src/lists/copying/gather.cu
     src/lists/copying/segmented_gather.cu
     src/lists/count_elements.cu
+    src/lists/drop_list_duplicates.cu
     src/lists/explode.cu
     src/lists/extract.cu
-    src/lists/drop_list_duplicates.cu
+    src/lists/interleave_columns.cu
     src/lists/lists_column_factories.cu
     src/lists/lists_column_view.cu
     src/lists/segmented_sort.cu
@@ -300,6 +306,7 @@ add_library(cudf
     src/reshape/tile.cu
     src/rolling/grouped_rolling.cu
     src/rolling/rolling.cu
+    src/rolling/range_window_bounds.cpp
     src/round/round.cu
     src/scalar/scalar.cpp
     src/scalar/scalar_factories.cpp

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -97,7 +97,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator_benchmark.cu)
 
 ###################################################################################################
 # - search benchmark ------------------------------------------------------------------------------
-ConfigureBench(SEARCH_BENCH search/search_benchmark.cu)
+ConfigureBench(SEARCH_BENCH search/search_benchmark.cpp)
 
 ###################################################################################################
 # - sort benchmark --------------------------------------------------------------------------------
@@ -124,6 +124,11 @@ ConfigureBench(REDUCTION_BENCH
   reduction/scan_benchmark.cpp
   reduction/minmax_benchmark.cpp)
 
+###################################################################################################
+# - replace benchmark -----------------------------------------------------------------------------
+ConfigureBench(REPLACE_BENCH
+  replace/clamp_benchmark.cpp)
+
 ###################################################################################################
 # - filling benchmark -----------------------------------------------------------------------------
 ConfigureBench(FILL_BENCH
@@ -138,7 +143,9 @@ ConfigureBench(GROUPBY_BENCH
 
 ###################################################################################################
 # - hashing benchmark -----------------------------------------------------------------------------
-ConfigureBench(HASHING_BENCH hashing/hashing_benchmark.cpp)
+ConfigureBench(HASHING_BENCH
+  hashing/hash_benchmark.cpp
+  hashing/partition_benchmark.cpp)
 
 ###################################################################################################
 # - merge benchmark -------------------------------------------------------------------------------
@@ -182,7 +189,9 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp)
 
 ###################################################################################################
 # - binaryop benchmark ----------------------------------------------------------------------------
-ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cu)
+ConfigureBench(BINARYOP_BENCH
+  binaryop/binaryop_benchmark.cpp
+  binaryop/jit_binaryop_benchmark.cpp)
 
 ###################################################################################################
 # - nvtext benchmark -------------------------------------------------------------------

diff --git a/...benchmarks/binaryop/binaryop_benchmark.cu → ...enchmarks/binaryop/binaryop_benchmark.cpp b/...benchmarks/binaryop/binaryop_benchmark.cu → ...enchmarks/binaryop/binaryop_benchmark.cpp
diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fixture/benchmark_fixture.hpp>
+#include <synchronization/synchronization.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/binaryop.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+
+template <typename TypeLhs, typename TypeRhs, typename TypeOut>
+class JIT_BINARYOP : public cudf::benchmark {
+};
+
+template <typename TypeLhs, typename TypeRhs, typename TypeOut>
+void BM_binaryop(benchmark::State& state, cudf::binary_operator binop)
+{
+  const cudf::size_type column_size{static_cast<cudf::size_type>(state.range(0))};
+
+  auto data_it = thrust::make_counting_iterator(0);
+  cudf::test::fixed_width_column_wrapper<TypeLhs> input1(data_it, data_it + column_size);
+  cudf::test::fixed_width_column_wrapper<TypeRhs> input2(data_it, data_it + column_size);
+
+  auto lhs          = cudf::column_view(input1);
+  auto rhs          = cudf::column_view(input2);
+  auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());
+
+  // Untimed warm-up call so the timed iterations below run with a hot cache.
+  cudf::binary_operation(lhs, rhs, binop, output_dtype);
+
+  for (auto _ : state) {
+    cuda_event_timer timer(state, true);
+    cudf::binary_operation(lhs, rhs, binop, output_dtype);
+  }
+}
+
+// TODO tparam boolean for null.
+#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut)           \
+  BENCHMARK_TEMPLATE_DEFINE_F(JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut) \
+  (::benchmark::State & st)                                                   \
+  {                                                                           \
+    BM_binaryop<TypeLhs, TypeRhs, TypeOut>(st, cudf::binary_operator::binop); \
+  }                                                                           \
+  BENCHMARK_REGISTER_F(JIT_BINARYOP, binop)                                   \
+    ->Unit(benchmark::kMillisecond)                                           \
+    ->UseManualTime()                                                         \
+    ->Arg(10000)      /* 10k */                                               \
+    ->Arg(100000)     /* 100k */                                              \
+    ->Arg(1000000)    /* 1M */                                                \
+    ->Arg(10000000)   /* 10M */                                               \
+    ->Arg(100000000); /* 100M */
+
+using namespace cudf;
+
+// clang-format off
+BINARYOP_BENCHMARK_DEFINE(float,        int64_t,      ADD,                  int32_t);
+BINARYOP_BENCHMARK_DEFINE(duration_s,   duration_D,   SUB,                  duration_ms);
+BINARYOP_BENCHMARK_DEFINE(float,        float,        MUL,                  int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      DIV,                  int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      TRUE_DIV,             int64_t);
+BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      FLOOR_DIV,            int64_t);
+BINARYOP_BENCHMARK_DEFINE(double,       double,       MOD,                  double);
+BINARYOP_BENCHMARK_DEFINE(int64_t,      int64_t,      POW,                  double);
+BINARYOP_BENCHMARK_DEFINE(int64_t,      int32_t,      BITWISE_AND,          int16_t);
+BINARYOP_BENCHMARK_DEFINE(int16_t,      int32_t,      BITWISE_OR,           int64_t);
+BINARYOP_BENCHMARK_DEFINE(int16_t,      int64_t,      BITWISE_XOR,          int32_t);
+BINARYOP_BENCHMARK_DEFINE(double,       int8_t,       LOGICAL_AND,          int16_t);
+BINARYOP_BENCHMARK_DEFINE(int16_t,      int64_t,      LOGICAL_OR,           bool);
+BINARYOP_BENCHMARK_DEFINE(timestamp_s,  timestamp_s,  LESS,                 bool);
+BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s,  GREATER,              bool);
+BINARYOP_BENCHMARK_DEFINE(int,          int,          SHIFT_LEFT,           int);
+BINARYOP_BENCHMARK_DEFINE(int16_t,      int64_t,      SHIFT_RIGHT,          int);
+BINARYOP_BENCHMARK_DEFINE(int64_t,      int32_t,      SHIFT_RIGHT_UNSIGNED, int64_t);
+BINARYOP_BENCHMARK_DEFINE(int32_t,      int64_t,      PMOD,                 double);
+BINARYOP_BENCHMARK_DEFINE(float,        double,       ATAN2,                double);
diff --git a/cpp/benchmarks/hashing/hash_benchmark.cpp b/cpp/benchmarks/hashing/hash_benchmark.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark/benchmark.h>
+#include <benchmarks/common/generate_benchmark_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf/hashing.hpp>
+#include <cudf/table/table.hpp>
+
+class HashBenchmark : public cudf::benchmark {
+};
+
+static void BM_hash(benchmark::State& state, cudf::hash_id hid)
+{
+  cudf::size_type const n_rows{static_cast<cudf::size_type>(state.range(0))};
+  auto const data = create_random_table({cudf::type_id::INT64}, 1, row_count{n_rows});
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true, rmm::cuda_stream_default);
+    cudf::hash(data->view(), hid);
+  }
+}
+
+#define HASH_BENCHMARK_DEFINE(name)                               \
+  BENCHMARK_DEFINE_F(HashBenchmark, name)                         \
+  (::benchmark::State & st) { BM_hash(st, cudf::hash_id::name); } \
+  BENCHMARK_REGISTER_F(HashBenchmark, name)                       \
+    ->RangeMultiplier(4)                                          \
+    ->Ranges({{1 << 14, 1 << 24}})                                \
+    ->UseManualTime()                                             \
+    ->Unit(benchmark::kMillisecond);
+
+HASH_BENCHMARK_DEFINE(HASH_MURMUR3)
+HASH_BENCHMARK_DEFINE(HASH_MD5)
+HASH_BENCHMARK_DEFINE(HASH_SERIAL_MURMUR3)
+HASH_BENCHMARK_DEFINE(HASH_SPARK_MURMUR3)
diff --git a/cpp/benchmarks/hashing/hashing_benchmark.cpp → ...enchmarks/hashing/partition_benchmark.cpp b/cpp/benchmarks/hashing/hashing_benchmark.cpp → ...enchmarks/hashing/partition_benchmark.cpp