Merge branch 'branch-0.17' into list-parquet-chunked-write
devavret authored Dec 3, 2020
2 parents 39114c3 + a2d2726 commit 2ce59fc
Showing 164 changed files with 6,456 additions and 2,124 deletions.
30 changes: 29 additions & 1 deletion CHANGELOG.md
@@ -3,6 +3,7 @@
## New Features

- PR #6116 Add `filters` parameter to Python `read_orc` function for filtering
- PR #6848 Added Java bindings for writing parquet files with INT96 timestamps
- PR #6460 Add is_timestamp format check API
- PR #6647 Implement `cudf::round` floating point and integer types (`HALF_EVEN`)
- PR #6562 Implement `cudf::round` floating point and integer types (`HALF_UP`)
@@ -11,6 +12,9 @@
- PR #6777 Implement `cudf::unary_operation` for `decimal32` & `decimal64`
- PR #6729 Implement `cudf::cast` for `decimal32/64` to/from different `type_id`
- PR #6792 Implement `cudf::clamp` for `decimal32` and `decimal64`
- PR #6845 Implement `cudf::copy_if_else` for `decimal32` and `decimal64`
- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64`
- PR #6843 Implement `cudf::copy_range` for `decimal32` and `decimal64`
- PR #6528 Enable `fixed_point` binary operations
- PR #6460 Add is_timestamp format check API
- PR #6568 Add function to create hashed vocabulary file from raw vocabulary
@@ -25,10 +29,13 @@
- PR #6652 Add support for struct columns in concatenate
- PR #6675 Add DecimalDtype to cuDF
- PR #6739 Add Java bindings for is_timestamp
- PR #6811 First class support for unbounded window function bounds
- PR #6768 Add support for scatter() on list columns
- PR #6796 Add create_metadata_file in dask_cudf
- PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series
- PR #6817 Add support for scatter() on lists-of-struct columns
- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64`
- PR #6619 Improve Dockerfile
- PR #6831 Added parquet chunked writing ability for list columns

## Improvements
@@ -47,6 +54,7 @@
- PR #6471 Replace index type-dispatch call with indexalator in cudf::strings::substring
- PR #6485 Add File IO to cuIO benchmarks
- PR #6504 Update Java bindings version to 0.17-SNAPSHOT
- PR #6875 Remove bounds check for `cudf::gather`
- PR #6489 Add `AVRO` fuzz tests with varying function parameters
- PR #6540 Add dictionary support to `cudf::unary_operation`
- PR #6537 Refactor ORC timezone
@@ -67,6 +75,7 @@
- PR #6597 Use thread-local to track CUDA device in JNI
- PR #6599 Replace `size()==0` with `empty()`, `is_empty()`
- PR #6514 Initial work for decimal type in Java/JNI
- PR #6605 Reduce HtoD copies in `cudf::concatenate` of string columns
- PR #6608 Improve subword tokenizer docs
- PR #6610 Add ability to set scalar values in `cudf.DataFrame`
- PR #6612 Update JNI to new RMM cuda_stream_view API
@@ -80,6 +89,7 @@
- PR #6644 Cover different CSV reader/writer options in benchmarks
- PR #6741 Cover different ORC and Parquet reader/writer options in benchmarks
- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator
- PR #6666 Add dictionary support to `cudf::reduce`
- PR #6635 Add cudf::test::dictionary_column_wrapper class
- PR #6702 Fix orc read corruption on boolean column
- PR #6676 Add dictionary support to `cudf::quantile`
@@ -99,11 +109,19 @@
- PR #6776 Use `void` return type for kernel wrapper functions instead of returning `cudaError_t`
- PR #6786 Add nested type support to ColumnVector#getDeviceMemorySize
- PR #6780 Move `cudf::cast` tests to separate test file
- PR #6809 size_type overflow checking when concatenating columns
- PR #6789 Rename `unary_op` to `unary_operator`
- PR #6770 Support building decimal columns with Table.TestBuilder
- PR #6800 Push DeviceScalar to cython-only
- PR #6822 Split out `cudf::distinct_count` from `drop_duplicates.cu`
- PR #6813 Enable `expand=False` in `.str.split` and `.str.rsplit`
- PR #6829 Enable workaround to write categorical columns in csv
- PR #6819 Use CMake 3.19 for RMM when building cuDF jar
- PR #6833 Use settings.xml if existing for internal build
- PR #6835 Move template param to member var to improve compile of hash/groupby.cu
- PR #6837 Avoid gather when copying strings view from start of strings column
- PR #6859 Move align_ptr_for_type() from cuda.cuh to alignment.hpp
- PR #6807 Refactor `std::array` usage in row group index writing in ORC

## Bug Fixes

@@ -150,14 +168,21 @@
- PR #6742 Fix concat bug in dask_cudf Series/Index creation
- PR #6632 Fix DataFrame initialization from list of dicts
- PR #6767 Fix sort order of parameters in `test_scalar_invalid_implicit_conversion` pytest
- PR #6771 Fix index handling in parquet reader and writer
- PR #6787 Update java reduction APIs to reflect C++ changes
- PR #6790 Fix result representation in groupby.apply
- PR #6794 Fix AVRO reader issues with empty input
- PR #6798 Fix `read_avro` docs
- PR #6824 Fix JNI build
- PR #6826 Fix resource management in Java ColumnBuilder
- PR #6830 Fix categorical scalar insertion
- PR #6844 Fix uint32_t undefined errors
- PR #6854 Fix the parameter order of writeParquetBufferBegin
- PR #6855 Fix `.str.replace_with_backrefs` docs examples
- PR #6853 Fix contiguous split of null string columns
- PR #6861 Fix compile error in type_dispatch_benchmark.cu
- PR #6869 Avoid dependency resolution failure in latest version of pip by explicitly specifying versions for dask and distributed
- PR #6806 Force install of local conda artifacts

# cuDF 0.16.0 (21 Oct 2020)

@@ -190,6 +215,8 @@
- PR #6301 Add JNI bindings to nvcomp
- PR #6328 Java and JNI bindings for getMapValue/map_lookup
- PR #6371 Use ColumnViewAccess on Host side
- PR #6392 Add hash-based groupby mean aggregation
- PR #6511 Add LogicalType to Parquet reader
- PR #6297 cuDF Python Scalars
- PR #6723 Support creating decimal vectors from scalar

@@ -299,6 +326,7 @@
- PR #6653 Replaced SHFL_XOR calls with cub::WarpReduce
- PR #6751 Rework ColumnViewAccess and its usage
- PR #6698 Remove macros from ORC reader and writer
- PR #6782 Replace cuio macros with constexpr and inline functions

## Bug Fixes

9 changes: 6 additions & 3 deletions Dockerfile
@@ -13,12 +13,15 @@ ARG CC=5
ARG CXX=5
RUN apt update -y --fix-missing && \
apt upgrade -y && \
apt install -y \
apt install -y --no-install-recommends \
git \
gcc-${CC} \
g++-${CXX} \
libboost-all-dev \
tzdata
tzdata && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install conda
ADD https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh /miniconda.sh
@@ -70,7 +73,7 @@ RUN source activate cudf && \
mkdir -p /cudf/cpp/build && \
cd /cudf/cpp/build && \
cmake .. -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} && \
make -j install
make -j"$(nproc)" install

# cuDF build/install
RUN source activate cudf && \
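One change above swaps a bare `make -j install` for `make -j"$(nproc)" install`. A minimal sketch (not part of the commit) of the difference: a bare `-j` lets make spawn an unlimited number of parallel jobs, which can exhaust memory on a large C++ build, while `nproc` caps the job count at the number of available CPUs.

```shell
# Derive the job count the way the updated Dockerfile does; the echoed
# command is illustrative only, nothing is actually built here.
jobs="$(nproc)"
echo "make -j${jobs} install"
```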
8 changes: 4 additions & 4 deletions ci/benchmark/build.sh
@@ -75,10 +75,10 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
# conda install "your-pkg=1.0.0"

# Install the master version of dask, distributed, and streamz
logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps
logger "pip install git+https://github.com/dask/dask.git --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps
logger "pip install git+https://github.com/dask/distributed.git@master --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@master" --upgrade --no-deps
logger "pip install git+https://github.com/dask/dask.git@master --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git@master" --upgrade --no-deps
logger "pip install git+https://github.com/python-streamz/streamz.git --upgrade --no-deps"
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps

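The edits above pin each `pip install` from git to an explicit `@master` ref. A small sketch (only the URL shape is taken from the script; the extraction below is illustrative) of what the pin means: `git+<repo>.git@<ref>` makes pip check out a named branch, tag, or commit instead of the repository's default branch, which can change out from under CI.

```shell
# Read the pinned ref back out of a pip-style VCS URL.
url="git+https://github.com/dask/dask.git@master"
ref="${url##*@}"   # strip everything through the last '@'
echo "$ref"        # → master
```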
33 changes: 23 additions & 10 deletions ci/gpu/build.sh
@@ -85,14 +85,6 @@ gpuci_conda_retry install -y \
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry install -y "your-pkg=1.0.0"

# Install the master version of dask, distributed, and streamz
gpuci_logger "Install the master version of dask, distributed, and streamz"
set -x
pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
set +x

gpuci_logger "Check compiler versions"
python --version
$CC --version
@@ -103,8 +95,20 @@ conda info
conda config --show-sources
conda list --show-channel-urls

function install_dask {
# Install the master version of dask, distributed, and streamz
gpuci_logger "Install the master version of dask, distributed, and streamz"
set -x
pip install "git+https://github.com/dask/distributed.git@master" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@master" --upgrade --no-deps
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
set +x
}

if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then

install_dask

################################################################################
# BUILD - Build libcudf, cuDF, libcudf_kafka, and dask_cudf from source
################################################################################
@@ -165,8 +169,17 @@ else
${gt} --gtest_output=xml:${WORKSPACE}/test-results/
done

gpuci_logger "Installing libcudf & libcudf_kafka"
conda install -c $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ libcudf libcudf_kafka
CUDF_CONDA_FILE=`find $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ -name "libcudf-*.tar.bz2"`
CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension
CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install
KAFKA_CONDA_FILE=`find $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ -name "libcudf_kafka-*.tar.bz2"`
KAFKA_CONDA_FILE=`basename "$KAFKA_CONDA_FILE" .tar.bz2` #get filename without extension
KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install

gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE"
conda install -c $WORKSPACE/ci/artifacts/cudf/cpu/conda-bld/ "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"

install_dask

gpuci_logger "Build python libs from source"
if [[ ${BUILD_MODE} == "pull-request" ]]; then
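The new lines above turn a conda artifact filename into an exact `name=version=build` match spec so the locally built packages are installed rather than channel versions. A sketch of that conversion with a hypothetical artifact name (the version and build strings below are invented for illustration):

```shell
# Conda package files are <name>-<version>-<build>.tar.bz2; strip the
# extension and swap '-' for '=' to get a spec conda can install exactly.
f="libcudf-0.17.0a201203-cuda10.2_g1234567_0.tar.bz2"
base="$(basename "$f" .tar.bz2)"
spec="${base//-/=}"
echo "$spec"   # → libcudf=0.17.0a201203=cuda10.2_g1234567_0
```

Note that this relies on the package name itself containing no hyphens, which holds for `libcudf` and `libcudf_kafka` (both use underscores).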
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda10.1.yml
@@ -56,6 +56,6 @@ dependencies:
- protobuf
- nvtx>=0.2.1
- pip:
- git+https://github.com/dask/dask.git
- git+https://github.com/dask/distributed.git
- git+https://github.com/dask/dask.git@master
- git+https://github.com/dask/distributed.git@master
- git+https://github.com/python-streamz/streamz.git
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda10.2.yml
@@ -56,6 +56,6 @@ dependencies:
- protobuf
- nvtx>=0.2.1
- pip:
- git+https://github.com/dask/dask.git
- git+https://github.com/dask/distributed.git
- git+https://github.com/dask/dask.git@master
- git+https://github.com/dask/distributed.git@master
- git+https://github.com/python-streamz/streamz.git
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
@@ -56,6 +56,6 @@ dependencies:
- protobuf
- nvtx>=0.2.1
- pip:
- git+https://github.com/dask/dask.git
- git+https://github.com/dask/distributed.git
- git+https://github.com/dask/dask.git@master
- git+https://github.com/dask/distributed.git@master
- git+https://github.com/python-streamz/streamz.git
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/run_test.sh
@@ -9,11 +9,11 @@ function logger() {
}

# Install the latest version of dask and distributed
logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps
logger "pip install git+https://github.com/dask/distributed.git@master --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@master" --upgrade --no-deps

logger "pip install git+https://github.com/dask/dask.git --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps
logger "pip install git+https://github.com/dask/dask.git@master --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git@master" --upgrade --no-deps

logger "python -c 'import dask_cudf'"
python -c "import dask_cudf"
3 changes: 2 additions & 1 deletion conda/recipes/libcudf/meta.yaml
@@ -71,12 +71,12 @@ test:
- test -f $PREFIX/include/cudf/detail/null_mask.hpp
- test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp
- test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp
- test -f $PREFIX/include/cudf/detail/quantiles.hpp
- test -f $PREFIX/include/cudf/detail/reduction_functions.hpp
- test -f $PREFIX/include/cudf/detail/repeat.hpp
- test -f $PREFIX/include/cudf/detail/replace.hpp
- test -f $PREFIX/include/cudf/detail/reshape.hpp
- test -f $PREFIX/include/cudf/detail/round.hpp
- test -f $PREFIX/include/cudf/detail/quantiles.hpp
- test -f $PREFIX/include/cudf/detail/scatter.hpp
- test -f $PREFIX/include/cudf/detail/search.hpp
- test -f $PREFIX/include/cudf/detail/sequence.hpp
@@ -85,6 +85,7 @@
- test -f $PREFIX/include/cudf/detail/transform.hpp
- test -f $PREFIX/include/cudf/detail/transpose.hpp
- test -f $PREFIX/include/cudf/detail/unary.hpp
- test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
- test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
- test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
- test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
8 changes: 8 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
@@ -128,6 +128,14 @@ set(APPLY_BOOLEAN_MASK_BENCH_SRC

ConfigureBench(APPLY_BOOLEAN_MASK_BENCH "${APPLY_BOOLEAN_MASK_BENCH_SRC}")

###################################################################################################
# - stream_compaction benchmark -------------------------------------------------------------------

set(STREAM_COMPACTION_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_duplicates_benchmark.cpp")

ConfigureBench(STREAM_COMPACTION_BENCH "${STREAM_COMPACTION_BENCH_SRC}")

###################################################################################################
# - join benchmark --------------------------------------------------------------------------------

78 changes: 78 additions & 0 deletions cpp/benchmarks/stream_compaction/drop_duplicates_benchmark.cpp
@@ -0,0 +1,78 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column_view.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/types.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <memory>
#include <random>

class Compaction : public cudf::benchmark {
};

template <typename Type>
void BM_compaction(benchmark::State& state, cudf::duplicate_keep_option keep)
{
auto const n_rows = static_cast<cudf::size_type>(state.range(0));

cudf::test::UniformRandomGenerator<long> rand_gen(0, 100);
auto elements = cudf::test::make_counting_transform_iterator(
0, [&rand_gen](auto row) { return rand_gen.generate(); });
auto valids = cudf::test::make_counting_transform_iterator(
0, [](auto i) { return i % 100 == 0 ? false : true; });
cudf::test::fixed_width_column_wrapper<Type, long> values(elements, elements + n_rows, valids);

auto input_column = cudf::column_view(values);
auto input_table = cudf::table_view({input_column, input_column, input_column, input_column});

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::drop_duplicates(input_table, {0}, keep);
}
}

#define concat(a, b, c) a##b##c
#define get_keep(op) cudf::duplicate_keep_option::KEEP_##op

// TYPE, OP
#define RBM_BENCHMARK_DEFINE(name, type, keep) \
BENCHMARK_DEFINE_F(Compaction, name)(::benchmark::State & state) \
{ \
BM_compaction<type>(state, get_keep(keep)); \
} \
BENCHMARK_REGISTER_F(Compaction, name) \
->UseManualTime() \
->Arg(10000) /* 10k */ \
->Arg(100000) /* 100k */ \
->Arg(1000000) /* 1M */ \
->Arg(10000000) /* 10M */

#define COMPACTION_BENCHMARK_DEFINE(type, keep) \
RBM_BENCHMARK_DEFINE(concat(type, _, keep), type, keep)

COMPACTION_BENCHMARK_DEFINE(bool, NONE);
COMPACTION_BENCHMARK_DEFINE(int8_t, NONE);
COMPACTION_BENCHMARK_DEFINE(int32_t, NONE);
COMPACTION_BENCHMARK_DEFINE(int32_t, FIRST);
COMPACTION_BENCHMARK_DEFINE(int32_t, LAST);
using cudf::timestamp_ms;
COMPACTION_BENCHMARK_DEFINE(timestamp_ms, NONE);
COMPACTION_BENCHMARK_DEFINE(float, NONE);
7 changes: 2 additions & 5 deletions cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu
@@ -88,15 +88,12 @@ __global__ void host_dispatching_kernel(mutable_column_device_view source_column
template <FunctorType functor_type>
struct ColumnHandle {
template <typename ColumnType>
void operator()(mutable_column_device_view source_column,
int work_per_thread,
rmm::cuda_stream_view stream = rmm::cuda_stream_default)
void operator()(mutable_column_device_view source_column, int work_per_thread)
{
cudf::detail::grid_1d grid_config{source_column.size(), block_size};
int grid_size = grid_config.num_blocks;
// Launch the kernel.
host_dispatching_kernel<functor_type, ColumnType>
<<<grid_size, block_size, 0, stream.value()>>>(source_column);
host_dispatching_kernel<functor_type, ColumnType><<<grid_size, block_size>>>(source_column);
}
};
