Merge branch 'branch-22.06' of https://github.com/rapidsai/cudf into feature/diff-non-numeric
rapidsai · Apr 14, 2022 · 1eee122 · 1eee122
2 parents ca12c80 + ac27757
commit 1eee122
Show file tree

Hide file tree

Showing 79 changed files with 3,605 additions and 2,850 deletions.
diff --git a/build.sh b/build.sh
@@ -17,12 +17,13 @@ ARGS=$*
 # script, and that this script resides in the repo dir!
 REPODIR=$(cd $(dirname $0); pwd)
 
-VALIDARGS="clean libcudf cudf dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
-HELP="$0 [clean] [libcudf] [cudf] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
+VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n -l --allgpuarch --disable_nvtx --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
+HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
    clean                         - remove all existing build artifacts and configuration (start
                                    over)
    libcudf                       - build the cudf C++ code only
    cudf                          - build the cudf Python package
+   cudfjar                       - build cudf JAR with static libcudf using devtoolset toolchain
    dask_cudf                     - build the dask_cudf Python package
    benchmarks                    - build benchmarks
    tests                         - build tests
@@ -50,7 +51,9 @@ CUDF_KAFKA_BUILD_DIR=${REPODIR}/python/cudf_kafka/build
 CUDF_BUILD_DIR=${REPODIR}/python/cudf/build
 DASK_CUDF_BUILD_DIR=${REPODIR}/python/dask_cudf/build
 CUSTREAMZ_BUILD_DIR=${REPODIR}/python/custreamz/build
-BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR}"
+CUDF_JAR_JAVA_BUILD_DIR="$REPODIR/java/target"
+
+BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR}"
 
 # Set defaults for vars modified by flags to this script
 VERBOSE_FLAG=""
@@ -101,6 +104,58 @@ function buildAll {
     ((${NUMARGS} == 0 )) || !(echo " ${ARGS} " | grep -q " [^-]\+ ")
 }
 
+# Build the cudf JAR (with statically linked libcudf) inside a CentOS 7 Docker container.
+#
+# Steps:
+#   1. Build the image from java/ci/Dockerfile.centos7 (Dockerfile path and build context
+#      are relative to the current directory).
+#   2. Run the image as the invoking user, with the repo mounted at /rapids/cudf and the
+#      local Maven repository mounted at /rapids/.m2/repository.
+#   3. Inside the container, under devtoolset-9, configure and build libcudf with
+#      BUILD_SHARED_LIBS=OFF in java/target/libcudf-cmake-build, then run Maven in java/.
+#
+# Variables read: REPODIR, CUDF_JAR_JAVA_BUILD_DIR, BUILD_TYPE, PARALLEL_LEVEL and
+# CUDF_CMAKE_CUDA_ARCHITECTURES, plus the optional overrides CMAKE_GENERATOR (default
+# Ninja), LOCAL_MAVEN_REPO (default $HOME/.m2/repository), MVN_PHASES (default package)
+# and SKIP_TESTS (default false).
+#
+# NOTE: CMAKE_INSTALL_PREFIX must be passed with a single '='; with '==' CMake stores the
+# value with a leading literal '=' ("=/usr/local/rapids").
+function buildLibCudfJniInDocker {
+    local cudaVersion="11.5.0"
+    local imageName="cudf-build:${cudaVersion}-devel-centos7"
+    local CMAKE_GENERATOR="${CMAKE_GENERATOR:-Ninja}"
+    local workspaceDir="/rapids"
+    local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
+    local workspaceRepoDir="$workspaceDir/cudf"
+    local workspaceMavenRepoDir="$workspaceDir/.m2/repository"
+    mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build"
+    nvidia-docker build \
+        -f java/ci/Dockerfile.centos7 \
+        --build-arg CUDA_VERSION=${cudaVersion} \
+        -t $imageName .
+    nvidia-docker run -it -u $(id -u):$(id -g) --rm \
+        -v "/etc/group:/etc/group:ro" \
+        -v "/etc/passwd:/etc/passwd:ro" \
+        -v "/etc/shadow:/etc/shadow:ro" \
+        -v "/etc/sudoers.d:/etc/sudoers.d:ro" \
+        -v "$REPODIR:$workspaceRepoDir:rw" \
+        -v "$localMavenRepo:$workspaceMavenRepoDir:rw" \
+        --workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \
+        ${imageName} \
+        scl enable devtoolset-9 \
+            "cmake $workspaceRepoDir/cpp \
+                -G${CMAKE_GENERATOR} \
+                -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+                -DCUDA_STATIC_RUNTIME=ON \
+                -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
+                -DCMAKE_INSTALL_PREFIX=/usr/local/rapids \
+                -DUSE_NVTX=ON -DCUDF_USE_ARROW_STATIC=ON \
+                -DCUDF_ENABLE_ARROW_S3=OFF \
+                -DBUILD_TESTS=OFF \
+                -DPER_THREAD_DEFAULT_STREAM=ON \
+                -DRMM_LOGGING_LEVEL=OFF \
+                -DBUILD_SHARED_LIBS=OFF && \
+             cmake --build . --parallel ${PARALLEL_LEVEL} && \
+             cd $workspaceRepoDir/java && \
+             mvn ${MVN_PHASES:-"package"} \
+                -Dmaven.repo.local=$workspaceMavenRepoDir \
+                -DskipTests=${SKIP_TESTS:-false} \
+                -Dparallel.level=${PARALLEL_LEVEL} \
+                -DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \
+                -DCUDA_STATIC_RUNTIME=ON \
+                -DPER_THREAD_DEFAULT_STREAM=ON \
+                -DRMM_LOGGING_LEVEL=OFF \
+                -DUSE_GDS=ON \
+                -DGPU_ARCHS=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
+                -DCUDF_JNI_ARROW_STATIC=ON \
+                -DCUDF_JNI_LIBCUDF_STATIC=ON \
+                -Dtest=*,!CuFileTest"
+}
+
 if hasArg -h || hasArg --h || hasArg --help; then
     echo "${HELP}"
     exit 0
@@ -178,15 +233,21 @@ fi
 ################################################################################
 # Configure, build, and install libcudf
 
-if buildAll || hasArg libcudf; then
+if buildAll || hasArg libcudf || hasArg cudfjar; then
     if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
-        CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=NATIVE"
-        echo "Building for the architecture of the GPU in the system..."
+        CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}"
+        if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then
+            echo "Building for the architecture of the GPU in the system..."
+        else
+            echo "Building for the GPU architecture(s) $CUDF_CMAKE_CUDA_ARCHITECTURES ..."
+        fi
     else
-        CUDF_CMAKE_CUDA_ARCHITECTURES="-DCMAKE_CUDA_ARCHITECTURES=ALL"
+        CUDF_CMAKE_CUDA_ARCHITECTURES="ALL"
         echo "Building for *ALL* supported GPU architectures..."
     fi
+fi
 
+if buildAll || hasArg libcudf; then
     # get the current count before the compile starts
     if [[ "$BUILD_REPORT_INCL_CACHE_STATS" == "ON" && -x "$(command -v sccache)" ]]; then
         # zero the sccache statistics
@@ -195,7 +256,7 @@ if buildAll || hasArg libcudf; then
 
     cmake -S $REPODIR/cpp -B ${LIB_BUILD_DIR} \
           -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-          ${CUDF_CMAKE_CUDA_ARCHITECTURES} \
+          -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
           -DUSE_NVTX=${BUILD_NVTX} \
           -DBUILD_TESTS=${BUILD_TESTS} \
           -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
@@ -262,6 +323,10 @@ if buildAll || hasArg dask_cudf; then
     fi
 fi
 
+if hasArg cudfjar; then
+    buildLibCudfJniInDocker
+fi
+
 # Build libcudf_kafka library
 if hasArg libcudf_kafka; then
     cmake -S $REPODIR/cpp/libcudf_kafka -B ${KAFKA_LIB_BUILD_DIR} \

diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
@@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
 export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=0
+export INSTALL_DASK_MAIN=1
 
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?
@@ -82,8 +82,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
     gpuci_logger "gpuci_mamba_retry update dask"
     gpuci_mamba_retry update dask
 else
-    gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall"
-    gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall
+    gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall"
+    gpuci_mamba_retry install "conda-forge::dask>=2022.03.0" "conda-forge::distributed>=2022.03.0" "conda-forge::dask-core>=2022.03.0" --force-reinstall  # specs quoted: a bare '>' is parsed by the shell as an output redirection
 fi
 
 # Install the master version of streamz

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
 export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=0
+export INSTALL_DASK_MAIN=1
 
 # ucx-py version
 export UCX_PY_VERSION='0.26.*'
@@ -112,8 +112,8 @@ function install_dask {
         gpuci_mamba_retry update dask
         conda list
     else
-        gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall"
-        gpuci_mamba_retry install conda-forge::dask==2022.03.0 conda-forge::distributed==2022.03.0 conda-forge::dask-core==2022.03.0 --force-reinstall
+        gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.03.0 conda-forge::distributed>=2022.03.0 conda-forge::dask-core>=2022.03.0 --force-reinstall"
+        gpuci_mamba_retry install "conda-forge::dask>=2022.03.0" "conda-forge::distributed>=2022.03.0" "conda-forge::dask-core>=2022.03.0" --force-reinstall  # specs quoted: a bare '>' is parsed by the shell as an output redirection
     fi
     # Install the main version of streamz
     gpuci_logger "Install the main version of streamz"

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
@@ -12,7 +12,7 @@ dependencies:
   - clang-tools=11.1.0
   - cupy>=9.5.0,<11.0.0a0
   - rmm=22.06.*
-  - cmake>=3.20.1,<3.23
+  - cmake>=3.20.1,!=3.23.0
   - cmake_setuptools>=0.1.3
   - python>=3.7,<3.9
   - numba>=0.54
@@ -43,8 +43,8 @@ dependencies:
   - pydocstyle=6.1.1
   - typing_extensions
   - pre-commit
-  - dask==2022.03.0
-  - distributed==2022.03.0
+  - dask>=2022.03.0
+  - distributed>=2022.03.0
   - streamz
   - arrow-cpp=7.0.0
   - dlpack>=0.5,<0.6.0a0

diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
@@ -25,7 +25,7 @@ build:
 
 requirements:
   build:
-    - cmake >=3.20.1,<3.23
+    - cmake >=3.20.1,!=3.23.0
   host:
     - python
     - cython >=0.29,<0.30

diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml
@@ -32,8 +32,8 @@ requirements:
     - python
     - streamz
     - cudf {{ version }}
-    - dask==2022.03.0
-    - distributed==2022.03.0
+    - dask>=2022.03.0
+    - distributed>=2022.03.0
     - python-confluent-kafka >=1.7.0,<1.8.0a0
     - cudf_kafka {{ version }}
 

diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
@@ -27,14 +27,14 @@ requirements:
   host:
     - python
     - cudf {{ version }}
-    - dask==2022.03.0
-    - distributed==2022.03.0
+    - dask>=2022.03.0
+    - distributed>=2022.03.0
     - cudatoolkit {{ cuda_version }}
   run:
     - python
     - cudf {{ version }}
-    - dask==2022.03.0
-    - distributed==2022.03.0
+    - dask>=2022.03.0
+    - distributed>=2022.03.0
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
 
 test:                                   # [linux64]

diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
@@ -1,5 +1,5 @@
 cmake_version:
-  - ">=3.20.1,<3.23"
+  - ">=3.20.1,!=3.23.0"
 
 gtest_version:
   - "=1.10.0"

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
@@ -110,6 +110,7 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/transpose.hpp
         - test -f $PREFIX/include/cudf/detail/unary.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
+        - test -f $PREFIX/include/cudf/detail/utilities/column.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
         - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -181,7 +181,7 @@ ConfigureBench(
   REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp
   reduction/reduce.cpp reduction/scan.cpp
 )
-ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu)
+ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu reduction/rank.cpp)
 
 # ##################################################################################################
 # * reduction benchmark ---------------------------------------------------------------------------

diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+
+#include <cudf/detail/scan.hpp>
+#include <cudf/filling.hpp>
+#include <cudf/lists/list_view.cuh>
+
+#include <nvbench/nvbench.cuh>
+
+/**
+ * @brief nvbench body that times `cudf::detail::inclusive_dense_rank_scan` on one column.
+ *
+ * Reads the `null_frequency` and `data_size` axes from `state`, generates a random
+ * single-column table of element type `type` using half of `data_size` as the table size
+ * in bytes, repeats that table with `cudf::repeat(..., 2)`, and runs the scan on column 0
+ * using the stream supplied by the nvbench launch.
+ *
+ * @tparam type Element type of the generated column
+ * @param state nvbench state providing the axis values and the timed `exec` region
+ */
+template <typename type>
+static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list<type>)
+{
+  cudf::rmm_pool_raii pool_raii;
+
+  auto const column_type_id = cudf::type_to_id<type>();
+
+  // Axis values chosen for this benchmark instance.
+  double const null_fraction = state.get_float64("null_frequency");
+  size_t const total_bytes   = state.get_int64("data_size");
+
+  // Uniformly distributed values in [0, 5] with the requested fraction of nulls.
+  data_profile profile;
+  profile.set_distribution_params(column_type_id, distribution_id::UNIFORM, 0, 5);
+  profile.set_null_frequency(null_fraction);
+
+  // Generate half of the requested size, then repeat it to reach the full size.
+  auto const half_table =
+    create_random_table({column_type_id}, table_size_bytes{total_bytes / 2}, profile);
+  auto const repeated_table = cudf::repeat(half_table->view(), 2);
+
+  cudf::column_view scan_input(repeated_table->view().column(0));
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto ranks =
+      cudf::detail::inclusive_dense_rank_scan(scan_input,
+                                              rmm::cuda_stream_view{launch.get_stream()},
+                                              rmm::mr::get_current_device_resource());
+  });
+}
+
+// Element types the benchmark is instantiated for.
+using data_type = nvbench::type_list<int32_t, cudf::list_view>;
+
+// Registers the benchmark under the name "rank_scan", swept over the `null_frequency`
+// and `data_size` axes. The benchmark body passes `data_size` on as a table size in bytes.
+NVBENCH_BENCH_TYPES(nvbench_reduction_scan, NVBENCH_TYPE_AXES(data_type))
+  .set_name("rank_scan")
+  .add_float64_axis("null_frequency", {0, 0.1, 0.5, 0.9})
+  .add_int64_axis("data_size",
+                  {
+                    10000,      // 10k
+                    100000,     // 100k
+                    1000000,    // 1M
+                    10000000,   // 10M
+                    100000000,  // 100M
+                  });
diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp
@@ -80,5 +80,5 @@ void nvbench_sort_struct(nvbench::state& state)
 NVBENCH_BENCH(nvbench_sort_struct)
   .set_name("sort_struct")
   .add_int64_power_of_two_axis("NumRows", {10, 18, 26})
-  .add_int64_axis("Depth", {1, 8})
+  .add_int64_axis("Depth", {0, 1, 8})
   .add_int64_axis("Nulls", {0, 1});