Commit 0a0bcd0

Merge branch 'branch-24.04' into test-cuda-12.2
bdice authored Feb 9, 2024
2 parents 8090e50 + 6638b52
Showing 101 changed files with 2,833 additions and 1,636 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/pr.yaml
@@ -164,6 +164,8 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
build_type: pull-request
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
test_summary_show: "none"
#pandas-tests-diff:
# # diff the results of running the Pandas unit tests and publish a job summary
# needs: [pandas-tests-main, pandas-tests-pr]
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
@@ -95,7 +95,8 @@ repos:
# DeprecationWarning: https://github.com/pandas-dev/pandas/issues/54970
exclude: |
(?x)^(
^python/cudf/cudf/core/dtypes.py
^python/cudf/cudf/core/dtypes.py|
^python/cudf/cudf/tests/pytest.ini
)
- id: no-programmatic-xfail
name: no-programmatic-xfail
14 changes: 9 additions & 5 deletions ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -1,12 +1,14 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

set -euo pipefail

PANDAS_TESTS_BRANCH=${1}

rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch"
rapids-logger "PR number: $RAPIDS_REF_NAME"
rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}"

# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
@@ -25,14 +27,16 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas-tests]

git checkout $COMMIT
RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
-n 10 \
--tb=line \
--skip-slow \
-m "not slow" \
--max-worker-restart=3 \
--import-mode=importlib \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
--report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

# summarize the results and save them to artifacts:
6 changes: 3 additions & 3 deletions ci/test_cpp.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.

source "$(dirname "$0")/test_cpp_common.sh"

@@ -12,14 +12,14 @@ export GTEST_OUTPUT=xml:${RAPIDS_TESTS_DIR}/

pushd $CONDA_PREFIX/bin/gtests/libcudf/
rapids-logger "Run libcudf gtests"
ctest -j20 --output-on-failure
ctest -j20 --output-on-failure --no-tests=error
SUITEERROR=$?
popd

if (( ${SUITEERROR} == 0 )); then
pushd $CONDA_PREFIX/bin/gtests/libcudf_kafka/
rapids-logger "Run libcudf_kafka gtests"
ctest -j20 --output-on-failure
ctest -j20 --output-on-failure --no-tests=error
SUITEERROR=$?
popd
fi
17 changes: 15 additions & 2 deletions ci/test_wheel_cudf.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -eou pipefail

@@ -22,9 +22,22 @@ RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-downloa
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

# Run smoke tests for aarch64 pull requests
if [[ "$(arch)" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then
rapids-logger "Run smoke tests for cudf"
python ./ci/wheel_smoke_test_cudf.py
else
python -m pytest -n 8 ./python/cudf/cudf/tests
rapids-logger "pytest cudf"
pushd python/cudf/cudf/tests
python -m pytest \
--cache-clear \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf.xml" \
--numprocesses=8 \
--dist=loadscope \
.
popd
fi
14 changes: 12 additions & 2 deletions ci/test_wheel_dask_cudf.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -eou pipefail

@@ -26,5 +26,15 @@ python -m pip install --no-deps ./local-cudf-dep/cudf*.whl
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/dask_cudf*.whl)[test]

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

# Run tests in dask_cudf/tests and dask_cudf/io/tests
python -m pytest -n 8 ./python/dask_cudf/dask_cudf/
rapids-logger "pytest dask_cudf"
pushd python/dask_cudf/dask_cudf
python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
--numprocesses=8 \
.
popd
3 changes: 1 addition & 2 deletions cpp/include/cudf/detail/sequence.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,7 +16,6 @@

#pragma once

#include <cudf/detail/sequence.hpp>
#include <cudf/filling.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
47 changes: 25 additions & 22 deletions cpp/src/io/orc/writer_impl.cu
@@ -2346,29 +2346,20 @@ auto convert_table_to_orc_data(table_view const& input,
auto const padded_block_header_size =
util::round_up_unsafe<size_t>(block_header_size, compressed_block_align);

auto bounce_buffer = [&]() {
size_t max_stream_size = 0;
bool all_device_write = true;

for (auto& ss : strm_descs.host_view().flat_view()) {
if (!out_sink.is_device_write_preferred(ss.stream_size)) { all_device_write = false; }
size_t stream_size = ss.stream_size;
if (compression_kind != NONE) {
ss.first_block = num_compressed_blocks;
ss.bfr_offset = compressed_bfr_size;

auto num_blocks =
std::max<uint32_t>((stream_size + compression_blocksize - 1) / compression_blocksize, 1);
stream_size += num_blocks * block_header_size;
num_compressed_blocks += num_blocks;
compressed_bfr_size +=
(padded_block_header_size + padded_max_compressed_block_size) * num_blocks;
}
max_stream_size = std::max(max_stream_size, stream_size);
for (auto& ss : strm_descs.host_view().flat_view()) {
size_t stream_size = ss.stream_size;
if (compression_kind != NONE) {
ss.first_block = num_compressed_blocks;
ss.bfr_offset = compressed_bfr_size;

auto num_blocks =
std::max<uint32_t>((stream_size + compression_blocksize - 1) / compression_blocksize, 1);
stream_size += num_blocks * block_header_size;
num_compressed_blocks += num_blocks;
compressed_bfr_size +=
(padded_block_header_size + padded_max_compressed_block_size) * num_blocks;
}

return cudf::detail::pinned_host_vector<uint8_t>(all_device_write ? 0 : max_stream_size);
}();
}

// Compress the data streams
rmm::device_uvector<uint8_t> compressed_data(compressed_bfr_size, stream);
@@ -2399,6 +2390,18 @@ auto convert_table_to_orc_data(table_view const& input,
comp_results.device_to_host_sync(stream);
}

auto const max_out_stream_size = [&]() {
uint32_t max_stream_size = 0;
for (auto const& ss : strm_descs.host_view().flat_view()) {
if (!out_sink.is_device_write_preferred(ss.stream_size)) {
max_stream_size = std::max(max_stream_size, ss.stream_size);
}
}
return max_stream_size;
}();

cudf::detail::pinned_host_vector<uint8_t> bounce_buffer(max_out_stream_size);

auto intermediate_stats = gather_statistic_blobs(stats_freq, orc_table, segmentation, stream);

return std::tuple{std::move(enc_data),
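Taken together, the two hunks above move the host bounce-buffer sizing out of the stream-description loop: the buffer is now sized after compression, over only the streams that the sink will not write directly from device memory. A minimal standalone sketch of that sizing idiom follows; the predicate and sizes are made-up stand-ins, not libcudf's actual values.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for out_sink.is_device_write_preferred().
bool device_write_preferred(uint32_t size) { return size >= (1u << 20); }

int main()
{
  std::vector<uint32_t> stream_sizes{4096, 2'000'000, 65'536};  // made-up sizes
  // The bounce buffer only needs to hold the largest stream that must be
  // staged on the host; device-writable streams are skipped entirely.
  uint32_t max_out_stream_size = 0;
  for (auto size : stream_sizes) {
    if (!device_write_preferred(size)) {
      max_out_stream_size = std::max(max_out_stream_size, size);
    }
  }
  std::cout << max_out_stream_size << '\n';  // 65536: the 2 MB stream goes direct
}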
18 changes: 8 additions & 10 deletions cpp/src/strings/split/split.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/split_utils.cuh>
@@ -123,7 +122,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,

// builds the offsets and the vector of all tokens
auto [offsets, tokens] = split_helper(input, tokenizer, stream, mr);
auto const d_offsets = offsets->view().template data<size_type>();
auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
auto const d_tokens = tokens.data();

// compute the maximum number of tokens for any string
@@ -132,7 +131,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(input.size()),
cuda::proclaim_return_type<size_type>([d_offsets] __device__(auto idx) -> size_type {
return d_offsets[idx + 1] - d_offsets[idx];
return static_cast<size_type>(d_offsets[idx + 1] - d_offsets[idx]);
}),
0,
thrust::maximum{});
@@ -144,7 +143,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,
cuda::proclaim_return_type<string_index_pair>(
[d_tokens, d_offsets, col] __device__(size_type idx) {
auto const offset = d_offsets[idx];
auto const token_count = d_offsets[idx + 1] - offset;
auto const token_count = static_cast<size_type>(d_offsets[idx + 1] - offset);
return (col < token_count) ? d_tokens[offset + col] : string_index_pair{nullptr, 0};
}));
results.emplace_back(make_strings_column(itr, itr + input.size(), stream, mr));
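The switch from raw size_type offsets to the offsetalator in the hunks above reflects that libcudf offsets columns may be 32- or 64-bit; the factory-made iterator yields 64-bit values either way, which is why the differences are narrowed back with static_cast. A self-contained sketch of the idea, assuming nothing about the real offsetalator's API:

#include <cstdint>
#include <iostream>
#include <vector>

// Width-erased offset reader: yields int64_t whether the underlying buffer
// holds 32- or 64-bit offsets (sketch only; not cudf's actual interface).
struct offset_reader {
  void const* data;
  bool is_64bit;
  int64_t operator[](std::size_t i) const
  {
    return is_64bit ? static_cast<int64_t const*>(data)[i]
                    : static_cast<int32_t const*>(data)[i];
  }
};

int main()
{
  std::vector<int32_t> offsets{0, 3, 7, 12};  // a small 32-bit offsets buffer
  offset_reader d_offsets{offsets.data(), false};
  // Differences come back as int64_t and are narrowed explicitly, as in the
  // split_fn hunks above:
  auto token_count = static_cast<int32_t>(d_offsets[2] - d_offsets[1]);
  std::cout << token_count << '\n';  // 4
}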
@@ -360,12 +359,11 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,
}

// get the positions for every token
rmm::device_uvector<string_index_pair> tokens(columns_count * strings_count, stream);
rmm::device_uvector<string_index_pair> tokens(
static_cast<int64_t>(columns_count) * static_cast<int64_t>(strings_count), stream);
string_index_pair* d_tokens = tokens.data();
thrust::fill(rmm::exec_policy(stream),
d_tokens,
d_tokens + (columns_count * strings_count),
string_index_pair{nullptr, 0});
thrust::fill(
rmm::exec_policy(stream), tokens.begin(), tokens.end(), string_index_pair{nullptr, 0});
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
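The widening in the tokens allocation above guards against 32-bit overflow: columns_count * strings_count is evaluated in size_type (int32_t) arithmetic unless an operand is widened first, even when the result is stored in a 64-bit variable. A minimal sketch with made-up counts:

#include <cstdint>
#include <iostream>

int main()
{
  int32_t columns_count = 70'000;  // made-up values; 70'000^2 > INT32_MAX
  int32_t strings_count = 70'000;
  // A plain columns_count * strings_count here would multiply in 32 bits and
  // overflow before the assignment widens it. Widening one operand keeps the
  // whole multiply in 64-bit arithmetic:
  int64_t total = static_cast<int64_t>(columns_count) * strings_count;
  std::cout << total << '\n';  // 4900000000
}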