Merge branch 'branch-24.12' into json-quote-char-parsing-fix

rapidsai · Oct 21, 2024 · ca8ee32 · ca8ee32
2 parents 293521f + 074ab74
commit ca8ee32
Show file tree

Hide file tree

Showing 278 changed files with 10,671 additions and 8,351 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -47,11 +47,23 @@ jobs:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]
     with:
-      build_type: pull-request
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
       # Use the wheel container so we can skip conda solves and since our
       # primary static consumers (Spark) are not in conda anyway.
       container_image: "rapidsai/ci-wheel:latest"
       run_script: "ci/configure_cpp_static.sh"
+  clang-tidy:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/[email protected]
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      run_script: "ci/clang_tidy.sh"
   conda-python-cudf-tests:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -95,6 +95,16 @@ repos:
         entry: 'pytest\.xfail'
         language: pygrep
         types: [python]
+      - id: no-unseeded-default-rng
+        name: no-unseeded-default-rng
+        description: 'Enforce that no non-seeded default_rng is used and default_rng is used instead of np.random.seed'
+        entry: |
+          # Check for usage of default_rng without seeding
+          default_rng\(\)|
+          # Check for usage of np.random.seed
+          np.random.seed\(
+        language: pygrep
+        types: [python]
       - id: cmake-format
         name: cmake-format
         entry: ./cpp/scripts/run-cmake-format.sh cmake-format

diff --git a/ci/build_python.sh b/ci/build_python.sh
@@ -52,5 +52,10 @@ RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
   --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
   conda/recipes/custreamz
 
+RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \
+  --no-test \
+  --channel "${CPP_CHANNEL}" \
+  --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
+  conda/recipes/cudf-polars
 
 rapids-upload-conda-to-s3 python
diff --git a/ci/clang_tidy.sh b/ci/clang_tidy.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+rapids-logger "Create clang-tidy conda environment"
+. /opt/conda/etc/profile.d/conda.sh
+
+ENV_YAML_DIR="$(mktemp -d)"
+
+rapids-dependency-file-generator \
+  --output conda \
+  --file-key clang_tidy \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
+
+rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy
+
+# Temporarily allow unbound variables for conda activation.
+set +u
+conda activate clang_tidy
+set -u
+
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
+source rapids-configure-sccache
+
+# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled.
+cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja
+cmake --build cpp/build
diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh
@@ -14,7 +14,8 @@ rapids-mamba-retry install \
   --channel "${PYTHON_CHANNEL}" \
   "dask-cudf=${RAPIDS_VERSION}" \
   "cudf_kafka=${RAPIDS_VERSION}" \
-  "custreamz=${RAPIDS_VERSION}"
+  "custreamz=${RAPIDS_VERSION}" \
+  "cudf-polars=${RAPIDS_VERSION}"
 
 rapids-logger "Check GPU usage"
 nvidia-smi
@@ -37,7 +38,7 @@ rapids-logger "pytest dask_cudf (legacy)"
 DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
   --numprocesses=8 \
-  --dist=loadscope \
+  --dist=worksteal \
   .
 
 rapids-logger "pytest cudf_kafka"
@@ -54,5 +55,19 @@ rapids-logger "pytest custreamz"
   --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/custreamz-coverage.xml" \
   --cov-report=term
 
+# Note that cudf-polars uses rmm.mr.CudaAsyncMemoryResource() which allocates
+# half the available memory. This doesn't play well with multiple workers, so
+# we keep --numprocesses=1 for now. This should be resolved by
+# https://github.com/rapidsai/cudf/issues/16723.
+rapids-logger "pytest cudf-polars"
+./ci/run_cudf_polars_pytests.sh \
+  --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml" \
+  --numprocesses=1 \
+  --dist=worksteal \
+  --cov-config=./pyproject.toml \
+  --cov=cudf_polars \
+  --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-polars-coverage.xml" \
+  --cov-report=term
+
 rapids-logger "Test script exiting with value: $EXITCODE"
 exit ${EXITCODE}
diff --git a/conda/recipes/cudf-polars/build.sh b/conda/recipes/cudf-polars/build.sh
@@ -0,0 +1,4 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# This assumes the script is executed from the root of the repo directory
+./build.sh cudf_polars
diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
+{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
+{% set py_version = environ['CONDA_PY'] %}
+{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
+{% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
+
+package:
+  name: cudf-polars
+  version: {{ version }}
+
+source:
+  path: ../../..
+
+build:
+  number: {{ GIT_DESCRIBE_NUMBER }}
+  string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  script_env:
+    - AWS_ACCESS_KEY_ID
+    - AWS_SECRET_ACCESS_KEY
+    - AWS_SESSION_TOKEN
+    - CMAKE_C_COMPILER_LAUNCHER
+    - CMAKE_CUDA_COMPILER_LAUNCHER
+    - CMAKE_CXX_COMPILER_LAUNCHER
+    - CMAKE_GENERATOR
+    - PARALLEL_LEVEL
+    - SCCACHE_BUCKET
+    - SCCACHE_IDLE_TIMEOUT
+    - SCCACHE_REGION
+    - SCCACHE_S3_KEY_PREFIX=cudf-polars-aarch64 # [aarch64]
+    - SCCACHE_S3_KEY_PREFIX=cudf-polars-linux64 # [linux64]
+    - SCCACHE_S3_USE_SSL
+    - SCCACHE_S3_NO_CREDENTIALS
+
+requirements:
+  host:
+    - python
+    - rapids-build-backend >=0.3.0,<0.4.0.dev0
+    - setuptools
+    - cuda-version ={{ cuda_version }}
+  run:
+    - python
+    - pylibcudf ={{ version }}
+    - polars >=1.8,<1.9
+    - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+
+test:
+  requires:
+    - cuda-version ={{ cuda_version }}
+  imports:
+    - cudf_polars
+
+
+about:
+  home: https://rapids.ai/
+  license: Apache-2.0
+  license_family: APACHE
+  license_file: LICENSE
+  summary: cudf-polars library
diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
@@ -39,7 +39,7 @@ Checks:
        -clang-analyzer-optin.core.EnumCastOutOfRange,
        -clang-analyzer-optin.cplusplus.UninitializedObject'
 
-WarningsAsErrors: ''
+WarningsAsErrors: '*'
 HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
 ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
 FormatStyle:     none

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -88,6 +88,7 @@ option(
   ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL}
 )
 mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
+option(CUDF_CLANG_TIDY "Enable clang-tidy checking" OFF)
 
 message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
 message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
@@ -144,6 +145,58 @@ if(NOT CUDF_GENERATED_INCLUDE_DIR)
   set(CUDF_GENERATED_INCLUDE_DIR ${CUDF_BINARY_DIR})
 endif()
 
+# ##################################################################################################
+# * clang-tidy configuration ----------------------------------------------------------------------
+if(CUDF_CLANG_TIDY)
+  find_program(
+    CLANG_TIDY_EXE
+    NAMES "clang-tidy"
+    DOC "Path to clang-tidy executable" REQUIRED
+  )
+
+  execute_process(
+    COMMAND ${CLANG_TIDY_EXE} --version
+    OUTPUT_VARIABLE CLANG_TIDY_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+  string(REGEX MATCH "LLVM version ([0-9]+\\.[0-9]+)\\.[0-9]+" LLVM_VERSION_MATCH
+               "${CLANG_TIDY_OUTPUT}"
+  )
+  # Discard the patch version and allow it to float. Empirically, results between patch versions are
+  # mostly stable, and looking at available packages on some package managers sometimes patch
+  # versions are skipped so we don't want to constrain to a patch version that the user can't
+  # install.
+  set(LLVM_VERSION "${CMAKE_MATCH_1}")
+  set(expected_clang_tidy_version 19.1)
+  if(NOT expected_clang_tidy_version VERSION_EQUAL LLVM_VERSION)
+    message(
+      FATAL_ERROR
+        "clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}"
+    )
+  endif()
+endif()
+
+# Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES.
+function(enable_clang_tidy target)
+  set(_tidy_options)
+  set(_tidy_one_value)
+  set(_tidy_multi_value SKIPPED_FILES)
+  cmake_parse_arguments(
+    _TIDY "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN}
+  )
+
+  if(CUDF_CLANG_TIDY)
+    # clang will complain about unused link libraries on the compile line unless we specify
+    # -Qunused-arguments.
+    set_target_properties(
+      ${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
+    )
+    foreach(file IN LISTS _TIDY_SKIPPED_FILES)
+      set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON)
+    endforeach()
+  endif()
+endfunction()
+
 # ##################################################################################################
 # * conda environment -----------------------------------------------------------------------------
 rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
@@ -315,8 +368,13 @@ add_library(
   src/filling/repeat.cu
   src/filling/sequence.cu
   src/groupby/groupby.cu
+  src/groupby/hash/compute_groupby.cu
+  src/groupby/hash/compute_single_pass_aggs.cu
+  src/groupby/hash/create_sparse_results_table.cu
   src/groupby/hash/flatten_single_pass_aggs.cpp
   src/groupby/hash/groupby.cu
+  src/groupby/hash/hash_compound_agg_finalizer.cu
+  src/groupby/hash/sparse_to_dense_results.cu
   src/groupby/sort/aggregate.cpp
   src/groupby/sort/group_argmax.cu
   src/groupby/sort/group_argmin.cu
@@ -714,6 +772,7 @@ target_compile_options(
   cudf PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
                "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
 )
+enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp)
 
 if(CUDF_BUILD_STACKTRACE_DEBUG)
   # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -384,6 +384,7 @@ ConfigureNVBench(
   string/join_strings.cpp
   string/lengths.cpp
   string/like.cpp
+  string/make_strings_column.cu
   string/replace_re.cpp
   string/reverse.cpp
   string/slice.cpp

diff --git a/cpp/benchmarks/string/make_strings_column.cu b/cpp/benchmarks/string/make_strings_column.cu
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/pair.h>
+#include <thrust/tabulate.h>
+
+#include <nvbench/nvbench.cuh>
+
+#include <vector>
+
+namespace {
+
+constexpr int min_row_width = 0;
+constexpr int max_row_width = 50;
+
+using string_index_pair = thrust::pair<char const*, cudf::size_type>;
+
+template <bool batch_construction>
+std::vector<std::unique_ptr<cudf::column>> make_strings_columns(
+  std::vector<cudf::device_span<string_index_pair const>> const& input,
+  rmm::cuda_stream_view stream)
+{
+  if constexpr (batch_construction) {
+    return cudf::make_strings_column_batch(input, stream);
+  } else {
+    std::vector<std::unique_ptr<cudf::column>> output;
+    output.reserve(input.size());
+    for (auto const& column_input : input) {
+      output.emplace_back(cudf::make_strings_column(column_input, stream));
+    }
+    return output;
+  }
+}
+
+}  // namespace
+
+static void BM_make_strings_column_batch(nvbench::state& state)
+{
+  auto const num_rows   = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const batch_size = static_cast<cudf::size_type>(state.get_int64("batch_size"));
+  auto const has_nulls  = true;
+
+  data_profile const table_profile =
+    data_profile_builder()
+      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_row_width, max_row_width)
+      .null_probability(has_nulls ? std::optional<double>{0.1} : std::nullopt);
+  auto const data_table = create_random_table(
+    cycle_dtypes({cudf::type_id::STRING}, batch_size), row_count{num_rows}, table_profile);
+
+  auto const stream = cudf::get_default_stream();
+  auto input_data   = std::vector<rmm::device_uvector<string_index_pair>>{};
+  auto input        = std::vector<cudf::device_span<string_index_pair const>>{};
+  input_data.reserve(batch_size);
+  input.reserve(batch_size);
+  for (auto const& cv : data_table->view()) {
+    auto const d_data_ptr = cudf::column_device_view::create(cv, stream);
+    auto batch_input      = rmm::device_uvector<string_index_pair>(cv.size(), stream);
+    thrust::tabulate(rmm::exec_policy(stream),
+                     batch_input.begin(),
+                     batch_input.end(),
+                     [data_col = *d_data_ptr] __device__(auto const idx) {
+                       if (data_col.is_null(idx)) { return string_index_pair{nullptr, 0}; }
+                       auto const row = data_col.element<cudf::string_view>(idx);
+                       return string_index_pair{row.data(), row.size_bytes()};
+                     });
+    input_data.emplace_back(std::move(batch_input));
+    input.emplace_back(input_data.back());
+  }
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    [[maybe_unused]] auto const output = make_strings_columns<true>(input, stream);
+  });
+}
+
+NVBENCH_BENCH(BM_make_strings_column_batch)
+  .set_name("make_strings_column_batch")
+  .add_int64_axis("num_rows", {100'000, 500'000, 1'000'000, 2'000'000})
+  .add_int64_axis("batch_size", {10, 20, 50, 100});