Merge remote-tracking branch 'upstream/branch-24.02' into enh/to_date…

…time/utc
rapidsai · Jan 19, 2024 · 602aeb7 · 602aeb7
2 parents a71e2d4 + 446da75
commit 602aeb7
Show file tree

Hide file tree

Showing 424 changed files with 5,512 additions and 3,916 deletions.
diff --git a/README.md b/README.md
@@ -56,6 +56,10 @@ print(tips_df.groupby("size").tip_percentage.mean())
 - [libcudf (C++/CUDA) documentation](https://docs.rapids.ai/api/libcudf/stable/)
 - [RAPIDS Community](https://rapids.ai/learn-more/#get-involved): Get help, contribute, and collaborate.
 
+See the [RAPIDS install page](https://docs.rapids.ai/install) for
+the most up-to-date information and commands for installing cuDF
+and other RAPIDS packages.
+
 ## Installation
 
 ### CUDA/GPU requirements
@@ -64,6 +68,24 @@ print(tips_df.groupby("size").tip_percentage.mean())
 * NVIDIA driver 450.80.02+
 * Volta architecture or better (Compute Capability >=7.0)
 
+### Pip
+
+cuDF can be installed via `pip` from the NVIDIA Python Package Index.
+Be sure to select the appropriate cuDF package depending
+on the major version of CUDA available in your environment:
+
+For CUDA 11.x:
+
+```bash
+pip install --extra-index-url=https://pypi.nvidia.com cudf-cu11
+```
+
+For CUDA 12.x:
+
+```bash
+pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12
+```
+
 ### Conda
 
 cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects/miniconda/en/latest/) or the full [Anaconda distribution](https://www.anaconda.com/download) from the `rapidsai` channel:

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
@@ -1,9 +1,13 @@
 #!/bin/bash
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
-source rapids-env-update
+rapids-configure-conda-channels
+
+source rapids-configure-sccache
+
+source rapids-date-string
 
 export CMAKE_GENERATOR=Ninja
 

diff --git a/ci/build_docs.sh b/ci/build_docs.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 

diff --git a/ci/build_python.sh b/ci/build_python.sh
@@ -1,9 +1,13 @@
 #!/bin/bash
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
-source rapids-env-update
+rapids-configure-conda-channels
+
+source rapids-configure-sccache
+
+source rapids-date-string
 
 export CMAKE_GENERATOR=Ninja
 

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
@@ -48,7 +48,7 @@ fi
 if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
     sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file}
     sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
-    sed -i "/ptxcompiler/d" ${pyproject_file}
+    sed -i "s/ptxcompiler/pynvjitlink/g" ${pyproject_file}
     sed -i "/cubinlinker/d" ${pyproject_file}
 fi
 

diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
@@ -1,11 +1,11 @@
 #!/bin/bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 set -euo pipefail
 
 package_dir="python/cudf"
 
-export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"
+export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON"
 
 ./ci/build_wheel.sh cudf ${package_dir}
 

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -12,6 +12,7 @@ dependencies:
 - benchmark==1.8.0
 - boto3>=1.21.21
 - botocore>=1.24.21
+- breathe>=4.35.0
 - c-compiler
 - cachetools
 - clang-tools=16.0.6
@@ -74,7 +75,7 @@ dependencies:
 - pydata-sphinx-theme!=0.14.2
 - pytest
 - pytest-benchmark
-- pytest-cases
+- pytest-cases>=3.8.2
 - pytest-cov
 - pytest-xdist
 - python-confluent-kafka>=1.9.0,<1.10.0a0

diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -12,6 +12,7 @@ dependencies:
 - benchmark==1.8.0
 - boto3>=1.21.21
 - botocore>=1.24.21
+- breathe>=4.35.0
 - c-compiler
 - cachetools
 - clang-tools=16.0.6
@@ -69,9 +70,10 @@ dependencies:
 - protobuf>=4.21,<5
 - pyarrow==14.0.1.*
 - pydata-sphinx-theme!=0.14.2
+- pynvjitlink
 - pytest
 - pytest-benchmark
-- pytest-cases
+- pytest-cases>=3.8.2
 - pytest-cov
 - pytest-xdist
 - python-confluent-kafka>=1.9.0,<1.10.0a0

diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
 {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
@@ -98,6 +98,7 @@ requirements:
     # xref: https://github.com/rapidsai/cudf/issues/12822
     - cuda-nvrtc
     - cuda-python >=12.0,<13.0a0
+    - pynvjitlink
     {% endif %}
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
     - nvtx >=0.2.1

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -380,6 +380,8 @@ add_library(
   src/io/orc/dict_enc.cu
   src/io/orc/orc.cpp
   src/io/orc/reader_impl.cu
+  src/io/orc/reader_impl_helpers.cpp
+  src/io/orc/reader_impl_preprocess.cu
   src/io/orc/stats_enc.cu
   src/io/orc/stripe_data.cu
   src/io/orc/stripe_enc.cu

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -183,6 +183,11 @@ ConfigureNVBench(
   sort/sort_lists.cpp sort/sort_structs.cpp
 )
 
+# ##################################################################################################
+# * structs benchmark
+# --------------------------------------------------------------------------------
+ConfigureNVBench(STRUCT_CREATION_NVBENCH structs/create_structs.cpp)
+
 # ##################################################################################################
 # * quantiles benchmark
 # --------------------------------------------------------------------------------

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -540,7 +540,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
   return cudf::make_strings_column(
     num_rows,
     std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
-    std::make_unique<cudf::column>(std::move(chars), rmm::device_buffer{}, 0),
+    chars.release(),
     null_count,
     profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
 }

diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <benchmark/benchmark.h>
+#include <rmm/cuda_device.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
 #include <rmm/mr/device/owning_wrapper.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
@@ -33,7 +34,8 @@ inline auto make_pool_instance()
 {
   static rmm::mr::cuda_memory_resource cuda_mr;
   static auto pool_mr =
-    std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(&cuda_mr);
+    std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(
+      &cuda_mr, rmm::percent_of_free_device_memory(50));
   return pool_mr;
 }
 }  // namespace

diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 
 #include <cudf/utilities/error.hpp>
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/cuda_async_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
@@ -42,7 +43,8 @@ struct nvbench_base_fixture {
 
   inline auto make_pool()
   {
-    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
+    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
+      make_cuda(), rmm::percent_of_free_device_memory(50));
   }
 
   inline auto make_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
@@ -56,7 +58,8 @@ struct nvbench_base_fixture {
 
   inline auto make_managed_pool()
   {
-    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_managed());
+    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
+      make_managed(), rmm::percent_of_free_device_memory(50));
   }
 
   inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(

diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ static void bench_hash(nvbench::state& state)
 
   // collect statistics
   cudf::strings_column_view input(data->get_column(1).view());
-  auto const chars_size = input.chars_size();
+  auto const chars_size = input.chars_size(stream);
   // add memory read from string column
   state.add_global_memory_reads<nvbench::int8_t>(chars_size);
   // add memory read from int64_t column

diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,19 +31,19 @@
 
 #include <cassert>
 
-__global__ static void init_curand(curandState* state, int const nstates)
+CUDF_KERNEL void init_curand(curandState* state, int const nstates)
 {
   int ithread = threadIdx.x + blockIdx.x * blockDim.x;
 
   if (ithread < nstates) { curand_init(1234ULL, ithread, 0, state + ithread); }
 }
 
 template <typename key_type, typename size_type>
-__global__ static void init_build_tbl(key_type* const build_tbl,
-                                      size_type const build_tbl_size,
-                                      int const multiplicity,
-                                      curandState* state,
-                                      int const num_states)
+CUDF_KERNEL void init_build_tbl(key_type* const build_tbl,
+                                size_type const build_tbl_size,
+                                int const multiplicity,
+                                curandState* state,
+                                int const num_states)
 {
   auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x;
   auto const stride    = blockDim.x * gridDim.x;
@@ -61,14 +61,14 @@ __global__ static void init_build_tbl(key_type* const build_tbl,
 }
 
 template <typename key_type, typename size_type>
-__global__ void init_probe_tbl(key_type* const probe_tbl,
-                               size_type const probe_tbl_size,
-                               size_type const build_tbl_size,
-                               key_type const rand_max,
-                               double const selectivity,
-                               int const multiplicity,
-                               curandState* state,
-                               int const num_states)
+CUDF_KERNEL void init_probe_tbl(key_type* const probe_tbl,
+                                size_type const probe_tbl_size,
+                                size_type const build_tbl_size,
+                                key_type const rand_max,
+                                double const selectivity,
+                                int const multiplicity,
+                                curandState* state,
+                                int const num_states)
 {
   auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x;
   auto const stride    = blockDim.x * gridDim.x;

diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -177,10 +177,10 @@ auto build_json_string_column(int desired_bytes, int num_rows)
   auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
   json_benchmark_row_builder jb{
     desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
-  auto children = cudf::strings::detail::make_strings_children(
+  auto [offsets, chars] = cudf::strings::detail::make_strings_children(
     jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   return cudf::make_strings_column(
-    num_rows, std::move(children.first), std::move(children.second), 0, {});
+    num_rows, std::move(offsets), std::move(chars->release().data.release()[0]), 0, {});
 }
 
 void BM_case(benchmark::State& state, std::string query_arg)
@@ -190,7 +190,7 @@ void BM_case(benchmark::State& state, std::string query_arg)
   int desired_bytes = state.range(1);
   auto input        = build_json_string_column(desired_bytes, num_rows);
   cudf::strings_column_view scv(input->view());
-  size_t num_chars = scv.chars().size();
+  size_t num_chars = scv.chars_size(cudf::get_default_stream());
 
   std::string json_path(query_arg);
 

diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,28 +43,28 @@ void bench_case(nvbench::state& state)
   if (encoding == "ascii") {
     data_profile ascii_profile = data_profile_builder().no_validity().distribution(
       cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126);  // nice ASCII range
-    auto input = cudf::strings_column_view(col_view);
-    auto ascii_column =
-      create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile);
+    auto input        = cudf::strings_column_view(col_view);
+    auto ascii_column = create_random_column(
+      cudf::type_id::INT8, row_count{input.chars_size(cudf::get_default_stream())}, ascii_profile);
     auto ascii_data = ascii_column->view();
 
     col_view = cudf::column_view(col_view.type(),
                                  col_view.size(),
-                                 nullptr,
+                                 ascii_data.data<char>(),
                                  col_view.null_mask(),
                                  col_view.null_count(),
                                  0,
-                                 {input.offsets(), ascii_data});
+                                 {input.offsets()});
 
     ascii_contents = ascii_column->release();
   }
   auto input = cudf::strings_column_view(col_view);
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
-  state.add_element_count(input.chars_size(), "chars_size");
-  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size());
-  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size());
+  state.add_element_count(input.chars_size(cudf::get_default_stream()), "chars_size");
+  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size(cudf::get_default_stream()));
+  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size(cudf::get_default_stream()));
 
   state.exec(nvbench::exec_tag::sync,
              [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); });