Commit
Merge branch 'branch-24.12' into bug/pivot/list_like
vyasr authored Nov 20, 2024
2 parents 9a2d187 + be9ba6c commit 7f4bebb
Showing 14 changed files with 1,087 additions and 206 deletions.
2 changes: 2 additions & 0 deletions ci/run_cudf_polars_polars_tests.sh
@@ -13,6 +13,8 @@ DESELECTED_TESTS=(
"tests/unit/test_cpu_check.py::test_check_cpu_flags_skipped_no_flags" # Mock library error
"tests/docs/test_user_guide.py" # No dot binary in CI image
"tests/unit/test_polars_import.py::test_fork_safety" # test started to hang in polars-1.14
"tests/unit/operations/test_join.py::test_join_4_columns_with_validity" # fails in some systems, see https://github.com/pola-rs/polars/issues/19870
"tests/unit/io/test_csv.py::test_read_web_file" # fails in rockylinux8 due to SSL CA issues
)

if [[ $(arch) == "aarch64" ]]; then
6 changes: 6 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
@@ -286,6 +286,12 @@ ConfigureNVBench(
ConfigureBench(HASHING_BENCH hashing/partition.cpp)
ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp)

# ##################################################################################################
# * interop benchmark ------------------------------------------------------------------------------
ConfigureNVBench(INTEROP_NVBENCH interop/interop.cpp)
target_link_libraries(INTEROP_NVBENCH PRIVATE nanoarrow)
target_include_directories(INTEROP_NVBENCH PRIVATE ${CMAKE_SOURCE_DIR}/tests/interop)

# ##################################################################################################
# * merge benchmark -------------------------------------------------------------------------------
ConfigureBench(MERGE_BENCH merge/merge.cpp)
244 changes: 244 additions & 0 deletions cpp/benchmarks/interop/interop.cpp
@@ -0,0 +1,244 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/common/table_utilities.hpp>

#include <cudf/interop.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <nanoarrow/nanoarrow.hpp>
#include <nanoarrow/nanoarrow_device.h>
#include <nanoarrow_utils.hpp>
#include <nvbench/nvbench.cuh>

#include <algorithm>
#include <iterator>
#include <vector>

template <cudf::type_id data_type>
void BM_to_arrow_device(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

auto const table = create_random_table(types, row_count{num_rows});
int64_t const size_bytes = estimate_size(table->view());

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::to_arrow_device(table->view(), rmm::cuda_stream_view{launch.get_stream()});
});
}

template <cudf::type_id data_type>
void BM_to_arrow_host(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

auto const table = create_random_table(types, row_count{num_rows});
int64_t const size_bytes = estimate_size(table->view());

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::to_arrow_host(table->view(), rmm::cuda_stream_view{launch.get_stream()});
});
}

template <cudf::type_id data_type>
void BM_from_arrow_device(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

data_profile profile;
profile.set_struct_depth(1);
profile.set_list_depth(1);

auto const table = create_random_table(types, row_count{num_rows}, profile);
cudf::table_view table_view = table->view();
int64_t const size_bytes = estimate_size(table_view);

std::vector<cudf::column_metadata> table_metadata;

std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_columns),
std::back_inserter(table_metadata),
[&](auto const column) {
cudf::column_metadata column_metadata{""};
column_metadata.children_meta = std::vector(
table->get_column(column).num_children(), cudf::column_metadata{""});
return column_metadata;
});

cudf::unique_schema_t schema = cudf::to_arrow_schema(table_view, table_metadata);
cudf::unique_device_array_t input = cudf::to_arrow_device(table_view);

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::from_arrow_device_column(
schema.get(), input.get(), rmm::cuda_stream_view{launch.get_stream()});
});
}

template <cudf::type_id data_type>
void BM_from_arrow_host(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

data_profile profile;
profile.set_struct_depth(1);
profile.set_list_depth(1);

auto const table = create_random_table(types, row_count{num_rows}, profile);
cudf::table_view table_view = table->view();
int64_t const size_bytes = estimate_size(table_view);

std::vector<cudf::column_metadata> table_metadata;

std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_columns),
std::back_inserter(table_metadata),
[&](auto const column) {
cudf::column_metadata column_metadata{""};
column_metadata.children_meta = std::vector(
table->get_column(column).num_children(), cudf::column_metadata{""});
return column_metadata;
});

cudf::unique_schema_t schema = cudf::to_arrow_schema(table_view, table_metadata);
cudf::unique_device_array_t input = cudf::to_arrow_host(table_view);

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::from_arrow_host_column(
schema.get(), input.get(), rmm::cuda_stream_view{launch.get_stream()});
});
}

using data_types = nvbench::enum_type_list<cudf::type_id::INT8,
cudf::type_id::INT16,
cudf::type_id::INT32,
cudf::type_id::INT64,
cudf::type_id::UINT8,
cudf::type_id::UINT16,
cudf::type_id::UINT32,
cudf::type_id::UINT64,
cudf::type_id::FLOAT32,
cudf::type_id::FLOAT64,
cudf::type_id::BOOL8,
cudf::type_id::TIMESTAMP_SECONDS,
cudf::type_id::TIMESTAMP_MILLISECONDS,
cudf::type_id::TIMESTAMP_MICROSECONDS,
cudf::type_id::TIMESTAMP_NANOSECONDS,
cudf::type_id::DURATION_SECONDS,
cudf::type_id::DURATION_MILLISECONDS,
cudf::type_id::DURATION_MICROSECONDS,
cudf::type_id::DURATION_NANOSECONDS,
cudf::type_id::STRING,
cudf::type_id::LIST,
cudf::type_id::DECIMAL32,
cudf::type_id::DECIMAL64,
cudf::type_id::DECIMAL128,
cudf::type_id::STRUCT>;

static char const* stringify_type(cudf::type_id value)
{
switch (value) {
case cudf::type_id::INT8: return "INT8";
case cudf::type_id::INT16: return "INT16";
case cudf::type_id::INT32: return "INT32";
case cudf::type_id::INT64: return "INT64";
case cudf::type_id::UINT8: return "UINT8";
case cudf::type_id::UINT16: return "UINT16";
case cudf::type_id::UINT32: return "UINT32";
case cudf::type_id::UINT64: return "UINT64";
case cudf::type_id::FLOAT32: return "FLOAT32";
case cudf::type_id::FLOAT64: return "FLOAT64";
case cudf::type_id::BOOL8: return "BOOL8";
case cudf::type_id::TIMESTAMP_DAYS: return "TIMESTAMP_DAYS";
case cudf::type_id::TIMESTAMP_SECONDS: return "TIMESTAMP_SECONDS";
case cudf::type_id::TIMESTAMP_MILLISECONDS: return "TIMESTAMP_MILLISECONDS";
case cudf::type_id::TIMESTAMP_MICROSECONDS: return "TIMESTAMP_MICROSECONDS";
case cudf::type_id::TIMESTAMP_NANOSECONDS: return "TIMESTAMP_NANOSECONDS";
case cudf::type_id::DURATION_DAYS: return "DURATION_DAYS";
case cudf::type_id::DURATION_SECONDS: return "DURATION_SECONDS";
case cudf::type_id::DURATION_MILLISECONDS: return "DURATION_MILLISECONDS";
case cudf::type_id::DURATION_MICROSECONDS: return "DURATION_MICROSECONDS";
case cudf::type_id::DURATION_NANOSECONDS: return "DURATION_NANOSECONDS";
case cudf::type_id::DICTIONARY32: return "DICTIONARY32";
case cudf::type_id::STRING: return "STRING";
case cudf::type_id::LIST: return "LIST";
case cudf::type_id::DECIMAL32: return "DECIMAL32";
case cudf::type_id::DECIMAL64: return "DECIMAL64";
case cudf::type_id::DECIMAL128: return "DECIMAL128";
case cudf::type_id::STRUCT: return "STRUCT";
default: return "unknown";
}
}

NVBENCH_DECLARE_ENUM_TYPE_STRINGS(cudf::type_id, stringify_type, stringify_type)

NVBENCH_BENCH_TYPES(BM_to_arrow_host, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("to_arrow_host")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});

NVBENCH_BENCH_TYPES(BM_to_arrow_device, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("to_arrow_device")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});

NVBENCH_BENCH_TYPES(BM_from_arrow_host, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("from_arrow_host")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});

NVBENCH_BENCH_TYPES(BM_from_arrow_device, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("from_arrow_device")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});
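
For context on what these benchmarks measure, the host path amounts to a table-level round trip through an ArrowDeviceArray resident in host memory. The sketch below is illustrative only and is not part of this commit; it uses the table-level cudf::from_arrow_host rather than the column-level variant called in the benchmark, and it assumes fixed-width (non-nested, non-string) columns so that empty per-column metadata is sufficient.

// Illustrative sketch (not part of this commit): round-trip a cudf table through
// host-resident Arrow data, mirroring what BM_to_arrow_host / BM_from_arrow_host time.
// Assumes fixed-width columns so empty cudf::column_metadata entries suffice.
#include <cudf/interop.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <vector>

std::unique_ptr<cudf::table> round_trip_via_arrow_host(cudf::table_view const& view)
{
  // One metadata entry per top-level column; names are not needed for a round trip.
  std::vector<cudf::column_metadata> metadata(view.num_columns(), cudf::column_metadata{""});

  cudf::unique_schema_t schema      = cudf::to_arrow_schema(view, metadata);  // ArrowSchema
  cudf::unique_device_array_t array = cudf::to_arrow_host(view);              // copy to host memory

  // Rebuild a device-resident cudf table from the host-resident Arrow array.
  return cudf::from_arrow_host(schema.get(), array.get());
}
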
56 changes: 46 additions & 10 deletions cpp/benchmarks/io/json/json_reader_input.cpp
@@ -24,17 +24,19 @@

#include <nvbench/nvbench.cuh>

-// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
-// run on most GPUs, but large enough to allow highest throughput
-constexpr size_t data_size = 512 << 20;
+// Default size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks
+// to run on most GPUs, but large enough to allow highest throughput
+constexpr size_t default_data_size = 512 << 20;
constexpr cudf::size_type num_cols = 64;

void json_read_common(cuio_source_sink_pair& source_sink,
cudf::size_type num_rows_to_read,
-nvbench::state& state)
+nvbench::state& state,
+cudf::io::compression_type comptype = cudf::io::compression_type::NONE,
+size_t data_size = default_data_size)
{
cudf::io::json_reader_options read_opts =
-cudf::io::json_reader_options::builder(source_sink.make_source_info());
+cudf::io::json_reader_options::builder(source_sink.make_source_info()).compression(comptype);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
@@ -57,15 +59,21 @@ void json_read_common(cuio_source_sink_pair& source_sink,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
-std::vector<cudf::type_id> const& dtypes)
+cudf::size_type json_write_bm_data(
+cudf::io::sink_info sink,
+std::vector<cudf::type_id> const& dtypes,
+cudf::io::compression_type comptype = cudf::io::compression_type::NONE,
+size_t data_size = default_data_size)
{
auto const tbl = create_random_table(
cycle_dtypes(dtypes, num_cols), table_size_bytes{data_size}, data_profile_builder());
auto const view = tbl->view();

cudf::io::json_writer_options const write_opts =
-cudf::io::json_writer_options::builder(sink, view).na_rep("null").rows_per_chunk(100'000);
+cudf::io::json_writer_options::builder(sink, view)
+.na_rep("null")
+.rows_per_chunk(100'000)
+.compression(comptype);
cudf::io::write_json(write_opts);
return view.num_rows();
}
@@ -87,6 +95,26 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_typ
json_read_common(source_sink, num_rows, state);
}

template <cudf::io::compression_type comptype, io_type IO>
void BM_json_read_compressed_io(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<comptype>, nvbench::enum_type<IO>>)
{
size_t const data_size = state.get_int64("data_size");
cuio_source_sink_pair source_sink(IO);
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
static_cast<int32_t>(data_type::DECIMAL),
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
static_cast<int32_t>(data_type::STRING),
static_cast<int32_t>(data_type::LIST),
static_cast<int32_t>(data_type::STRUCT)});
auto const num_rows =
json_write_bm_data(source_sink.make_sink_info(), d_type, comptype, data_size);

json_read_common(source_sink, num_rows, state, comptype, data_size);
}

template <data_type DataType, io_type IO>
void BM_json_read_data_type(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
@@ -110,8 +138,9 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
using io_list =
nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

-using compression_list =
-nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
+using compression_list = nvbench::enum_type_list<cudf::io::compression_type::GZIP,
+cudf::io::compression_type::SNAPPY,
+cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_json_read_data_type,
NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
@@ -123,3 +152,10 @@ NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list))
.set_name("json_read_io")
.set_type_axes_names({"io"})
.set_min_samples(4);

NVBENCH_BENCH_TYPES(BM_json_read_compressed_io,
NVBENCH_TYPE_AXES(compression_list, nvbench::enum_type_list<io_type::FILEPATH>))
.set_name("json_read_compressed_io")
.set_type_axes_names({"compression_type", "io"})
.add_int64_power_of_two_axis("data_size", nvbench::range(20, 29, 1))
.set_min_samples(4);
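
As a reference for the option chaining the new compressed-IO benchmark drives, the sketch below is illustrative only and not part of this commit: it writes a table as GZIP-compressed JSON to a file and reads it back with the matching compression type. The file path and helper name are hypothetical.

// Illustrative sketch (not part of this commit): compressed JSON write/read round
// trip using the same builder options as json_write_bm_data and json_read_common.
// The path "data.json.gz" and the function name are hypothetical.
#include <cudf/io/json.hpp>
#include <cudf/table/table_view.hpp>

#include <string>

cudf::io::table_with_metadata gzip_json_round_trip(cudf::table_view const& view)
{
  std::string const path{"data.json.gz"};

  cudf::io::json_writer_options const write_opts =
    cudf::io::json_writer_options::builder(cudf::io::sink_info{path}, view)
      .na_rep("null")
      .rows_per_chunk(100'000)
      .compression(cudf::io::compression_type::GZIP);
  cudf::io::write_json(write_opts);  // produces a gzip-compressed JSON file

  cudf::io::json_reader_options const read_opts =
    cudf::io::json_reader_options::builder(cudf::io::source_info{path})
      .compression(cudf::io::compression_type::GZIP);
  return cudf::io::read_json(read_opts);  // decompresses and parses back into a table
}
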
1 change: 1 addition & 0 deletions cpp/benchmarks/io/nvbench_helpers.hpp
@@ -76,6 +76,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
[](auto value) {
switch (value) {
case cudf::io::compression_type::SNAPPY: return "SNAPPY";
case cudf::io::compression_type::GZIP: return "GZIP";
case cudf::io::compression_type::NONE: return "NONE";
default: return "Unknown";
}
3 changes: 2 additions & 1 deletion cpp/src/io/utilities/config_utils.cpp
@@ -56,7 +56,8 @@ void set_up_kvikio()
{
static std::once_flag flag{};
std::call_once(flag, [] {
-auto const compat_mode = kvikio::detail::getenv_or<bool>("KVIKIO_COMPAT_MODE", true);
+auto const compat_mode =
+kvikio::detail::getenv_or("KVIKIO_COMPAT_MODE", kvikio::CompatMode::ON);
kvikio::defaults::compat_mode_reset(compat_mode);

auto const nthreads = getenv_or<unsigned int>("KVIKIO_NTHREADS", 4u);
2 changes: 1 addition & 1 deletion cpp/src/io/utilities/data_sink.cpp
@@ -45,7 +45,7 @@ class file_sink : public data_sink {
cufile_integration::set_up_kvikio();
_kvikio_file = kvikio::FileHandle(filepath, "w");
CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.",
-_kvikio_file.is_compat_mode_on() ? "on" : "off");
+_kvikio_file.is_compat_mode_preferred() ? "on" : "off");
} else {
_cufile_out = detail::make_cufile_output(filepath);
}