Merge branch 'branch-24.02' into support-freq-in-datetime-index
galipremsagar authored Dec 12, 2023
2 parents 0d5c452 + 1c6f80d commit 112dbc1
Showing 108 changed files with 1,809 additions and 1,678 deletions.
2 changes: 1 addition & 1 deletion ci/build_wheel_cudf.sh
@@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf"

export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"

./ci/build_wheel.sh cudf ${package_dir}

5 changes: 2 additions & 3 deletions cpp/CMakeLists.txt
@@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
@@ -90,9 +92,6 @@ option(
)
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)

option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
21 changes: 12 additions & 9 deletions cpp/benchmarks/common/generate_input.cu
@@ -53,6 +53,8 @@
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <cuda/functional>

#include <algorithm>
#include <cstdint>
#include <memory>
@@ -247,12 +249,12 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
sec.end(),
ns.begin(),
result.begin(),
[] __device__(int64_t sec_value, int64_t nanoseconds_value) {
cuda::proclaim_return_type<T>([] __device__(int64_t sec_value, int64_t nanoseconds_value) {
auto const timestamp_ns =
cudf::duration_s{sec_value} + cudf::duration_ns{nanoseconds_value};
// Return value in the type's precision
return T(cuda::std::chrono::duration_cast<typename T::duration>(timestamp_ns));
});
}));
return result;
}
};
@@ -367,12 +369,13 @@ rmm::device_uvector<cudf::size_type> sample_indices_with_run_length(cudf::size_t
// This is gather.
auto avg_repeated_sample_indices_iterator = thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
[rb = run_lens.begin(),
re = run_lens.end(),
samples_indices = samples_indices.begin()] __device__(cudf::size_type i) {
auto sample_idx = thrust::upper_bound(thrust::seq, rb, re, i) - rb;
return samples_indices[sample_idx];
});
cuda::proclaim_return_type<cudf::size_type>(
[rb = run_lens.begin(),
re = run_lens.end(),
samples_indices = samples_indices.begin()] __device__(cudf::size_type i) {
auto sample_idx = thrust::upper_bound(thrust::seq, rb, re, i) - rb;
return samples_indices[sample_idx];
}));
rmm::device_uvector<cudf::size_type> repeated_sample_indices(num_rows,
cudf::get_default_stream());
thrust::copy(thrust::device,
@@ -513,7 +516,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
lengths.end(),
null_mask.begin(),
lengths.begin(),
[] __device__(auto) { return 0; },
cuda::proclaim_return_type<cudf::size_type>([] __device__(auto) { return 0; }),
thrust::logical_not<bool>{});
auto valid_lengths = thrust::make_transform_iterator(
thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())),
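A note on the generate_input.cu changes above: the return type of an extended __device__ lambda is not visible to host-side code, so Thrust algorithms invoked from the host may fail to deduce it; libcu++'s cuda::proclaim_return_type (from the newly included <cuda/functional>) declares the return type explicitly. A minimal standalone sketch of the pattern, not taken from this diff (compile with nvcc --extended-lambda):

#include <thrust/device_vector.h>
#include <thrust/transform.h>

#include <cuda/functional>

int main()
{
  thrust::device_vector<int> in(8, 3);
  thrust::device_vector<int> out(8);
  // Wrapping the lambda declares its return type up front, mirroring the
  // wrappers added around the benchmark lambdas in this commit.
  thrust::transform(in.begin(),
                    in.end(),
                    out.begin(),
                    cuda::proclaim_return_type<int>([] __device__(int x) { return x * x; }));
  return 0;
}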
27 changes: 27 additions & 0 deletions cpp/benchmarks/io/cuio_common.cpp
@@ -201,3 +201,30 @@ void try_drop_l3_cache()
[](auto& cmd) { return exec_cmd(cmd).empty(); }),
"Failed to execute the drop cache command");
}

cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
{
if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
if (io_string == "VOID") { return cudf::io::io_type::VOID; }
if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
CUDF_FAIL("Unsupported io_type.");
}

cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string)
{
if (compression_string == "NONE") { return cudf::io::compression_type::NONE; }
if (compression_string == "AUTO") { return cudf::io::compression_type::AUTO; }
if (compression_string == "SNAPPY") { return cudf::io::compression_type::SNAPPY; }
if (compression_string == "GZIP") { return cudf::io::compression_type::GZIP; }
if (compression_string == "BZIP2") { return cudf::io::compression_type::BZIP2; }
if (compression_string == "BROTLI") { return cudf::io::compression_type::BROTLI; }
if (compression_string == "ZIP") { return cudf::io::compression_type::ZIP; }
if (compression_string == "XZ") { return cudf::io::compression_type::XZ; }
if (compression_string == "ZLIB") { return cudf::io::compression_type::ZLIB; }
if (compression_string == "LZ4") { return cudf::io::compression_type::LZ4; }
if (compression_string == "LZO") { return cudf::io::compression_type::LZO; }
if (compression_string == "ZSTD") { return cudf::io::compression_type::ZSTD; }
CUDF_FAIL("Unsupported compression_type.");
}
24 changes: 24 additions & 0 deletions cpp/benchmarks/io/cuio_common.hpp
@@ -138,3 +138,27 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,
* @throw cudf::logic_error if the environment variable is set and the command fails
*/
void try_drop_l3_cache();

/**
* @brief Convert a string to the corresponding io_type enum value.
*
* Maps a string such as "FILEPATH" or "HOST_BUFFER" to the matching cudf::io::io_type value.
*
* @param io_string The input string representing the io_type
*
* @throw cudf::logic_error if the string does not name a supported io_type
*
* @return The io_type enum value
*/
cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);

/**
* @brief Convert a string to the corresponding compression_type enum value.
*
* Maps a string such as "SNAPPY" or "ZSTD" to the matching cudf::io::compression_type value.
*
* @param compression_string The input string representing the compression_type
*
* @throw cudf::logic_error if the string does not name a supported compression_type
*
* @return The compression_type enum value
*/
cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string);
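These helpers let the IO benchmarks take io_type and compression_type as runtime strings (e.g. from an nvbench string axis) rather than as template parameters, as the parquet benchmark changes below show. A hypothetical call site, not part of this diff; it assumes cuio_common.hpp is on the include path and relies on CUDF_FAIL raising cudf::logic_error for an unrecognized name:

#include "cuio_common.hpp"

#include <cudf/utilities/error.hpp>

#include <iostream>

int main()
{
  // A supported name maps straight to the enum value.
  auto const io = retrieve_io_type_enum("DEVICE_BUFFER");
  static_cast<void>(io);
  // An unsupported name triggers CUDF_FAIL, which throws cudf::logic_error.
  try {
    retrieve_compression_type_enum("LZMA");
  } catch (cudf::logic_error const& e) {
    std::cerr << e.what() << '\n';  // message includes "Unsupported compression_type."
  }
  return 0;
}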
91 changes: 37 additions & 54 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -56,34 +56,30 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType, cudf::io::io_type IOType>
void BM_parquet_read_data(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
template <data_type DataType>
void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
{
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = cudf::io::compression_type::SNAPPY;
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
auto const compression = cudf::io::compression_type::SNAPPY;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(IOType);
cuio_source_sink_pair source_sink(source_type);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);

parquet_read_common(write_opts, source_sink, state);
}

template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_parquet_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
void BM_parquet_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
@@ -94,10 +90,10 @@ void BM_parquet_read_io_compression(
static_cast<int32_t>(data_type::LIST),
static_cast<int32_t>(data_type::STRUCT)});

cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = Compression;
auto const source_type = IOType;
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
auto const compression = retrieve_compression_type_enum(state.get_string("compression_type"));

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
@@ -113,17 +109,15 @@
parquet_read_common(write_opts, source_sink, state);
}

template <cudf::io::io_type IOType>
void BM_parquet_read_io_small_mixed(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>>)
void BM_parquet_read_io_small_mixed(nvbench::state& state)
{
auto const d_type =
std::pair<cudf::type_id, cudf::type_id>{cudf::type_id::STRING, cudf::type_id::INT32};

cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
cudf::size_type const num_strings = state.get_int64("num_string_cols");
auto const source_type = IOType;
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const num_strings = static_cast<cudf::size_type>(state.get_int64("num_string_cols"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));

// want 80 pages total, across 4 columns, so 20 pages per column
cudf::size_type constexpr n_col = 4;
@@ -145,24 +139,23 @@
parquet_read_common(write_opts, source_sink, state);
}

template <data_type DataType, cudf::io::io_type IOType>
void BM_parquet_read_chunks(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
template <data_type DataType>
void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
{
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
cudf::size_type const byte_limit = state.get_int64("byte_limit");
auto const compression = cudf::io::compression_type::SNAPPY;
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const byte_limit = static_cast<cudf::size_type>(state.get_int64("byte_limit"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
auto const compression = cudf::io::compression_type::SNAPPY;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(IOType);
cuio_source_sink_pair source_sink(source_type);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);
@@ -202,43 +195,33 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::LIST,
data_type::STRUCT>;

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_parquet_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
.set_name("parquet_read_decode")
.set_type_axes_names({"data_type", "io"})
.set_type_axes_names({"data_type"})
.add_string_axis("io_type", {"DEVICE_BUFFER"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});

NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list))
NVBENCH_BENCH(BM_parquet_read_io_compression)
.set_name("parquet_read_io_compression")
.set_type_axes_names({"io", "compression"})
.add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "DEVICE_BUFFER"})
.add_string_axis("compression_type", {"SNAPPY", "NONE"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});

NVBENCH_BENCH_TYPES(BM_parquet_read_chunks,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, NVBENCH_TYPE_AXES(d_type_list))
.set_name("parquet_read_chunks")
.set_type_axes_names({"data_type", "io"})
.add_string_axis("io_type", {"DEVICE_BUFFER"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32})
.add_int64_axis("byte_limit", {0, 500'000});

NVBENCH_BENCH_TYPES(BM_parquet_read_io_small_mixed,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<cudf::io::io_type::FILEPATH>))
NVBENCH_BENCH(BM_parquet_read_io_small_mixed)
.set_name("parquet_read_io_small_mixed")
.set_type_axes_names({"io"})
.add_string_axis("io_type", {"FILEPATH"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32})
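The registration changes above replace compile-time nvbench type axes (nvbench::enum_type_list plus NVBENCH_BENCH_TYPES) with runtime string axes, so each benchmark is instantiated once and the IO and compression variants are selected per run. A minimal standalone nvbench sketch of a string axis, not from this diff (the benchmark name and the "mode" axis values are illustrative):

#include <nvbench/main.cuh>
#include <nvbench/nvbench.cuh>

void string_axis_example(nvbench::state& state)
{
  // The axis value arrives as a runtime string instead of a template parameter.
  auto const mode = state.get_string("mode");
  state.exec([&mode](nvbench::launch&) {
    // The work under test would dispatch on `mode` here.
    static_cast<void>(mode);
  });
}

NVBENCH_BENCH(string_axis_example)
  .set_name("string_axis_example")
  .add_string_axis("mode", {"FAST", "SLOW"});

NVBENCH_MAIN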