Commit 276c095

Fix merge conflicts

Matt711 committed Jun 4, 2024
2 parents af0d9ce + fc31aa3
Showing 69 changed files with 1,416 additions and 231 deletions.
20 changes: 12 additions & 8 deletions .github/labeler.yml
@@ -1,9 +1,19 @@
# Documentation for config - https://github.com/actions/labeler#common-examples

cuDF (Python):
Python:
- 'python/**'
- 'notebooks/**'

cudf.pandas:
- 'python/cudf/cudf/pandas/**'
- 'python/cudf/cudf_pandas_tests/**'

cudf.polars:
- 'python/cudf_polars/**'

pylibcudf:
- 'python/cudf/cudf/_lib/pylibcudf/**'

libcudf:
- 'cpp/**'

@@ -12,11 +22,5 @@ CMake:
- '**/cmake/**'
- '**/*.cmake'

cuDF (Java):
Java:
- 'java/**'

ci:
- 'ci/**'

conda:
- 'conda/**'
16 changes: 6 additions & 10 deletions cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,9 +28,7 @@ constexpr size_t data_size = 256 << 20;
constexpr cudf::size_type num_cols = 64;

template <typename DataType>
void csv_read_common(DataType const& data_types,
cudf::io::io_type const& source_type,
nvbench::state& state)
void csv_read_common(DataType const& data_types, io_type const& source_type, nvbench::state& state)
{
auto const tbl =
create_random_table(cycle_dtypes(data_types, num_cols), table_size_bytes{data_size});
@@ -66,7 +64,7 @@ void csv_read_common(DataType const& data_types,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType, cudf::io::io_type IOType>
template <data_type DataType, io_type IOType>
void BM_csv_read_input(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
@@ -76,7 +74,7 @@ void BM_csv_read_input(nvbench::state& state,
csv_read_common(d_type, source_type, state);
}

template <cudf::io::io_type IOType>
template <io_type IOType>
void BM_csv_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IOType>>)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -97,12 +95,10 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::DURATION,
data_type::STRING>;

using io_list =
nvbench::enum_type_list<cudf::io::io_type::FILEPATH, cudf::io::io_type::HOST_BUFFER>;
using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_input,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("csv_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
8 changes: 3 additions & 5 deletions cpp/benchmarks/io/csv/csv_writer.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,7 +28,7 @@
constexpr size_t data_size = 256 << 20;
constexpr cudf::size_type num_cols = 64;

template <data_type DataType, cudf::io::io_type IO>
template <data_type DataType, io_type IO>
void BM_csv_write_dtype_io(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
@@ -112,9 +112,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::DURATION,
data_type::STRING>;

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::VOID>;
using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;

NVBENCH_BENCH_TYPES(BM_csv_write_dtype_io, NVBENCH_TYPE_AXES(d_type_list, io_list))
.set_name("csv_write_dtype_io")
23 changes: 15 additions & 8 deletions cpp/benchmarks/io/cuio_common.cpp
@@ -52,6 +52,11 @@ cudf::io::source_info cuio_source_sink_pair::make_source_info()
switch (type) {
case io_type::FILEPATH: return cudf::io::source_info(file_name);
case io_type::HOST_BUFFER: return cudf::io::source_info(h_buffer.data(), h_buffer.size());
case io_type::PINNED_BUFFER: {
pinned_buffer.resize(h_buffer.size());
std::copy(h_buffer.begin(), h_buffer.end(), pinned_buffer.begin());
return cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size());
}
case io_type::DEVICE_BUFFER: {
// TODO: make cuio_source_sink_pair stream-friendly and avoid implicit use of the default
// stream
@@ -71,7 +76,8 @@ cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
switch (type) {
case io_type::VOID: return cudf::io::sink_info(void_sink.get());
case io_type::FILEPATH: return cudf::io::sink_info(file_name);
case io_type::HOST_BUFFER: [[fallthrough]];
case io_type::HOST_BUFFER:
case io_type::PINNED_BUFFER:
case io_type::DEVICE_BUFFER: return cudf::io::sink_info(&h_buffer);
default: CUDF_FAIL("invalid output type");
}
@@ -84,7 +90,8 @@ size_t cuio_source_sink_pair::size()
case io_type::FILEPATH:
return static_cast<size_t>(
std::ifstream(file_name, std::ifstream::ate | std::ifstream::binary).tellg());
case io_type::HOST_BUFFER: [[fallthrough]];
case io_type::HOST_BUFFER:
case io_type::PINNED_BUFFER:
case io_type::DEVICE_BUFFER: return h_buffer.size();
default: CUDF_FAIL("invalid output type");
}
@@ -204,13 +211,13 @@ void try_drop_l3_cache()
"Failed to execute the drop cache command");
}

cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
io_type retrieve_io_type_enum(std::string_view io_string)
{
if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
if (io_string == "VOID") { return cudf::io::io_type::VOID; }
if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
if (io_string == "FILEPATH") { return io_type::FILEPATH; }
if (io_string == "HOST_BUFFER") { return io_type::HOST_BUFFER; }
if (io_string == "PINNED_BUFFER") { return io_type::PINNED_BUFFER; }
if (io_string == "DEVICE_BUFFER") { return io_type::DEVICE_BUFFER; }
if (io_string == "VOID") { return io_type::VOID; }
CUDF_FAIL("Unsupported io_type.");
}
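For context, a minimal sketch of how the new PINNED_BUFFER path might feed a reader. The pinned_host_vector and source_info usage mirrors the case added above; the CSV reader call and the helper function name are assumptions for illustration, not part of this commit.

#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/io/csv.hpp>

#include <algorithm>
#include <vector>

// Sketch: stage pageable host bytes in pinned (page-locked) memory, then read them as CSV.
cudf::io::table_with_metadata read_csv_from_pinned(std::vector<char> const& h_buffer)
{
  cudf::detail::pinned_host_vector<char> pinned_buffer(h_buffer.size());
  std::copy(h_buffer.begin(), h_buffer.end(), pinned_buffer.begin());

  auto const source  = cudf::io::source_info(pinned_buffer.data(), pinned_buffer.size());
  auto const options = cudf::io::csv_reader_options::builder(source).build();
  return cudf::io::read_csv(options);
}

The staging copy is what distinguishes this path from HOST_BUFFER: the reader receives page-locked input, which enables faster host-to-device transfers while the pageable path stays unchanged.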

14 changes: 11 additions & 3 deletions cpp/benchmarks/io/cuio_common.hpp
@@ -18,13 +18,20 @@

#include <cudf_test/file_utilities.hpp>

#include <cudf/detail/utilities/pinned_host_vector.hpp>
#include <cudf/io/data_sink.hpp>
#include <cudf/io/datasource.hpp>
#include <cudf/io/types.hpp>

#include <rmm/device_uvector.hpp>

using cudf::io::io_type;
// IO types supported in the benchmarks
enum class io_type {
FILEPATH, // Input/output are both files
HOST_BUFFER, // Input/output are both host buffers (pageable)
PINNED_BUFFER, // Input is a pinned host buffer, output is a host buffer (pageable)
DEVICE_BUFFER, // Input is a device buffer, output is a host buffer (pageable)
VOID
};

std::string random_file_in_dir(std::string const& dir_path);

@@ -72,6 +79,7 @@ class cuio_source_sink_pair {

io_type const type;
std::vector<char> h_buffer;
cudf::detail::pinned_host_vector<char> pinned_buffer;
rmm::device_uvector<std::byte> d_buffer;
std::string const file_name;
std::unique_ptr<cudf::io::data_sink> void_sink;
@@ -144,7 +152,7 @@ void try_drop_l3_cache();
*
* @return The io_type enum value
*/
cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);
io_type retrieve_io_type_enum(std::string_view io_string);

/**
* @brief Convert a string to the corresponding compression_type enum value.
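Taken together with the .cpp changes above, a hedged sketch of how a benchmark might drive the new enum end to end; the cuio_source_sink_pair constructor taking an io_type and the include path are assumed from context rather than shown in this diff.

#include "cuio_common.hpp"

#include <string_view>

// Sketch: map a string (e.g. a benchmark axis value) to the benchmark-local io_type,
// then build the source/sink descriptors the readers and writers consume.
void configure_io(std::string_view io_string)
{
  io_type const type = retrieve_io_type_enum(io_string);  // e.g. "PINNED_BUFFER"
  cuio_source_sink_pair source_sink(type);                 // constructor assumed

  [[maybe_unused]] auto const source = source_sink.make_source_info();
  [[maybe_unused]] auto const sink   = source_sink.make_sink_info();
}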
14 changes: 6 additions & 8 deletions cpp/benchmarks/io/json/json_reader_input.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
return view.num_rows();
}

template <cudf::io::io_type IO>
template <io_type IO>
void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
cuio_source_sink_pair source_sink(IO);
@@ -87,7 +87,7 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_typ
json_read_common(source_sink, num_rows, state);
}

template <data_type DataType, cudf::io::io_type IO>
template <data_type DataType, io_type IO>
void BM_json_read_data_type(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
{
@@ -107,16 +107,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::LIST,
data_type::STRUCT>;

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::DEVICE_BUFFER>;
using io_list =
nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_json_read_data_type,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("json_read_data_type")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4);
9 changes: 4 additions & 5 deletions cpp/benchmarks/io/json/json_writer.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ void json_write_common(cudf::io::json_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <cudf::io::io_type IO>
template <io_type IO>
void BM_json_write_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
@@ -114,9 +114,8 @@ void BM_json_writer_options(nvbench::state& state)
json_write_common(write_opts, source_sink, data_size, state);
}

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::DEVICE_BUFFER>;
using io_list =
nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

NVBENCH_BENCH_TYPES(BM_json_write_io, NVBENCH_TYPE_AXES(io_list))
.set_name("json_write_io")
11 changes: 6 additions & 5 deletions cpp/benchmarks/io/nvbench_helpers.hpp
@@ -56,13 +56,14 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
[](auto) { return std::string{}; })

NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
cudf::io::io_type,
io_type,
[](auto value) {
switch (value) {
case cudf::io::io_type::FILEPATH: return "FILEPATH";
case cudf::io::io_type::HOST_BUFFER: return "HOST_BUFFER";
case cudf::io::io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
case cudf::io::io_type::VOID: return "VOID";
case io_type::FILEPATH: return "FILEPATH";
case io_type::HOST_BUFFER: return "HOST_BUFFER";
case io_type::PINNED_BUFFER: return "PINNED_BUFFER";
case io_type::DEVICE_BUFFER: return "DEVICE_BUFFER";
case io_type::VOID: return "VOID";
default: return "Unknown";
}
},
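With the string mapping above, PINNED_BUFFER gets a readable axis label in nvbench output. None of the io_list definitions in this commit include it yet, so the following registration is only an assumption about how a benchmark could opt in, reusing names that appear elsewhere in this diff.

// Sketch: extend a benchmark's IO axis with the pinned-buffer variant.
using io_list = nvbench::enum_type_list<io_type::FILEPATH,
                                        io_type::HOST_BUFFER,
                                        io_type::PINNED_BUFFER,
                                        io_type::DEVICE_BUFFER>;

NVBENCH_BENCH_TYPES(BM_csv_read_io, NVBENCH_TYPE_AXES(io_list))
  .set_name("csv_read_io")
  .set_type_axes_names({"io"})
  .set_min_samples(4);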
16 changes: 7 additions & 9 deletions cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -87,7 +87,7 @@ void orc_read_common(cudf::size_type num_rows_to_read,

} // namespace

template <data_type DataType, cudf::io::io_type IOType>
template <data_type DataType, io_type IOType>
void BM_orc_read_data(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
{
@@ -112,7 +112,7 @@ void BM_orc_read_data(nvbench::state& state,
orc_read_common<false>(num_rows_written, source_sink, state);
}

template <cudf::io::io_type IOType, cudf::io::compression_type Compression, bool chunked_read>
template <io_type IOType, cudf::io::compression_type Compression, bool chunked_read>
void orc_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL_SIGNED),
@@ -150,7 +150,7 @@ void orc_read_io_compression(nvbench::state& state)
orc_read_common<chunked_read>(num_rows_written, source_sink, state);
}

template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
template <io_type IOType, cudf::io::compression_type Compression>
void BM_orc_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
@@ -163,7 +163,7 @@ void BM_orc_chunked_read_io_compression(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<Compression>>)
{
// Only run benchmark using HOST_BUFFER IO.
return orc_read_io_compression<cudf::io::io_type::HOST_BUFFER, Compression, true>(state);
return orc_read_io_compression<io_type::HOST_BUFFER, Compression, true>(state);
}

using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
@@ -174,16 +174,14 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
data_type::LIST,
data_type::STRUCT>;

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::DEVICE_BUFFER>;
using io_list =
nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_orc_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
.set_name("orc_read_decode")
.set_type_axes_names({"data_type", "io"})
.set_min_samples(4)
8 changes: 3 additions & 5 deletions cpp/benchmarks/io/orc/orc_writer.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -82,7 +82,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list<nvbench::enum
state.add_buffer_size(encoded_file_size, "encoded_file_size", "encoded_file_size");
}

template <cudf::io::io_type IO, cudf::io::compression_type Compression>
template <io_type IO, cudf::io::compression_type Compression>
void BM_orc_write_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IO>, nvbench::enum_type<Compression>>)
@@ -183,9 +183,7 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
data_type::LIST,
data_type::STRUCT>;

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::VOID>;
using io_list = nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::VOID>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;