Commit
Merge branch 'branch-24.12' into bug/pivot/list_like
vyasr authored Nov 20, 2024
2 parents 9a2d187 + be9ba6c commit 7f4bebb
Showing 14 changed files with 1,087 additions and 206 deletions.
2 changes: 2 additions & 0 deletions ci/run_cudf_polars_polars_tests.sh
@@ -13,6 +13,8 @@ DESELECTED_TESTS=(
"tests/unit/test_cpu_check.py::test_check_cpu_flags_skipped_no_flags" # Mock library error
"tests/docs/test_user_guide.py" # No dot binary in CI image
"tests/unit/test_polars_import.py::test_fork_safety" # test started to hang in polars-1.14
"tests/unit/operations/test_join.py::test_join_4_columns_with_validity" # fails in some systems, see https://github.com/pola-rs/polars/issues/19870
"tests/unit/io/test_csv.py::test_read_web_file" # fails in rockylinux8 due to SSL CA issues
)

if [[ $(arch) == "aarch64" ]]; then
6 changes: 6 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
@@ -286,6 +286,12 @@ ConfigureNVBench(
ConfigureBench(HASHING_BENCH hashing/partition.cpp)
ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp)

# ##################################################################################################
# * interop benchmark ------------------------------------------------------------------------------
ConfigureNVBench(INTEROP_NVBENCH interop/interop.cpp)
target_link_libraries(INTEROP_NVBENCH PRIVATE nanoarrow)
target_include_directories(INTEROP_NVBENCH PRIVATE ${CMAKE_SOURCE_DIR}/tests/interop)

# ##################################################################################################
# * merge benchmark -------------------------------------------------------------------------------
ConfigureBench(MERGE_BENCH merge/merge.cpp)
244 changes: 244 additions & 0 deletions cpp/benchmarks/interop/interop.cpp
@@ -0,0 +1,244 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/common/table_utilities.hpp>

#include <cudf/interop.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <nanoarrow/nanoarrow.hpp>
#include <nanoarrow/nanoarrow_device.h>
#include <nanoarrow_utils.hpp>
#include <nvbench/nvbench.cuh>

#include <algorithm>
#include <iterator>
#include <vector>

template <cudf::type_id data_type>
void BM_to_arrow_device(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

auto const table = create_random_table(types, row_count{num_rows});
int64_t const size_bytes = estimate_size(table->view());

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::to_arrow_device(table->view(), rmm::cuda_stream_view{launch.get_stream()});
});
}

template <cudf::type_id data_type>
void BM_to_arrow_host(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

auto const table = create_random_table(types, row_count{num_rows});
int64_t const size_bytes = estimate_size(table->view());

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::to_arrow_host(table->view(), rmm::cuda_stream_view{launch.get_stream()});
});
}

template <cudf::type_id data_type>
void BM_from_arrow_device(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

data_profile profile;
profile.set_struct_depth(1);
profile.set_list_depth(1);

auto const table = create_random_table(types, row_count{num_rows}, profile);
cudf::table_view table_view = table->view();
int64_t const size_bytes = estimate_size(table_view);

std::vector<cudf::column_metadata> table_metadata;

std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_columns),
std::back_inserter(table_metadata),
[&](auto const column) {
cudf::column_metadata column_metadata{""};
column_metadata.children_meta = std::vector(
table->get_column(column).num_children(), cudf::column_metadata{""});
return column_metadata;
});

cudf::unique_schema_t schema = cudf::to_arrow_schema(table_view, table_metadata);
cudf::unique_device_array_t input = cudf::to_arrow_device(table_view);

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::from_arrow_device_column(
schema.get(), input.get(), rmm::cuda_stream_view{launch.get_stream()});
});
}

template <cudf::type_id data_type>
void BM_from_arrow_host(nvbench::state& state, nvbench::type_list<nvbench::enum_type<data_type>>)
{
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_columns"));
auto const num_elements = static_cast<int64_t>(num_rows) * num_columns;

std::vector<cudf::type_id> types(num_columns, data_type);

data_profile profile;
profile.set_struct_depth(1);
profile.set_list_depth(1);

auto const table = create_random_table(types, row_count{num_rows}, profile);
cudf::table_view table_view = table->view();
int64_t const size_bytes = estimate_size(table_view);

std::vector<cudf::column_metadata> table_metadata;

std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_columns),
std::back_inserter(table_metadata),
[&](auto const column) {
cudf::column_metadata column_metadata{""};
column_metadata.children_meta = std::vector(
table->get_column(column).num_children(), cudf::column_metadata{""});
return column_metadata;
});

cudf::unique_schema_t schema = cudf::to_arrow_schema(table_view, table_metadata);
cudf::unique_device_array_t input = cudf::to_arrow_host(table_view);

state.add_element_count(num_elements, "num_elements");
state.add_global_memory_reads(size_bytes);
state.add_global_memory_writes(size_bytes);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::from_arrow_host_column(
schema.get(), input.get(), rmm::cuda_stream_view{launch.get_stream()});
});
}

using data_types = nvbench::enum_type_list<cudf::type_id::INT8,
cudf::type_id::INT16,
cudf::type_id::INT32,
cudf::type_id::INT64,
cudf::type_id::UINT8,
cudf::type_id::UINT16,
cudf::type_id::UINT32,
cudf::type_id::UINT64,
cudf::type_id::FLOAT32,
cudf::type_id::FLOAT64,
cudf::type_id::BOOL8,
cudf::type_id::TIMESTAMP_SECONDS,
cudf::type_id::TIMESTAMP_MILLISECONDS,
cudf::type_id::TIMESTAMP_MICROSECONDS,
cudf::type_id::TIMESTAMP_NANOSECONDS,
cudf::type_id::DURATION_SECONDS,
cudf::type_id::DURATION_MILLISECONDS,
cudf::type_id::DURATION_MICROSECONDS,
cudf::type_id::DURATION_NANOSECONDS,
cudf::type_id::STRING,
cudf::type_id::LIST,
cudf::type_id::DECIMAL32,
cudf::type_id::DECIMAL64,
cudf::type_id::DECIMAL128,
cudf::type_id::STRUCT>;

static char const* stringify_type(cudf::type_id value)
{
switch (value) {
case cudf::type_id::INT8: return "INT8";
case cudf::type_id::INT16: return "INT16";
case cudf::type_id::INT32: return "INT32";
case cudf::type_id::INT64: return "INT64";
case cudf::type_id::UINT8: return "UINT8";
case cudf::type_id::UINT16: return "UINT16";
case cudf::type_id::UINT32: return "UINT32";
case cudf::type_id::UINT64: return "UINT64";
case cudf::type_id::FLOAT32: return "FLOAT32";
case cudf::type_id::FLOAT64: return "FLOAT64";
case cudf::type_id::BOOL8: return "BOOL8";
case cudf::type_id::TIMESTAMP_DAYS: return "TIMESTAMP_DAYS";
case cudf::type_id::TIMESTAMP_SECONDS: return "TIMESTAMP_SECONDS";
case cudf::type_id::TIMESTAMP_MILLISECONDS: return "TIMESTAMP_MILLISECONDS";
case cudf::type_id::TIMESTAMP_MICROSECONDS: return "TIMESTAMP_MICROSECONDS";
case cudf::type_id::TIMESTAMP_NANOSECONDS: return "TIMESTAMP_NANOSECONDS";
case cudf::type_id::DURATION_DAYS: return "DURATION_DAYS";
case cudf::type_id::DURATION_SECONDS: return "DURATION_SECONDS";
case cudf::type_id::DURATION_MILLISECONDS: return "DURATION_MILLISECONDS";
case cudf::type_id::DURATION_MICROSECONDS: return "DURATION_MICROSECONDS";
case cudf::type_id::DURATION_NANOSECONDS: return "DURATION_NANOSECONDS";
case cudf::type_id::DICTIONARY32: return "DICTIONARY32";
case cudf::type_id::STRING: return "STRING";
case cudf::type_id::LIST: return "LIST";
case cudf::type_id::DECIMAL32: return "DECIMAL32";
case cudf::type_id::DECIMAL64: return "DECIMAL64";
case cudf::type_id::DECIMAL128: return "DECIMAL128";
case cudf::type_id::STRUCT: return "STRUCT";
default: return "unknown";
}
}

NVBENCH_DECLARE_ENUM_TYPE_STRINGS(cudf::type_id, stringify_type, stringify_type)

NVBENCH_BENCH_TYPES(BM_to_arrow_host, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("to_arrow_host")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});

NVBENCH_BENCH_TYPES(BM_to_arrow_device, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("to_arrow_device")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});

NVBENCH_BENCH_TYPES(BM_from_arrow_host, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("from_arrow_host")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});

NVBENCH_BENCH_TYPES(BM_from_arrow_device, NVBENCH_TYPE_AXES(data_types))
.set_type_axes_names({"data_type"})
.set_name("from_arrow_device")
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_columns", {1});
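
For context on what these benchmarks measure, the host path amounts to a table-level round trip through an ArrowDeviceArray resident in host memory. The sketch below is illustrative only and is not part of this commit; it uses the table-level cudf::from_arrow_host rather than the column-level variant called in the benchmark, and it assumes fixed-width (non-nested, non-string) columns so that empty per-column metadata is sufficient.

// Illustrative sketch (not part of this commit): round-trip a cudf table through
// host-resident Arrow data, mirroring what BM_to_arrow_host / BM_from_arrow_host time.
// Assumes fixed-width columns so empty cudf::column_metadata entries suffice.
#include <cudf/interop.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <vector>

std::unique_ptr<cudf::table> round_trip_via_arrow_host(cudf::table_view const& view)
{
  // One metadata entry per top-level column; names are not needed for a round trip.
  std::vector<cudf::column_metadata> metadata(view.num_columns(), cudf::column_metadata{""});

  cudf::unique_schema_t schema      = cudf::to_arrow_schema(view, metadata);  // ArrowSchema
  cudf::unique_device_array_t array = cudf::to_arrow_host(view);              // copy to host memory

  // Rebuild a device-resident cudf table from the host-resident Arrow array.
  return cudf::from_arrow_host(schema.get(), array.get());
}
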
56 changes: 46 additions & 10 deletions cpp/benchmarks/io/json/json_reader_input.cpp
@@ -24,17 +24,19 @@

#include <nvbench/nvbench.cuh>

-// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
-// run on most GPUs, but large enough to allow highest throughput
-constexpr size_t data_size = 512 << 20;
+// Default size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks
+// to run on most GPUs, but large enough to allow highest throughput
+constexpr size_t default_data_size = 512 << 20;
constexpr cudf::size_type num_cols = 64;

void json_read_common(cuio_source_sink_pair& source_sink,
cudf::size_type num_rows_to_read,
-nvbench::state& state)
+nvbench::state& state,
+cudf::io::compression_type comptype = cudf::io::compression_type::NONE,
+size_t data_size = default_data_size)
{
cudf::io::json_reader_options read_opts =
-cudf::io::json_reader_options::builder(source_sink.make_source_info());
+cudf::io::json_reader_options::builder(source_sink.make_source_info()).compression(comptype);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
@@ -57,15 +59,21 @@ void json_read_common(cuio_source_sink_pair& source_sink,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

-cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
-std::vector<cudf::type_id> const& dtypes)
+cudf::size_type json_write_bm_data(
+cudf::io::sink_info sink,
+std::vector<cudf::type_id> const& dtypes,
+cudf::io::compression_type comptype = cudf::io::compression_type::NONE,
+size_t data_size = default_data_size)
{
auto const tbl = create_random_table(
cycle_dtypes(dtypes, num_cols), table_size_bytes{data_size}, data_profile_builder());
auto const view = tbl->view();

cudf::io::json_writer_options const write_opts =
-cudf::io::json_writer_options::builder(sink, view).na_rep("null").rows_per_chunk(100'000);
+cudf::io::json_writer_options::builder(sink, view)
+.na_rep("null")
+.rows_per_chunk(100'000)
+.compression(comptype);
cudf::io::write_json(write_opts);
return view.num_rows();
}
@@ -87,6 +95,26 @@ void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_typ
json_read_common(source_sink, num_rows, state);
}

template <cudf::io::compression_type comptype, io_type IO>
void BM_json_read_compressed_io(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<comptype>, nvbench::enum_type<IO>>)
{
size_t const data_size = state.get_int64("data_size");
cuio_source_sink_pair source_sink(IO);
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
static_cast<int32_t>(data_type::DECIMAL),
static_cast<int32_t>(data_type::TIMESTAMP),
static_cast<int32_t>(data_type::DURATION),
static_cast<int32_t>(data_type::STRING),
static_cast<int32_t>(data_type::LIST),
static_cast<int32_t>(data_type::STRUCT)});
auto const num_rows =
json_write_bm_data(source_sink.make_sink_info(), d_type, comptype, data_size);

json_read_common(source_sink, num_rows, state, comptype, data_size);
}

template <data_type DataType, io_type IO>
void BM_json_read_data_type(
nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
@@ -110,8 +138,9 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
using io_list =
nvbench::enum_type_list<io_type::FILEPATH, io_type::HOST_BUFFER, io_type::DEVICE_BUFFER>;

-using compression_list =
-nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;
+using compression_list = nvbench::enum_type_list<cudf::io::compression_type::GZIP,
+cudf::io::compression_type::SNAPPY,
+cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_json_read_data_type,
NVBENCH_TYPE_AXES(d_type_list, nvbench::enum_type_list<io_type::DEVICE_BUFFER>))
@@ -123,3 +152,10 @@ NVBENCH_BENCH_TYPES(BM_json_read_io, NVBENCH_TYPE_AXES(io_list))
.set_name("json_read_io")
.set_type_axes_names({"io"})
.set_min_samples(4);

NVBENCH_BENCH_TYPES(BM_json_read_compressed_io,
NVBENCH_TYPE_AXES(compression_list, nvbench::enum_type_list<io_type::FILEPATH>))
.set_name("json_read_compressed_io")
.set_type_axes_names({"compression_type", "io"})
.add_int64_power_of_two_axis("data_size", nvbench::range(20, 29, 1))
.set_min_samples(4);
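
As a reference for the option chaining the new compressed-IO benchmark drives, the sketch below is illustrative only and not part of this commit: it writes a table as GZIP-compressed JSON to a file and reads it back with the matching compression type. The file path and helper name are hypothetical.

// Illustrative sketch (not part of this commit): compressed JSON write/read round
// trip using the same builder options as json_write_bm_data and json_read_common.
// The path "data.json.gz" and the function name are hypothetical.
#include <cudf/io/json.hpp>
#include <cudf/table/table_view.hpp>

#include <string>

cudf::io::table_with_metadata gzip_json_round_trip(cudf::table_view const& view)
{
  std::string const path{"data.json.gz"};

  cudf::io::json_writer_options const write_opts =
    cudf::io::json_writer_options::builder(cudf::io::sink_info{path}, view)
      .na_rep("null")
      .rows_per_chunk(100'000)
      .compression(cudf::io::compression_type::GZIP);
  cudf::io::write_json(write_opts);  // produces a gzip-compressed JSON file

  cudf::io::json_reader_options const read_opts =
    cudf::io::json_reader_options::builder(cudf::io::source_info{path})
      .compression(cudf::io::compression_type::GZIP);
  return cudf::io::read_json(read_opts);  // decompresses and parses back into a table
}
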
1 change: 1 addition & 0 deletions cpp/benchmarks/io/nvbench_helpers.hpp
@@ -76,6 +76,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
[](auto value) {
switch (value) {
case cudf::io::compression_type::SNAPPY: return "SNAPPY";
case cudf::io::compression_type::GZIP: return "GZIP";
case cudf::io::compression_type::NONE: return "NONE";
default: return "Unknown";
}
3 changes: 2 additions & 1 deletion cpp/src/io/utilities/config_utils.cpp
@@ -56,7 +56,8 @@ void set_up_kvikio()
{
static std::once_flag flag{};
std::call_once(flag, [] {
-auto const compat_mode = kvikio::detail::getenv_or<bool>("KVIKIO_COMPAT_MODE", true);
+auto const compat_mode =
+kvikio::detail::getenv_or("KVIKIO_COMPAT_MODE", kvikio::CompatMode::ON);
kvikio::defaults::compat_mode_reset(compat_mode);

auto const nthreads = getenv_or<unsigned int>("KVIKIO_NTHREADS", 4u);
2 changes: 1 addition & 1 deletion cpp/src/io/utilities/data_sink.cpp
@@ -45,7 +45,7 @@ class file_sink : public data_sink {
cufile_integration::set_up_kvikio();
_kvikio_file = kvikio::FileHandle(filepath, "w");
CUDF_LOG_INFO("Writing a file using kvikIO, with compatibility mode {}.",
-_kvikio_file.is_compat_mode_on() ? "on" : "off");
+_kvikio_file.is_compat_mode_preferred() ? "on" : "off");
} else {
_cufile_out = detail::make_cufile_output(filepath);
}