Skip to content

Commit

Permalink
Skip ORC and Parquet readers' benchmark cases that are not currently supported (#10194)
Browse files Browse the repository at this point in the history

Closes: #9961
Skipping the following:
- ORC reader: row selection through stripe selection
- ORC reader: lists column with row selection
- Parquet reader: row selection through row group selection
- Parquet reader: lists column with row selection

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - David Wendt (https://github.com/davidwendt)

URL: #10194
  • Loading branch information
vuule authored Feb 2, 2022
1 parent deb902a commit 83accc6
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
7 changes: 4 additions & 3 deletions cpp/benchmarks/io/orc/orc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ void BM_orc_read_varying_options(benchmark::State& state)
auto const use_np_dtypes = (flags & 2) != 0;
auto const ts_type = cudf::data_type{static_cast<cudf::type_id>(state.range(state_idx++))};

// skip_rows is not supported on nested types
auto const data_types =
dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
int32_t(type_group_id::FLOATING_POINT),
int32_t(type_group_id::FIXED_POINT),
int32_t(type_group_id::TIMESTAMP),
int32_t(cudf::type_id::STRING),
int32_t(cudf::type_id::LIST)}),
int32_t(cudf::type_id::STRING)}),
col_sel);
auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
auto const view = tbl->view();
Expand Down Expand Up @@ -181,11 +181,12 @@ BENCHMARK_REGISTER_F(OrcRead, column_selection)
->Unit(benchmark::kMillisecond)
->UseManualTime();

// Need an API to get the number of stripes to enable row_selection::STRIPES here
BENCHMARK_DEFINE_F(OrcRead, row_selection)
(::benchmark::State& state) { BM_orc_read_varying_options(state); }
BENCHMARK_REGISTER_F(OrcRead, row_selection)
->ArgsProduct({{int32_t(column_selection::ALL)},
{int32_t(row_selection::STRIPES), int32_t(row_selection::NROWS)},
{int32_t(row_selection::NROWS)},
{1, 8},
{0b11}, // defaults
{int32_t(cudf::type_id::EMPTY)}})
Expand Down
12 changes: 5 additions & 7 deletions cpp/benchmarks/io/parquet/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ void BM_parq_read_varying_options(benchmark::State& state)
auto const use_pandas_metadata = (flags & 2) != 0;
auto const ts_type = cudf::data_type{static_cast<cudf::type_id>(state.range(state_idx++))};

// No nested types here, because of https://github.com/rapidsai/cudf/issues/9970
auto const data_types = dtypes_for_column_selection(
get_type_or_group({static_cast<int32_t>(type_group_id::INTEGRAL),
static_cast<int32_t>(type_group_id::FLOATING_POINT),
static_cast<int32_t>(type_group_id::FIXED_POINT),
static_cast<int32_t>(type_group_id::TIMESTAMP),
static_cast<int32_t>(cudf::type_id::STRING),
static_cast<int32_t>(cudf::type_id::LIST)}),
static_cast<int32_t>(cudf::type_id::STRING)}),
col_sel);
auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
auto const view = tbl->view();
Expand Down Expand Up @@ -181,20 +181,18 @@ BENCHMARK_REGISTER_F(ParquetRead, column_selection)
->Unit(benchmark::kMillisecond)
->UseManualTime();

// Disabled until we add an API to read metadata from a parquet file and determine num row groups.
// https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863
/*
// row_selection::ROW_GROUPS disabled until we add an API to read metadata from a parquet file and
// determine num row groups. https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863
BENCHMARK_DEFINE_F(ParquetRead, row_selection)
(::benchmark::State& state) { BM_parq_read_varying_options(state); }
BENCHMARK_REGISTER_F(ParquetRead, row_selection)
->ArgsProduct({{int32_t(column_selection::ALL)},
{int32_t(row_selection::ROW_GROUPS), int32_t(row_selection::NROWS)},
{int32_t(row_selection::NROWS)},
{1, 4},
{0b01}, // defaults
{int32_t(cudf::type_id::EMPTY)}})
->Unit(benchmark::kMillisecond)
->UseManualTime();
*/

BENCHMARK_DEFINE_F(ParquetRead, misc_options)
(::benchmark::State& state) { BM_parq_read_varying_options(state); }
Expand Down

0 comments on commit 83accc6

Please sign in to comment.