diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp index bb4a0ce72d8..e15513275ee 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -88,13 +88,13 @@ void BM_orc_read_varying_options(benchmark::State& state) auto const use_np_dtypes = (flags & 2) != 0; auto const ts_type = cudf::data_type{static_cast(state.range(state_idx++))}; + // skip_rows is not supported on nested types auto const data_types = dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED), int32_t(type_group_id::FLOATING_POINT), int32_t(type_group_id::FIXED_POINT), int32_t(type_group_id::TIMESTAMP), - int32_t(cudf::type_id::STRING), - int32_t(cudf::type_id::LIST)}), + int32_t(cudf::type_id::STRING)}), col_sel); auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); auto const view = tbl->view(); @@ -181,11 +181,12 @@ BENCHMARK_REGISTER_F(OrcRead, column_selection) ->Unit(benchmark::kMillisecond) ->UseManualTime(); +// Need an API to get the number of stripes to enable row_selection::STRIPES here BENCHMARK_DEFINE_F(OrcRead, row_selection) (::benchmark::State& state) { BM_orc_read_varying_options(state); } BENCHMARK_REGISTER_F(OrcRead, row_selection) ->ArgsProduct({{int32_t(column_selection::ALL)}, - {int32_t(row_selection::STRIPES), int32_t(row_selection::NROWS)}, + {int32_t(row_selection::NROWS)}, {1, 8}, {0b11}, // defaults {int32_t(cudf::type_id::EMPTY)}}) diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index d7a3a668bd1..09194931498 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -88,13 +88,13 @@ void BM_parq_read_varying_options(benchmark::State& state) auto const use_pandas_metadata = (flags & 2) != 0; auto const ts_type = cudf::data_type{static_cast(state.range(state_idx++))}; + // No nested types here, because of https://github.com/rapidsai/cudf/issues/9970 auto const data_types = dtypes_for_column_selection( get_type_or_group({static_cast(type_group_id::INTEGRAL), static_cast(type_group_id::FLOATING_POINT), static_cast(type_group_id::FIXED_POINT), static_cast(type_group_id::TIMESTAMP), - static_cast(cudf::type_id::STRING), - static_cast(cudf::type_id::LIST)}), + static_cast(cudf::type_id::STRING)}), col_sel); auto const tbl = create_random_table(data_types, data_types.size(), table_size_bytes{data_size}); auto const view = tbl->view(); @@ -181,20 +181,18 @@ BENCHMARK_REGISTER_F(ParquetRead, column_selection) ->Unit(benchmark::kMillisecond) ->UseManualTime(); -// Disabled until we add an API to read metadata from a parquet file and determine num row groups. -// https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863 -/* +// row_selection::ROW_GROUPS disabled until we add an API to read metadata from a parquet file and +// determine num row groups. https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863 BENCHMARK_DEFINE_F(ParquetRead, row_selection) (::benchmark::State& state) { BM_parq_read_varying_options(state); } BENCHMARK_REGISTER_F(ParquetRead, row_selection) ->ArgsProduct({{int32_t(column_selection::ALL)}, - {int32_t(row_selection::ROW_GROUPS), int32_t(row_selection::NROWS)}, + {int32_t(row_selection::NROWS)}, {1, 4}, {0b01}, // defaults {int32_t(cudf::type_id::EMPTY)}}) ->Unit(benchmark::kMillisecond) ->UseManualTime(); -*/ BENCHMARK_DEFINE_F(ParquetRead, misc_options) (::benchmark::State& state) { BM_parq_read_varying_options(state); }