From 72e6f9b08d3c52ca96ed64d963305ab9005ebff6 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Wed, 27 Dec 2023 13:40:43 -0800
Subject: [PATCH] Basic validation in reader benchmarks (#14647)

Check the output table shape in the CSV, JSON, ORC and Parquet reader benchmarks.

Other changes:
  - Fixed some chunking logic in the CSV reader benchmark.
  - Shortened the lifetime of the original table to reduce peak memory use (adopted the pattern from the JSON reader benchmark).

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/14647
---
 cpp/benchmarks/io/csv/csv_reader_input.cpp    |  19 ++-
 cpp/benchmarks/io/csv/csv_reader_options.cpp  |  43 ++---
 cpp/benchmarks/io/json/json_reader_input.cpp  |  91 +++++------
 cpp/benchmarks/io/orc/orc_reader_input.cpp    |  69 ++++----
 cpp/benchmarks/io/orc/orc_reader_options.cpp  |  12 +-
 .../io/parquet/parquet_reader_input.cpp       | 153 ++++++++++--------
 .../io/parquet/parquet_reader_options.cpp     |  12 +-
 7 files changed, 221 insertions(+), 178 deletions(-)
diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp
index 6216a9ecec2..2ad3bc36f59 100644
--- a/cpp/benchmarks/io/csv/csv_reader_input.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -47,14 +47,17 @@ void csv_read_common(DataType const& data_types,
 
   auto const mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-             [&](nvbench::launch& launch, auto& timer) {
-               try_drop_l3_cache();  // Drop L3 cache for accurate measurement
-
-               timer.start();
-               cudf::io::read_csv(read_options);
-               timer.stop();
-             });
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+      try_drop_l3_cache();  // Drop L3 cache for accurate measurement
+
+      timer.start();
+      auto const result = cudf::io::read_csv(read_options);
+      timer.stop();
+
+      CUDF_EXPECTS(result.tbl->num_columns() == view.num_columns(), "Unexpected number of columns");
+      CUDF_EXPECTS(result.tbl->num_rows() == view.num_rows(), "Unexpected number of rows");
+    });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
   state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp
index 93ef5bed774..84c3106cbdf 100644
--- a/cpp/benchmarks/io/csv/csv_reader_options.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp
@@ -19,8 +19,9 @@
 #include <benchmarks/io/cuio_common.hpp>
 #include <benchmarks/io/nvbench_helpers.hpp>
 
+#include <cudf/detail/utilities/default_stream.hpp>
+#include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/io/csv.hpp>
-#include <cudf/utilities/default_stream.hpp>
 
 #include <nvbench/nvbench.cuh>
 
@@ -39,8 +40,9 @@ void BM_csv_read_varying_options(
                                                    static_cast<int32_t>(data_type::DURATION),
                                                    static_cast<int32_t>(data_type::STRING)}),
                                 ColSelection);
-  auto const cols_to_read = select_column_indexes(data_types.size(), ColSelection);
-  auto const num_chunks   = state.get_int64("num_chunks");
+  auto const cols_to_read                 = select_column_indexes(data_types.size(), ColSelection);
+  cudf::size_type const expected_num_cols = cols_to_read.size();
+  size_t const num_chunks                 = state.get_int64("num_chunks");
 
   auto const tbl  = create_random_table(data_types, table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -60,43 +62,48 @@ void BM_csv_read_varying_options(
       .comment('#')
       .prefix("BM_");
 
-  size_t const chunk_size             = source_sink.size() / num_chunks;
-  cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
-  auto const mem_stats_logger         = cudf::memory_stats_logger();
+  size_t const chunk_size = cudf::util::div_rounding_up_safe(source_sink.size(), num_chunks);
+  auto const chunk_row_cnt =
+    cudf::util::div_rounding_up_safe(view.num_rows(), static_cast<cudf::size_type>(num_chunks));
+  auto const mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
              [&](nvbench::launch& launch, auto& timer) {
                try_drop_l3_cache();  // Drop L3 cache for accurate measurement
-
+               cudf::size_type num_rows_read = 0;
                timer.start();
-               for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
-                 // only read the header in the first chunk
-                 read_options.set_header(chunk == 0 ? 0 : -1);
-
-                 auto const is_last_chunk = chunk == (num_chunks - 1);
+               for (auto chunk = 0u; chunk < num_chunks; ++chunk) {
                  switch (RowSelection) {
                    case row_selection::ALL: break;
                    case row_selection::BYTE_RANGE:
+                     // with byte_range, we can't read the header in any chunk but the first
+                     read_options.set_header(chunk == 0 ? 0 : -1);
                      read_options.set_byte_range_offset(chunk * chunk_size);
                      read_options.set_byte_range_size(chunk_size);
-                     if (is_last_chunk) read_options.set_byte_range_size(0);
                      break;
                    case row_selection::NROWS:
                      read_options.set_skiprows(chunk * chunk_row_cnt);
                      read_options.set_nrows(chunk_row_cnt);
-                     if (is_last_chunk) read_options.set_nrows(-1);
                      break;
-                   case row_selection::SKIPFOOTER:
+                   case row_selection::SKIPFOOTER: {
                      read_options.set_skiprows(chunk * chunk_row_cnt);
-                     read_options.set_skipfooter(view.num_rows() - (chunk + 1) * chunk_row_cnt);
-                     if (is_last_chunk) read_options.set_skipfooter(0);
+                     cudf::size_type const next_chunk_start = (chunk + 1) * chunk_row_cnt;
+                     auto const skip_footer =
+                       view.num_rows() > next_chunk_start ? view.num_rows() - next_chunk_start : 0;
+                     read_options.set_skipfooter(skip_footer);
                      break;
+                   }
                    default: CUDF_FAIL("Unsupported row selection method");
                  }
 
-                 cudf::io::read_csv(read_options);
+                 auto const result = cudf::io::read_csv(read_options);
+
+                 num_rows_read += result.tbl->num_rows();
+                 CUDF_EXPECTS(result.tbl->num_columns() == expected_num_cols,
+                              "Unexpected number of columns");
                }
                timer.stop();
+               CUDF_EXPECTS(num_rows_read == view.num_rows(), "Unexpected number of rows");
              });
 
   auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/json/json_reader_input.cpp b/cpp/benchmarks/io/json/json_reader_input.cpp
index 31bb5dafa88..aa73dacdbc5 100644
--- a/cpp/benchmarks/io/json/json_reader_input.cpp
+++ b/cpp/benchmarks/io/json/json_reader_input.cpp
@@ -29,21 +29,26 @@
 constexpr size_t data_size         = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
-void json_read_common(cuio_source_sink_pair& source_sink, nvbench::state& state)
+void json_read_common(cuio_source_sink_pair& source_sink,
+                      cudf::size_type num_rows_to_read,
+                      nvbench::state& state)
 {
   cudf::io::json_reader_options read_opts =
     cudf::io::json_reader_options::builder(source_sink.make_source_info());
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-             [&](nvbench::launch& launch, auto& timer) {
-               try_drop_l3_cache();
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+      try_drop_l3_cache();
 
-               timer.start();
-               cudf::io::read_json(read_opts);
-               timer.stop();
-             });
+      timer.start();
+      auto const result = cudf::io::read_json(read_opts);
+      timer.stop();
+
+      CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns");
+      CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows");
+    });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
   state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
@@ -52,55 +57,45 @@ void json_read_common(cuio_source_sink_pair& source_sink, nvbench::state& state)
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }
 
+cudf::size_type json_write_bm_data(cudf::io::sink_info sink,
+                                   std::vector<cudf::type_id> const& dtypes)
+{
+  auto const tbl = create_random_table(
+    cycle_dtypes(dtypes, num_cols), table_size_bytes{data_size}, data_profile_builder());
+  auto const view = tbl->view();
+
+  cudf::io::json_writer_options const write_opts =
+    cudf::io::json_writer_options::builder(sink, view).na_rep("null").rows_per_chunk(100'000);
+  cudf::io::write_json(write_opts);
+  return view.num_rows();
+}
+
 template <cudf::io::io_type IO>
 void BM_json_read_io(nvbench::state& state, nvbench::type_list<nvbench::enum_type<IO>>)
 {
-  auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
-                                         static_cast<int32_t>(data_type::FLOAT),
-                                         static_cast<int32_t>(data_type::DECIMAL),
-                                         static_cast<int32_t>(data_type::TIMESTAMP),
-                                         static_cast<int32_t>(data_type::DURATION),
-                                         static_cast<int32_t>(data_type::STRING),
-                                         static_cast<int32_t>(data_type::LIST),
-                                         static_cast<int32_t>(data_type::STRUCT)});
-
-  auto const source_type = IO;
-  cuio_source_sink_pair source_sink(source_type);
-
-  {
-    auto const tbl = create_random_table(
-      cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder());
-    auto const view = tbl->view();
-
-    cudf::io::json_writer_options const write_opts =
-      cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view)
-        .na_rep("null")
-        .rows_per_chunk(100'000);
-    cudf::io::write_json(write_opts);
-  }
-
-  json_read_common(source_sink, state);
+  cuio_source_sink_pair source_sink(IO);
+  auto const d_type   = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
+                                           static_cast<int32_t>(data_type::FLOAT),
+                                           static_cast<int32_t>(data_type::DECIMAL),
+                                           static_cast<int32_t>(data_type::TIMESTAMP),
+                                           static_cast<int32_t>(data_type::DURATION),
+                                           static_cast<int32_t>(data_type::STRING),
+                                           static_cast<int32_t>(data_type::LIST),
+                                           static_cast<int32_t>(data_type::STRUCT)});
+  auto const num_rows = json_write_bm_data(source_sink.make_sink_info(), d_type);
+
+  json_read_common(source_sink, num_rows, state);
 }
 
 template <data_type DataType, cudf::io::io_type IO>
 void BM_json_read_data_type(
   nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IO>>)
 {
-  auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
-  auto const source_type = IO;
-  cuio_source_sink_pair source_sink(source_type);
-  {
-    auto const tbl = create_random_table(
-      cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, data_profile_builder());
-    auto const view = tbl->view();
-
-    cudf::io::json_writer_options const write_opts =
-      cudf::io::json_writer_options::builder(source_sink.make_sink_info(), view)
-        .na_rep("null")
-        .rows_per_chunk(100'000);
-    cudf::io::write_json(write_opts);
-  }
-  json_read_common(source_sink, state);
+  cuio_source_sink_pair source_sink(IO);
+  auto const d_type   = get_type_or_group(static_cast<int32_t>(DataType));
+  auto const num_rows = json_write_bm_data(source_sink.make_sink_info(), d_type);
+
+  json_read_common(source_sink, num_rows, state);
 }
 
 using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index b6e15fb3923..fdb7dbe59b8 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -29,25 +29,26 @@
 constexpr int64_t data_size        = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
-void orc_read_common(cudf::io::orc_writer_options const& opts,
+void orc_read_common(cudf::size_type num_rows_to_read,
                      cuio_source_sink_pair& source_sink,
                      nvbench::state& state)
 {
-  cudf::io::write_orc(opts);
-
   cudf::io::orc_reader_options read_opts =
     cudf::io::orc_reader_options::builder(source_sink.make_source_info());
 
   auto mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-             [&](nvbench::launch& launch, auto& timer) {
-               try_drop_l3_cache();
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+      try_drop_l3_cache();
+
+      timer.start();
+      auto const result = cudf::io::read_orc(read_opts);
+      timer.stop();
 
-               timer.start();
-               cudf::io::read_orc(read_opts);
-               timer.stop();
-             });
+      CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns");
+      CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows");
+    });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
   state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
@@ -63,18 +64,22 @@ void BM_orc_read_data(nvbench::state& state,
   auto const d_type                 = get_type_or_group(static_cast<int32_t>(DataType));
   cudf::size_type const cardinality = state.get_int64("cardinality");
   cudf::size_type const run_length  = state.get_int64("run_length");
+  cuio_source_sink_pair source_sink(IOType);
 
-  auto const tbl =
-    create_random_table(cycle_dtypes(d_type, num_cols),
-                        table_size_bytes{data_size},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, num_cols),
+      table_size_bytes{data_size},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
 
-  cuio_source_sink_pair source_sink(IOType);
-  cudf::io::orc_writer_options opts =
-    cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view);
+    cudf::io::orc_writer_options opts =
+      cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view);
+    cudf::io::write_orc(opts);
+    return view.num_rows();
+  }();
 
-  orc_read_common(opts, source_sink, state);
+  orc_read_common(num_rows_written, source_sink, state);
 }
 
 template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
@@ -92,19 +97,23 @@ void BM_orc_read_io_compression(
 
   cudf::size_type const cardinality = state.get_int64("cardinality");
   cudf::size_type const run_length  = state.get_int64("run_length");
-
-  auto const tbl =
-    create_random_table(cycle_dtypes(d_type, num_cols),
-                        table_size_bytes{data_size},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
-
   cuio_source_sink_pair source_sink(IOType);
-  cudf::io::orc_writer_options opts =
-    cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
-      .compression(Compression);
 
-  orc_read_common(opts, source_sink, state);
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, num_cols),
+      table_size_bytes{data_size},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::orc_writer_options opts =
+      cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(Compression);
+    cudf::io::write_orc(opts);
+    return view.num_rows();
+  }();
+
+  orc_read_common(num_rows_written, source_sink, state);
 }
 
 using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL_SIGNED,
diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp
index 1f656f7ea70..5627613762e 100644
--- a/cpp/benchmarks/io/orc/orc_reader_options.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp
@@ -80,6 +80,7 @@ void BM_orc_read_varying_options(nvbench::state& state,
 
   auto const cols_to_read =
     select_column_names(get_top_level_col_names(source_sink.make_source_info()), ColSelection);
+  cudf::size_type const expected_num_cols = cols_to_read.size();
   cudf::io::orc_reader_options read_options =
     cudf::io::orc_reader_options::builder(source_sink.make_source_info())
       .columns(cols_to_read)
@@ -96,9 +97,8 @@ void BM_orc_read_varying_options(nvbench::state& state,
   state.exec(
     nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
       try_drop_l3_cache();
-
+      cudf::size_type num_rows_read = 0;
       timer.start();
-      cudf::size_type rows_read = 0;
       for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
         switch (RowSelection) {
           case row_selection::ALL: break;
@@ -112,11 +112,15 @@ void BM_orc_read_varying_options(nvbench::state& state,
           default: CUDF_FAIL("Unsupported row selection method");
         }
 
-        rows_read += cudf::io::read_orc(read_options).tbl->num_rows();
+        auto const result = cudf::io::read_orc(read_options);
+
+        num_rows_read += result.tbl->num_rows();
+        CUDF_EXPECTS(result.tbl->num_columns() == expected_num_cols,
+                     "Unexpected number of columns");
       }
 
-      CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
       timer.stop();
+      CUDF_EXPECTS(num_rows_read == view.num_rows(), "Benchmark did not read the entire table");
     });
 
   auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
index 6db147cbfef..019e0f30fe9 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -29,25 +29,27 @@
 constexpr size_t data_size         = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
-void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
+void parquet_read_common(cudf::size_type num_rows_to_read,
+                         cudf::size_type num_cols_to_read,
                          cuio_source_sink_pair& source_sink,
                          nvbench::state& state)
 {
-  cudf::io::write_parquet(write_opts);
-
   cudf::io::parquet_reader_options read_opts =
     cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-             [&](nvbench::launch& launch, auto& timer) {
-               try_drop_l3_cache();
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+      try_drop_l3_cache();
+
+      timer.start();
+      auto const result = cudf::io::read_parquet(read_opts);
+      timer.stop();
 
-               timer.start();
-               cudf::io::read_parquet(read_opts);
-               timer.stop();
-             });
+      CUDF_EXPECTS(result.tbl->num_columns() == num_cols_to_read, "Unexpected number of columns");
+      CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows");
+    });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
   state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
@@ -64,19 +66,23 @@ void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enu
   auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
   auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
   auto const compression = cudf::io::compression_type::SNAPPY;
-
-  auto const tbl =
-    create_random_table(cycle_dtypes(d_type, num_cols),
-                        table_size_bytes{data_size},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
-
   cuio_source_sink_pair source_sink(source_type);
-  cudf::io::parquet_writer_options write_opts =
-    cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-      .compression(compression);
 
-  parquet_read_common(write_opts, source_sink, state);
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, num_cols),
+      table_size_bytes{data_size},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::parquet_writer_options write_opts =
+      cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(compression);
+    cudf::io::write_parquet(write_opts);
+    return view.num_rows();
+  }();
+
+  parquet_read_common(num_rows_written, num_cols, source_sink, state);
 }
 
 void BM_parquet_read_io_compression(nvbench::state& state)
@@ -94,19 +100,23 @@ void BM_parquet_read_io_compression(nvbench::state& state)
   auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
   auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
   auto const compression = retrieve_compression_type_enum(state.get_string("compression_type"));
-
-  auto const tbl =
-    create_random_table(cycle_dtypes(d_type, num_cols),
-                        table_size_bytes{data_size},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
-
   cuio_source_sink_pair source_sink(source_type);
-  cudf::io::parquet_writer_options write_opts =
-    cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-      .compression(compression);
 
-  parquet_read_common(write_opts, source_sink, state);
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, num_cols),
+      table_size_bytes{data_size},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::parquet_writer_options write_opts =
+      cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(compression);
+    cudf::io::write_parquet(write_opts);
+    return view.num_rows();
+  }();
+
+  parquet_read_common(num_rows_written, num_cols, source_sink, state);
 }
 
 void BM_parquet_read_io_small_mixed(nvbench::state& state)
@@ -118,25 +128,28 @@ void BM_parquet_read_io_small_mixed(nvbench::state& state)
   auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
   auto const num_strings = static_cast<cudf::size_type>(state.get_int64("num_string_cols"));
   auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
+  cuio_source_sink_pair source_sink(source_type);
 
   // want 80 pages total, across 4 columns, so 20 pages per column
   cudf::size_type constexpr n_col          = 4;
   cudf::size_type constexpr page_size_rows = 10'000;
   cudf::size_type constexpr num_rows       = page_size_rows * (80 / n_col);
 
-  auto const tbl =
-    create_random_table(mix_dtypes(d_type, n_col, num_strings),
-                        row_count{num_rows},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
-
-  cuio_source_sink_pair source_sink(source_type);
-  cudf::io::parquet_writer_options write_opts =
-    cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-      .max_page_size_rows(10'000)
-      .compression(cudf::io::compression_type::NONE);
-
-  parquet_read_common(write_opts, source_sink, state);
+  {
+    auto const tbl = create_random_table(
+      mix_dtypes(d_type, n_col, num_strings),
+      row_count{num_rows},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::parquet_writer_options write_opts =
+      cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+        .max_page_size_rows(10'000)
+        .compression(cudf::io::compression_type::NONE);
+    cudf::io::write_parquet(write_opts);
+  }
+
+  parquet_read_common(num_rows, n_col, source_sink, state);
 }
 
 template <data_type DataType>
@@ -148,36 +161,44 @@ void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list<nvbench::e
   auto const byte_limit  = static_cast<cudf::size_type>(state.get_int64("byte_limit"));
   auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
   auto const compression = cudf::io::compression_type::SNAPPY;
+  cuio_source_sink_pair source_sink(source_type);
 
-  auto const tbl =
-    create_random_table(cycle_dtypes(d_type, num_cols),
-                        table_size_bytes{data_size},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, num_cols),
+      table_size_bytes{data_size},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
 
-  cuio_source_sink_pair source_sink(source_type);
-  cudf::io::parquet_writer_options write_opts =
-    cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-      .compression(compression);
+    cudf::io::parquet_writer_options write_opts =
+      cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(compression);
 
-  cudf::io::write_parquet(write_opts);
+    cudf::io::write_parquet(write_opts);
+    return view.num_rows();
+  }();
 
   cudf::io::parquet_reader_options read_opts =
     cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-             [&](nvbench::launch& launch, auto& timer) {
-               try_drop_l3_cache();
-
-               timer.start();
-               auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts);
-               do {
-                 [[maybe_unused]] auto const chunk = reader.read_chunk();
-               } while (reader.has_next());
-               timer.stop();
-             });
+  state.exec(
+    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
+      try_drop_l3_cache();
+
+      timer.start();
+      auto reader                   = cudf::io::chunked_parquet_reader(byte_limit, read_opts);
+      cudf::size_type num_rows_read = 0;
+      do {
+        auto const result = reader.read_chunk();
+        num_rows_read += result.tbl->num_rows();
+        CUDF_EXPECTS(result.tbl->num_columns() == num_cols, "Unexpected number of columns");
+      } while (reader.has_next());
+      timer.stop();
+
+      CUDF_EXPECTS(num_rows_read == num_rows_written, "Benchmark did not read the entire table");
+    });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
   state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
index 9f221de7da2..62925e8d315 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
@@ -83,6 +83,7 @@ void BM_parquet_read_options(nvbench::state& state,
 
   auto const cols_to_read =
     select_column_names(get_top_level_col_names(source_sink.make_source_info()), ColSelection);
+  cudf::size_type const expected_num_cols = cols_to_read.size();
   cudf::io::parquet_reader_options read_options =
     cudf::io::parquet_reader_options::builder(source_sink.make_source_info())
       .columns(cols_to_read)
@@ -98,9 +99,8 @@ void BM_parquet_read_options(nvbench::state& state,
   state.exec(
     nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
       try_drop_l3_cache();
-
+      cudf::size_type num_rows_read = 0;
       timer.start();
-      cudf::size_type rows_read = 0;
       for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
         switch (RowSelection) {
           case row_selection::ALL: break;
@@ -114,11 +114,15 @@ void BM_parquet_read_options(nvbench::state& state,
           default: CUDF_FAIL("Unsupported row selection method");
         }
 
-        rows_read += cudf::io::read_parquet(read_options).tbl->num_rows();
+        auto const result = cudf::io::read_parquet(read_options);
+
+        num_rows_read += result.tbl->num_rows();
+        CUDF_EXPECTS(result.tbl->num_columns() == expected_num_cols,
+                     "Unexpected number of columns");
       }
 
-      CUDF_EXPECTS(rows_read == view.num_rows(), "Benchmark did not read the entire table");
       timer.stop();
+      CUDF_EXPECTS(num_rows_read == view.num_rows(), "Benchmark did not read the entire table");
     });
 
   auto const elapsed_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");